def list_studies(offset=None, size=None, main_entity_type=None, status=None,
                 uploader=None, benchmark_suite=None):
    """
    Return a list of all studies which are on OpenML.

    Parameters
    ----------
    offset : int, optional
        The number of studies to skip, starting from the first.
    size : int, optional
        The maximum number of studies to show.
    main_entity_type : str, optional
        Can be ``'task'`` or ``'run'``. In case of ``'task'``, only benchmark
        suites are returned. In case of ``'run'``, only studies are returned.
    status : str, optional
        Should be {active, in_preparation, deactivated, all}. By default active
        studies are returned.
    uploader : list (int), optional
        Result filter. Will only return studies created by these users.
    benchmark_suite : int, optional
        Result filter, forwarded to the server as the ``benchmark_suite``
        operator. NOTE(review): presumably the id of the benchmark suite a
        study is linked to -- confirm against the OpenML API documentation.

    Returns
    -------
    studies : dict of dicts
        A mapping from study ID to dict.

        Every study is represented by a dictionary containing
        the following information:
        - id
        - alias (optional)
        - name
        - main_entity_type
        - benchmark_suite (optional)
        - status
        - creator
        - creation_date
    """
    return openml.utils._list_all(_list_studies,
                                  offset=offset,
                                  size=size,
                                  main_entity_type=main_entity_type,
                                  status=status,
                                  uploader=uploader,
                                  benchmark_suite=benchmark_suite)


def _list_studies(**kwargs):
    """
    Perform api call to return a list of studies.

    Parameters
    ----------
    kwargs : dict, optional
        Legal filter operators (keys in the dict):
        status, limit, offset, main_entity_type, uploader, benchmark_suite

    Returns
    -------
    studies : dict of dicts
    """
    return __list_studies(_study_list_api_call(kwargs))


def _study_list_api_call(filters):
    """Build the ``study/list`` api call string from a dict of filters."""
    parts = ["study/list"]
    for operator, value in filters.items():
        # A filter that was not set must not end up as the text "None"
        # inside the URL.
        if value is None:
            continue
        # List-valued filters (``uploader`` is documented as a list of ints)
        # would otherwise be rendered as "[1, 2]".
        # NOTE(review): comma-separated ids is the assumed server format for
        # list filters -- confirm against the OpenML REST API.
        if isinstance(value, (list, tuple)):
            value = ','.join(str(item) for item in value)
        parts.append("%s/%s" % (operator, value))
    return "/".join(parts)


def __list_studies(api_call):
    """
    Execute ``api_call`` (a GET request) and parse the returned XML.

    Parameters
    ----------
    api_call : str
        The api call string, e.g. as built by ``_list_studies``.

    Returns
    -------
    studies : dict of dicts
        A mapping from study ID to a dict describing that study.
    """
    xml_string = openml._api_calls._perform_api_call(api_call, 'get')
    # force_list makes sure a single study still shows up as a list
    study_dict = xmltodict.parse(xml_string, force_list=('oml:study',))
    return _parse_study_list(study_dict)


# maps from xml name to a tuple of (dict name, casting fn)
_STUDY_LIST_FIELDS = {
    'oml:id': ('id', int),
    'oml:alias': ('alias', str),
    'oml:main_entity_type': ('main_entity_type', str),
    'oml:benchmark_suite': ('benchmark_suite', int),
    'oml:name': ('name', str),
    'oml:status': ('status', str),
    'oml:creation_date': ('creation_date', str),
    'oml:creator': ('creator', int),
}


def _parse_study_list(study_dict):
    """Convert a parsed ``oml:study_list`` document to a dict of studies."""
    study_list = study_dict['oml:study_list']

    # Minimalistic check if the XML is useful
    assert isinstance(study_list['oml:study'], list), \
        type(study_list['oml:study'])
    assert study_list['@xmlns:oml'] == 'http://openml.org/openml', \
        study_list['@xmlns:oml']

    studies = dict()
    for study_ in study_list['oml:study']:
        # Only fields present in the XML are copied; e.g. alias and
        # benchmark_suite are optional.
        current_study = {
            real_field_name: cast_fn(study_[oml_field_name])
            for oml_field_name, (real_field_name, cast_fn)
            in _STUDY_LIST_FIELDS.items()
            if oml_field_name in study_
        }
        studies[int(study_['oml:id'])] = current_study
    return studies
@@ -293,7 +293,7 @@ def list_studies(offset=None, size=None, main_entity_type=None,status=None, The maximum number of studies to show. main_entity_type : str, optional Can be `task` or `run`. In case of `task`, only benchmark suites are - returned. In case of `run`, only studies are returned. + returned. In case of `run`, only studies are returned. status : str, optional Should be {active, in_preparation, deactivated, all}. By default active studies are returned. From 7db4c705fea7401d468113cd80daad9b1b7196aa Mon Sep 17 00:00:00 2001 From: janvanrijn Date: Mon, 25 Feb 2019 16:00:26 +0100 Subject: [PATCH 3/5] benchmark suite --- openml/study/functions.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/openml/study/functions.py b/openml/study/functions.py index 65dacf407..21fac6726 100644 --- a/openml/study/functions.py +++ b/openml/study/functions.py @@ -281,7 +281,7 @@ def detach_from_study(study_id, entity_ids): def list_studies(offset=None, size=None, main_entity_type=None, status=None, - uploader=None): + uploader=None, benchmark_suite=None): """ Return a list of all studies which are on OpenML. 
@@ -308,8 +308,10 @@ def list_studies(offset=None, size=None, main_entity_type=None, status=None, Every dataset is represented by a dictionary containing the following information: - id + - alias (optional) - name - main_entity_type + - benchmark_suite (optional) - status - creator - creation_date @@ -322,7 +324,8 @@ def list_studies(offset=None, size=None, main_entity_type=None, status=None, size=size, main_entity_type=main_entity_type, status=status, - uploader=uploader) + uploader=uploader, + benchmark_suite=benchmark_suite) def _list_studies(**kwargs): @@ -362,6 +365,7 @@ def __list_studies(api_call): 'oml:id': 'id', 'oml:alias': 'alias', 'oml:main_entity_type': 'main_entity_type', + 'oml:benchmark_suite': 'benchmark_suite', 'oml:name': 'name', 'oml:status': 'status', 'oml:creation_date': 'creation_date', From 42b9668120215d8cd9a1a777c470eb03e331eed8 Mon Sep 17 00:00:00 2001 From: janvanrijn Date: Tue, 26 Feb 2019 14:23:13 +0100 Subject: [PATCH 4/5] fix unit tests --- openml/study/study.py | 1 - tests/test_study/test_study_functions.py | 6 +++--- tests/test_tasks/test_task_functions.py | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/openml/study/study.py b/openml/study/study.py index a07b4b5bf..6e9311675 100644 --- a/openml/study/study.py +++ b/openml/study/study.py @@ -83,7 +83,6 @@ def publish(self): file_elements = { 'description': self._to_xml() } - return_value = openml._api_calls._perform_api_call( "study/", 'post', diff --git a/tests/test_study/test_study_functions.py b/tests/test_study/test_study_functions.py index 4cb19a58b..9a91beb61 100644 --- a/tests/test_study/test_study_functions.py +++ b/tests/test_study/test_study_functions.py @@ -18,13 +18,13 @@ def test_get_study(self): self.assertEqual(len(study.setups), 30) def test_get_tasks(self): - study_id = 14 + study_id = 1 study = openml.study.get_study(study_id, 'tasks') - self.assertGreater(len(study.tasks), 0) + self.assertGreater(len(study.data), 0) + 
self.assertGreaterEqual(len(study.tasks), len(study.data)) # note that other entities are None, even though this study has # datasets - self.assertIsNone(study.data) self.assertIsNone(study.flows) self.assertIsNone(study.setups) self.assertIsNone(study.runs) diff --git a/tests/test_tasks/test_task_functions.py b/tests/test_tasks/test_task_functions.py index 867c14d1b..02b505fc6 100644 --- a/tests/test_tasks/test_task_functions.py +++ b/tests/test_tasks/test_task_functions.py @@ -73,7 +73,7 @@ def test_list_tasks_empty(self): def test_list_tasks_by_tag(self): num_basic_tasks = 100 # number is flexible, check server if fails - tasks = openml.tasks.list_tasks(tag='study_14') + tasks = openml.tasks.list_tasks(tag='OpenML100') self.assertGreaterEqual(len(tasks), num_basic_tasks) for tid in tasks: self._check_task(tasks[tid]) From 4f60c2587ac779e5592cdfbce24c19ba6d87ea55 Mon Sep 17 00:00:00 2001 From: janvanrijn Date: Tue, 26 Feb 2019 14:26:22 +0100 Subject: [PATCH 5/5] comments by Matthias F. --- openml/study/functions.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/openml/study/functions.py b/openml/study/functions.py index 21fac6726..6c0c67b44 100644 --- a/openml/study/functions.py +++ b/openml/study/functions.py @@ -292,8 +292,8 @@ def list_studies(offset=None, size=None, main_entity_type=None, status=None, size : int, optional The maximum number of studies to show. main_entity_type : str, optional - Can be `task` or `run`. In case of `task`, only benchmark suites are - returned. In case of `run`, only studies are returned. + Can be ``'task'`` or ``'run'``. In case of `task`, only benchmark + suites are returned. In case of `run`, only studies are returned. status : str, optional Should be {active, in_preparation, deactivated, all}. By default active studies are returned. 
@@ -361,21 +361,22 @@ def __list_studies(api_call): studies = dict() for study_ in study_dict['oml:study_list']['oml:study']: + # maps from xml name to a tuple of (dict name, casting fn) expected_fields = { - 'oml:id': 'id', - 'oml:alias': 'alias', - 'oml:main_entity_type': 'main_entity_type', - 'oml:benchmark_suite': 'benchmark_suite', - 'oml:name': 'name', - 'oml:status': 'status', - 'oml:creation_date': 'creation_date', - 'oml:creator': 'creator' + 'oml:id': ('id', int), + 'oml:alias': ('alias', str), + 'oml:main_entity_type': ('main_entity_type', str), + 'oml:benchmark_suite': ('benchmark_suite', int), + 'oml:name': ('name', str), + 'oml:status': ('status', str), + 'oml:creation_date': ('creation_date', str), + 'oml:creator': ('creator', int), } study_id = int(study_['oml:id']) current_study = dict() - for oml_field_name, real_field_name in expected_fields.items(): + for oml_field_name, (real_field_name, cast_fn) in expected_fields.items(): if oml_field_name in study_: - current_study[real_field_name] = study_[oml_field_name] + current_study[real_field_name] = cast_fn(study_[oml_field_name]) current_study['id'] = int(current_study['id']) studies[study_id] = current_study return studies