diff --git a/openml/study/__init__.py b/openml/study/__init__.py index f99b0d638..751beffa9 100644 --- a/openml/study/__init__.py +++ b/openml/study/__init__.py @@ -1,9 +1,11 @@ from .study import OpenMLStudy from .functions import get_study, create_study, create_benchmark_suite, \ - status_update, attach_to_study, detach_from_study, delete_study + status_update, attach_to_study, detach_from_study, delete_study, \ + list_studies __all__ = [ 'OpenMLStudy', 'attach_to_study', 'create_benchmark_suite', 'create_study', - 'delete_study', 'detach_from_study', 'get_study', 'status_update', + 'delete_study', 'detach_from_study', 'get_study', 'list_studies', + 'status_update' ] diff --git a/openml/study/functions.py b/openml/study/functions.py index a2600e4a0..6c0c67b44 100644 --- a/openml/study/functions.py +++ b/openml/study/functions.py @@ -278,3 +278,105 @@ def detach_from_study(study_id, entity_ids): post_variables) result = xmltodict.parse(result_xml)['oml:study_detach'] return int(result['oml:linked_entities']) + + +def list_studies(offset=None, size=None, main_entity_type=None, status=None, + uploader=None, benchmark_suite=None): + """ + Return a list of all studies which are on OpenML. + + Parameters + ---------- + offset : int, optional + The number of studies to skip, starting from the first. + size : int, optional + The maximum number of studies to show. + main_entity_type : str, optional + Can be ``'task'`` or ``'run'``. In case of `task`, only benchmark + suites are returned. In case of `run`, only studies are returned. + status : str, optional + Should be {active, in_preparation, deactivated, all}. By default active + studies are returned. + uploader : list (int), optional + Result filter. Will only return studies created by these users. + + Returns + ------- + studies : dict of dicts + A mapping from study ID to dict. 
+ + Every study is represented by a dictionary containing + the following information: + - id + - alias (optional) + - name + - main_entity_type + - benchmark_suite (optional) + - status + - creator + - creation_date + + If qualities are calculated for the dataset, some of + these are also returned. + """ + return openml.utils._list_all(_list_studies, + offset=offset, + size=size, + main_entity_type=main_entity_type, + status=status, + uploader=uploader, + benchmark_suite=benchmark_suite) + + +def _list_studies(**kwargs): + """ + Perform api call to return a list of studies. + + Parameters + ---------- + kwargs : dict, optional + Legal filter operators (keys in the dict): + status, limit, offset, main_entity_type, uploader + + Returns + ------- + studies : dict of dicts + """ + api_call = "study/list" + if kwargs is not None: + for operator, value in kwargs.items(): + api_call += "/%s/%s" % (operator, value) + return __list_studies(api_call) + + +def __list_studies(api_call): + xml_string = openml._api_calls._perform_api_call(api_call, 'get') + study_dict = xmltodict.parse(xml_string, force_list=('oml:study',)) + + # Minimalistic check if the XML is useful + assert type(study_dict['oml:study_list']['oml:study']) == list, \ + type(study_dict['oml:study_list']) + assert study_dict['oml:study_list']['@xmlns:oml'] == \ + 'http://openml.org/openml', study_dict['oml:study_list']['@xmlns:oml'] + + studies = dict() + for study_ in study_dict['oml:study_list']['oml:study']: + # maps from xml name to a tuple of (dict name, casting fn) + expected_fields = { + 'oml:id': ('id', int), + 'oml:alias': ('alias', str), + 'oml:main_entity_type': ('main_entity_type', str), + 'oml:benchmark_suite': ('benchmark_suite', int), + 'oml:name': ('name', str), + 'oml:status': ('status', str), + 'oml:creation_date': ('creation_date', str), + 'oml:creator': ('creator', int), + } + study_id = int(study_['oml:id']) + current_study = dict() + for oml_field_name, (real_field_name, cast_fn) in 
expected_fields.items(): + if oml_field_name in study_: + current_study[real_field_name] = cast_fn(study_[oml_field_name]) + current_study['id'] = int(current_study['id']) + studies[study_id] = current_study + return studies diff --git a/openml/study/study.py b/openml/study/study.py index a07b4b5bf..6e9311675 100644 --- a/openml/study/study.py +++ b/openml/study/study.py @@ -83,7 +83,6 @@ def publish(self): file_elements = { 'description': self._to_xml() } - return_value = openml._api_calls._perform_api_call( "study/", 'post', diff --git a/tests/test_study/test_study_functions.py b/tests/test_study/test_study_functions.py index 23f6ff32d..9a91beb61 100644 --- a/tests/test_study/test_study_functions.py +++ b/tests/test_study/test_study_functions.py @@ -18,13 +18,13 @@ def test_get_study(self): self.assertEqual(len(study.setups), 30) def test_get_tasks(self): - study_id = 14 + study_id = 1 study = openml.study.get_study(study_id, 'tasks') - self.assertGreater(len(study.tasks), 0) + self.assertGreater(len(study.data), 0) + self.assertGreaterEqual(len(study.tasks), len(study.data)) # note that other entities are None, even though this study has # datasets - self.assertIsNone(study.data) self.assertIsNone(study.flows) self.assertIsNone(study.setups) self.assertIsNone(study.runs) @@ -159,3 +159,8 @@ def test_study_attach_illegal(self): openml.study.attach_to_study(study_id, list(run_list_more.keys())) study_downloaded = openml.study.get_study(study_id) self.assertListEqual(study_original.runs, study_downloaded.runs) + + def test_study_list(self): + study_list = openml.study.list_studies(status='in_preparation') + # might fail if the server was recently reset + self.assertGreater(len(study_list), 2) diff --git a/tests/test_tasks/test_task_functions.py b/tests/test_tasks/test_task_functions.py index 867c14d1b..02b505fc6 100644 --- a/tests/test_tasks/test_task_functions.py +++ b/tests/test_tasks/test_task_functions.py @@ -73,7 +73,7 @@ def test_list_tasks_empty(self): def 
test_list_tasks_by_tag(self): num_basic_tasks = 100 # number is flexible, check server if fails - tasks = openml.tasks.list_tasks(tag='study_14') + tasks = openml.tasks.list_tasks(tag='OpenML100') self.assertGreaterEqual(len(tasks), num_basic_tasks) for tid in tasks: self._check_task(tasks[tid])