Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions openml/study/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from .study import OpenMLStudy
from .functions import get_study, create_study, create_benchmark_suite, \
status_update, attach_to_study, detach_from_study, delete_study
status_update, attach_to_study, detach_from_study, delete_study, \
list_studies


__all__ = [
'OpenMLStudy', 'attach_to_study', 'create_benchmark_suite', 'create_study',
'delete_study', 'detach_from_study', 'get_study', 'status_update',
'delete_study', 'detach_from_study', 'get_study', 'list_studies',
'status_update'
]
102 changes: 102 additions & 0 deletions openml/study/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,3 +278,105 @@ def detach_from_study(study_id, entity_ids):
post_variables)
result = xmltodict.parse(result_xml)['oml:study_detach']
return int(result['oml:linked_entities'])


def list_studies(offset=None, size=None, main_entity_type=None, status=None,
                 uploader=None, benchmark_suite=None):
    """
    Return a list of all studies which are on OpenML.

    All arguments are forwarded as filters to ``openml.utils._list_all``,
    which drives ``_list_studies``.

    Parameters
    ----------
    offset : int, optional
        The number of studies to skip, starting from the first.
    size : int, optional
        The maximum number of studies to show.
    main_entity_type : str, optional
        Can be ``'task'`` or ``'run'``. In case of `task`, only benchmark
        suites are returned. In case of `run`, only studies are returned.
    status : str, optional
        Should be {active, in_preparation, deactivated, all}. By default active
        studies are returned.
    uploader : list (int), optional
        Result filter. Will only return studies created by these users.
    benchmark_suite : optional
        Result filter, forwarded unchanged like the other filters.
        NOTE(review): presumably the ID of a benchmark suite that the
        returned studies are linked to -- confirm against the server API.

    Returns
    -------
    studies : dict of dicts
        A mapping from study ID to dict.

        Every study is represented by a dictionary containing
        the following information:
        - id
        - alias (optional)
        - name
        - main_entity_type
        - benchmark_suite (optional)
        - status
        - creator
        - creation_date
    """
    return openml.utils._list_all(_list_studies,
                                  offset=offset,
                                  size=size,
                                  main_entity_type=main_entity_type,
                                  status=status,
                                  uploader=uploader,
                                  benchmark_suite=benchmark_suite)


def _list_studies(**kwargs):
    """
    Assemble the ``study/list`` api call from the filters and execute it.

    Parameters
    ----------
    kwargs : dict, optional
        Legal filter operators (keys in the dict):
        status, limit, offset, main_entity_type, uploader

    Returns
    -------
    studies : dict of dicts
    """
    # each filter contributes one "/<operator>/<value>" path segment,
    # appended in the iteration order of kwargs
    filter_segments = [
        "/%s/%s" % (name, setting) for name, setting in kwargs.items()
    ]
    return __list_studies("study/list" + "".join(filter_segments))


def __list_studies(api_call):
    """
    Perform the given study listing api call and parse its XML response.

    Parameters
    ----------
    api_call : str
        The api path that is requested with a ``get`` call.

    Returns
    -------
    studies : dict of dicts
        Maps every study ID (int) to a dict holding whichever of the fields
        id, alias, main_entity_type, benchmark_suite, name, status,
        creation_date and creator are present for that study in the
        response. id, benchmark_suite and creator are cast to ``int``, the
        remaining fields to ``str``.

    Raises
    ------
    AssertionError
        If the parsed ``oml:study`` entry is not a list, or if the
        ``@xmlns:oml`` attribute is not ``http://openml.org/openml``.
    """
    # maps from xml name to a tuple of (dict name, casting fn); it does not
    # depend on the individual study, so it is built once up front
    expected_fields = {
        'oml:id': ('id', int),
        'oml:alias': ('alias', str),
        'oml:main_entity_type': ('main_entity_type', str),
        'oml:benchmark_suite': ('benchmark_suite', int),
        'oml:name': ('name', str),
        'oml:status': ('status', str),
        'oml:creation_date': ('creation_date', str),
        'oml:creator': ('creator', int),
    }

    xml_string = openml._api_calls._perform_api_call(api_call, 'get')
    # force_list keeps 'oml:study' a list even when only one study is present
    study_dict = xmltodict.parse(xml_string, force_list=('oml:study',))
    study_list = study_dict['oml:study_list']

    # Minimalistic check if the XML is useful; on failure the assertion
    # message carries the offending type / namespace value
    assert isinstance(study_list['oml:study'], list), \
        type(study_list['oml:study'])
    assert study_list['@xmlns:oml'] == 'http://openml.org/openml', \
        study_list['@xmlns:oml']

    studies = dict()
    for study_ in study_list['oml:study']:
        study_id = int(study_['oml:id'])
        # only fields that are actually present in the response are copied
        studies[study_id] = {
            real_field_name: cast_fn(study_[oml_field_name])
            for oml_field_name, (real_field_name, cast_fn)
            in expected_fields.items()
            if oml_field_name in study_
        }
    return studies
1 change: 0 additions & 1 deletion openml/study/study.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,6 @@ def publish(self):
file_elements = {
'description': self._to_xml()
}

return_value = openml._api_calls._perform_api_call(
"study/",
'post',
Expand Down
11 changes: 8 additions & 3 deletions tests/test_study/test_study_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,13 @@ def test_get_study(self):
self.assertEqual(len(study.setups), 30)

def test_get_tasks(self):
study_id = 14
study_id = 1

study = openml.study.get_study(study_id, 'tasks')
self.assertGreater(len(study.tasks), 0)
self.assertGreater(len(study.data), 0)
self.assertGreaterEqual(len(study.tasks), len(study.data))
# note that other entities are None, even though this study has
# datasets
self.assertIsNone(study.data)
self.assertIsNone(study.flows)
self.assertIsNone(study.setups)
self.assertIsNone(study.runs)
Expand Down Expand Up @@ -159,3 +159,8 @@ def test_study_attach_illegal(self):
openml.study.attach_to_study(study_id, list(run_list_more.keys()))
study_downloaded = openml.study.get_study(study_id)
self.assertListEqual(study_original.runs, study_downloaded.runs)

def test_study_list(self):
    # might fail if the server was recently reset
    self.assertGreater(
        len(openml.study.list_studies(status='in_preparation')),
        2,
    )
2 changes: 1 addition & 1 deletion tests/test_tasks/test_task_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def test_list_tasks_empty(self):

def test_list_tasks_by_tag(self):
num_basic_tasks = 100 # number is flexible, check server if fails
tasks = openml.tasks.list_tasks(tag='study_14')
tasks = openml.tasks.list_tasks(tag='OpenML100')
self.assertGreaterEqual(len(tasks), num_basic_tasks)
for tid in tasks:
self._check_task(tasks[tid])
Expand Down