From 3a23053adb3128e1edd1f3ed852a849c21ca0c6d Mon Sep 17 00:00:00 2001
From: janvanrijn <janvanrijn@gmail.com>
Date: Mon, 25 Feb 2019 15:26:24 +0100
Subject: [PATCH 1/5] adds list studies

---
 openml/study/__init__.py                 |  6 +-
 openml/study/functions.py                | 97 ++++++++++++++++++++++++
 tests/test_study/test_study_functions.py |  5 ++
 3 files changed, 106 insertions(+), 2 deletions(-)

diff --git a/openml/study/__init__.py b/openml/study/__init__.py
index f99b0d638..026591f46 100644
--- a/openml/study/__init__.py
+++ b/openml/study/__init__.py
@@ -1,9 +1,11 @@
 from .study import OpenMLStudy
 from .functions import get_study, create_study, create_benchmark_suite, \
-    status_update, attach_to_study, detach_from_study, delete_study
+    status_update, attach_to_study, detach_from_study, delete_study, \
+    list_studies
 
 
 __all__ = [
     'OpenMLStudy', 'attach_to_study', 'create_benchmark_suite', 'create_study',
-    'delete_study', 'detach_from_study', 'get_study', 'status_update',
+    'delete_study', 'detach_from_study', 'get_study', 'list_studies', 
+    'status_update'
 ]
diff --git a/openml/study/functions.py b/openml/study/functions.py
index a2600e4a0..3ab49460c 100644
--- a/openml/study/functions.py
+++ b/openml/study/functions.py
@@ -278,3 +278,100 @@ def detach_from_study(study_id, entity_ids):
                                                      post_variables)
     result = xmltodict.parse(result_xml)['oml:study_detach']
     return int(result['oml:linked_entities'])
+
+
+def list_studies(offset=None, size=None, main_entity_type=None,status=None,
+                 uploader=None):
+    """
+    Return a list of all studies which are on OpenML.
+
+    Parameters
+    ----------
+    offset : int, optional
+        The number of studies to skip, starting from the first.
+    size : int, optional
+        The maximum number of studies to show.
+    main_entity_type : str, optional
+        Can be `task` or `run`. In case of `task`, only benchmark suites are
+        returned. In case of `run`, only studies are returned. 
+    status : str, optional
+        Should be {active, in_preparation, deactivated, all}. By default active
+        studies are returned.
+    uploader : list (int), optional
+        Result filter. Will only return studies created by these users.
+
+    Returns
+    -------
+    studies : dict of dicts
+        A mapping from study ID to dict.
+
+        Every dataset is represented by a dictionary containing
+        the following information:
+        - id
+        - name
+        - main_entity_type
+        - status
+        - creator
+        - creation_date
+
+        Fields that are absent from the server response for a
+        study are omitted from its dictionary.
+    """
+    return openml.utils._list_all(_list_studies,
+                                  offset=offset,
+                                  size=size,
+                                  main_entity_type=main_entity_type,
+                                  status=status,
+                                  uploader=uploader)
+
+
+def _list_studies(**kwargs):
+    """
+    Perform api call to return a list of studies.
+
+    Parameters
+    ----------
+    kwargs : dict, optional
+        Legal filter operators (keys in the dict):
+        status, limit, offset, main_entity_type, uploader
+
+    Returns
+    -------
+    studies : dict of dicts
+    """
+    api_call = "study/list"
+    if kwargs is not None:
+        for operator, value in kwargs.items():
+            api_call += "/%s/%s" % (operator, value)
+    return __list_studies(api_call)
+
+
+def __list_studies(api_call):
+    xml_string = openml._api_calls._perform_api_call(api_call, 'get')
+    study_dict = xmltodict.parse(xml_string, force_list=('oml:study',))
+
+    # Minimalistic check if the XML is useful
+    assert type(study_dict['oml:study_list']['oml:study']) == list, \
+        type(study_dict['oml:study_list'])
+    assert study_dict['oml:study_list']['@xmlns:oml'] == \
+        'http://openml.org/openml', study_dict['oml:study_list']['@xmlns:oml']
+
+    studies = dict()
+    for study_ in study_dict['oml:study_list']['oml:study']:
+        expected_fields = {
+            'oml:id': 'id',
+            'oml:alias': 'alias',
+            'oml:main_entity_type': 'main_entity_type',
+            'oml:name': 'name',
+            'oml:status': 'status',
+            'oml:creation_date': 'creation_date',
+            'oml:creator': 'creator'
+        }
+        study_id = int(study_['oml:id'])
+        current_study = dict()
+        for oml_field_name, real_field_name in expected_fields.items():
+            if oml_field_name in study_:
+                current_study[real_field_name] = study_[oml_field_name]
+        current_study['id'] = int(current_study['id'])
+        studies[study_id] = current_study
+    return studies
diff --git a/tests/test_study/test_study_functions.py b/tests/test_study/test_study_functions.py
index 23f6ff32d..4cb19a58b 100644
--- a/tests/test_study/test_study_functions.py
+++ b/tests/test_study/test_study_functions.py
@@ -159,3 +159,8 @@ def test_study_attach_illegal(self):
             openml.study.attach_to_study(study_id, list(run_list_more.keys()))
         study_downloaded = openml.study.get_study(study_id)
         self.assertListEqual(study_original.runs, study_downloaded.runs)
+
+    def test_study_list(self):
+        study_list = openml.study.list_studies(status='in_preparation')
+        # might fail if the server was recently reset
+        self.assertGreater(len(study_list), 2)

From ab5299bf7614a1aca6e1e8e1fa57e144ed1ece85 Mon Sep 17 00:00:00 2001
From: janvanrijn <janvanrijn@gmail.com>
Date: Mon, 25 Feb 2019 15:41:23 +0100
Subject: [PATCH 2/5] PEP8

---
 openml/study/__init__.py  | 2 +-
 openml/study/functions.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/openml/study/__init__.py b/openml/study/__init__.py
index 026591f46..751beffa9 100644
--- a/openml/study/__init__.py
+++ b/openml/study/__init__.py
@@ -6,6 +6,6 @@
 
 __all__ = [
     'OpenMLStudy', 'attach_to_study', 'create_benchmark_suite', 'create_study',
-    'delete_study', 'detach_from_study', 'get_study', 'list_studies', 
+    'delete_study', 'detach_from_study', 'get_study', 'list_studies',
     'status_update'
 ]
diff --git a/openml/study/functions.py b/openml/study/functions.py
index 3ab49460c..65dacf407 100644
--- a/openml/study/functions.py
+++ b/openml/study/functions.py
@@ -280,7 +280,7 @@ def detach_from_study(study_id, entity_ids):
     return int(result['oml:linked_entities'])
 
 
-def list_studies(offset=None, size=None, main_entity_type=None,status=None,
+def list_studies(offset=None, size=None, main_entity_type=None, status=None,
                  uploader=None):
     """
     Return a list of all studies which are on OpenML.
@@ -293,7 +293,7 @@ def list_studies(offset=None, size=None, main_entity_type=None,status=None,
         The maximum number of studies to show.
     main_entity_type : str, optional
         Can be `task` or `run`. In case of `task`, only benchmark suites are
-        returned. In case of `run`, only studies are returned. 
+        returned. In case of `run`, only studies are returned.
     status : str, optional
         Should be {active, in_preparation, deactivated, all}. By default active
         studies are returned.

From 7db4c705fea7401d468113cd80daad9b1b7196aa Mon Sep 17 00:00:00 2001
From: janvanrijn <janvanrijn@gmail.com>
Date: Mon, 25 Feb 2019 16:00:26 +0100
Subject: [PATCH 3/5] benchmark suite

---
 openml/study/functions.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/openml/study/functions.py b/openml/study/functions.py
index 65dacf407..21fac6726 100644
--- a/openml/study/functions.py
+++ b/openml/study/functions.py
@@ -281,7 +281,7 @@ def detach_from_study(study_id, entity_ids):
 
 
 def list_studies(offset=None, size=None, main_entity_type=None, status=None,
-                 uploader=None):
+                 uploader=None, benchmark_suite=None):
     """
     Return a list of all studies which are on OpenML.
 
@@ -308,8 +308,10 @@ def list_studies(offset=None, size=None, main_entity_type=None, status=None,
         Every dataset is represented by a dictionary containing
         the following information:
         - id
+        - alias (optional)
         - name
         - main_entity_type
+        - benchmark_suite (optional)
         - status
         - creator
         - creation_date
@@ -322,7 +324,8 @@ def list_studies(offset=None, size=None, main_entity_type=None, status=None,
                                   size=size,
                                   main_entity_type=main_entity_type,
                                   status=status,
-                                  uploader=uploader)
+                                  uploader=uploader,
+                                  benchmark_suite=benchmark_suite)
 
 
 def _list_studies(**kwargs):
@@ -362,6 +365,7 @@ def __list_studies(api_call):
             'oml:id': 'id',
             'oml:alias': 'alias',
             'oml:main_entity_type': 'main_entity_type',
+            'oml:benchmark_suite': 'benchmark_suite',
             'oml:name': 'name',
             'oml:status': 'status',
             'oml:creation_date': 'creation_date',

From 42b9668120215d8cd9a1a777c470eb03e331eed8 Mon Sep 17 00:00:00 2001
From: janvanrijn <janvanrijn@gmail.com>
Date: Tue, 26 Feb 2019 14:23:13 +0100
Subject: [PATCH 4/5] fix unit tests

---
 openml/study/study.py                    | 1 -
 tests/test_study/test_study_functions.py | 6 +++---
 tests/test_tasks/test_task_functions.py  | 2 +-
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/openml/study/study.py b/openml/study/study.py
index a07b4b5bf..6e9311675 100644
--- a/openml/study/study.py
+++ b/openml/study/study.py
@@ -83,7 +83,6 @@ def publish(self):
         file_elements = {
             'description': self._to_xml()
         }
-
         return_value = openml._api_calls._perform_api_call(
             "study/",
             'post',
diff --git a/tests/test_study/test_study_functions.py b/tests/test_study/test_study_functions.py
index 4cb19a58b..9a91beb61 100644
--- a/tests/test_study/test_study_functions.py
+++ b/tests/test_study/test_study_functions.py
@@ -18,13 +18,13 @@ def test_get_study(self):
         self.assertEqual(len(study.setups), 30)
 
     def test_get_tasks(self):
-        study_id = 14
+        study_id = 1
 
         study = openml.study.get_study(study_id, 'tasks')
-        self.assertGreater(len(study.tasks), 0)
+        self.assertGreater(len(study.data), 0)
+        self.assertGreaterEqual(len(study.tasks), len(study.data))
         # note that other entities are None, even though this study has
         # datasets
-        self.assertIsNone(study.data)
         self.assertIsNone(study.flows)
         self.assertIsNone(study.setups)
         self.assertIsNone(study.runs)
diff --git a/tests/test_tasks/test_task_functions.py b/tests/test_tasks/test_task_functions.py
index 867c14d1b..02b505fc6 100644
--- a/tests/test_tasks/test_task_functions.py
+++ b/tests/test_tasks/test_task_functions.py
@@ -73,7 +73,7 @@ def test_list_tasks_empty(self):
 
     def test_list_tasks_by_tag(self):
         num_basic_tasks = 100  # number is flexible, check server if fails
-        tasks = openml.tasks.list_tasks(tag='study_14')
+        tasks = openml.tasks.list_tasks(tag='OpenML100')
         self.assertGreaterEqual(len(tasks), num_basic_tasks)
         for tid in tasks:
             self._check_task(tasks[tid])

From 4f60c2587ac779e5592cdfbce24c19ba6d87ea55 Mon Sep 17 00:00:00 2001
From: janvanrijn <janvanrijn@gmail.com>
Date: Tue, 26 Feb 2019 14:26:22 +0100
Subject: [PATCH 5/5] comments by Matthias F.

---
 openml/study/functions.py | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/openml/study/functions.py b/openml/study/functions.py
index 21fac6726..6c0c67b44 100644
--- a/openml/study/functions.py
+++ b/openml/study/functions.py
@@ -292,8 +292,8 @@ def list_studies(offset=None, size=None, main_entity_type=None, status=None,
     size : int, optional
         The maximum number of studies to show.
     main_entity_type : str, optional
-        Can be `task` or `run`. In case of `task`, only benchmark suites are
-        returned. In case of `run`, only studies are returned.
+        Can be ``'task'`` or ``'run'``. In case of ``'task'``, only benchmark
+        suites are returned. In case of ``'run'``, only studies are returned.
     status : str, optional
         Should be {active, in_preparation, deactivated, all}. By default active
         studies are returned.
@@ -361,21 +361,22 @@ def __list_studies(api_call):
 
     studies = dict()
     for study_ in study_dict['oml:study_list']['oml:study']:
+        # maps from xml name to a tuple of (dict name, casting fn)
         expected_fields = {
-            'oml:id': 'id',
-            'oml:alias': 'alias',
-            'oml:main_entity_type': 'main_entity_type',
-            'oml:benchmark_suite': 'benchmark_suite',
-            'oml:name': 'name',
-            'oml:status': 'status',
-            'oml:creation_date': 'creation_date',
-            'oml:creator': 'creator'
+            'oml:id': ('id', int),
+            'oml:alias': ('alias', str),
+            'oml:main_entity_type': ('main_entity_type', str),
+            'oml:benchmark_suite': ('benchmark_suite', int),
+            'oml:name': ('name', str),
+            'oml:status': ('status', str),
+            'oml:creation_date': ('creation_date', str),
+            'oml:creator': ('creator', int),
         }
         study_id = int(study_['oml:id'])
         current_study = dict()
-        for oml_field_name, real_field_name in expected_fields.items():
+        for oml_field_name, (real_field_name, cast_fn) in expected_fields.items():
             if oml_field_name in study_:
-                current_study[real_field_name] = study_[oml_field_name]
+                current_study[real_field_name] = cast_fn(study_[oml_field_name])
         current_study['id'] = int(current_study['id'])
         studies[study_id] = current_study
     return studies