From 99679d988c9cc24a459dd6a8e7ee46e12a7541a1 Mon Sep 17 00:00:00 2001
From: PGijsbers <p.gijsbers@tue.nl>
Date: Thu, 16 Sep 2021 18:13:26 +0200
Subject: [PATCH 1/4] Update function signatures for create_study|suite

Publishing explicitly empty suites and downloading any empty suites are not
currently supported.
---
 doc/progress.rst                         |  2 +-
 openml/study/functions.py                | 24 ++++++++++++---------
 tests/test_study/test_study_functions.py | 27 ++++++++++++++++++++++++
 3 files changed, 42 insertions(+), 11 deletions(-)

diff --git a/doc/progress.rst b/doc/progress.rst
index 937c60eb2..110eb664c 100644
--- a/doc/progress.rst
+++ b/doc/progress.rst
@@ -8,7 +8,7 @@ Changelog
 
 0.13.0
 ~~~~~~
-
+ * FIX#1110: Make arguments to ``create_study`` and ``create_suite`` that are defined as optional actually optional.
  * MAIN#1088: Do CI for Windows on Github Actions instead of Appveyor.
 
 
diff --git a/openml/study/functions.py b/openml/study/functions.py
index ee877ddf2..2e852e1b8 100644
--- a/openml/study/functions.py
+++ b/openml/study/functions.py
@@ -177,9 +177,9 @@ def _get_study(id_: Union[int, str], entity_type) -> BaseStudy:
 def create_study(
     name: str,
     description: str,
-    run_ids: List[int],
-    alias: Optional[str],
-    benchmark_suite: Optional[int],
+    run_ids: Optional[List[int]] = None,
+    alias: Optional[str] = None,
+    benchmark_suite: Optional[int] = None,
 ) -> OpenMLStudy:
     """
     Creates an OpenML study (collection of data, tasks, flows, setups and run),
@@ -188,16 +188,19 @@ def create_study(
 
     Parameters
     ----------
-    alias : str (optional)
-        a string ID, unique on server (url-friendly)
     benchmark_suite : int (optional)
         the benchmark suite (another study) upon which this study is ran.
     name : str
         the name of the study (meta-info)
     description : str
         brief description (meta-info)
-    run_ids : list
-        a list of run ids associated with this study
+    run_ids : list, optional
+        a list of run ids associated with this study,
+        these can also be added later with ``attach_to_study``.
+    alias : str (optional)
+        a string ID, unique on server (url-friendly)
+    benchmark_suite : int (optional)
+        the ID of the suite for which this study contains run results
 
     Returns
     -------
@@ -223,7 +226,7 @@ def create_study(
 
 
 def create_benchmark_suite(
-    name: str, description: str, task_ids: List[int], alias: Optional[str],
+    name: str, description: str, task_ids: List[int], alias: Optional[str] = None,
 ) -> OpenMLBenchmarkSuite:
     """
     Creates an OpenML benchmark suite (collection of entity types, where
@@ -231,14 +234,15 @@ def create_benchmark_suite(
 
     Parameters
     ----------
-    alias : str (optional)
-        a string ID, unique on server (url-friendly)
     name : str
         the name of the study (meta-info)
     description : str
         brief description (meta-info)
     task_ids : list
         a list of task ids associated with this study
+        more can be added later with ``attach_to_suite``.
+    alias : str (optional)
+        a string ID, unique on server (url-friendly)
 
     Returns
     -------
diff --git a/tests/test_study/test_study_functions.py b/tests/test_study/test_study_functions.py
index e028ba2bd..7be1c71c0 100644
--- a/tests/test_study/test_study_functions.py
+++ b/tests/test_study/test_study_functions.py
@@ -1,4 +1,5 @@
 # License: BSD 3-Clause
+from typing import Optional, List
 
 import openml
 import openml.study
@@ -114,6 +115,32 @@ def test_publish_benchmark_suite(self):
         self.assertEqual(study_downloaded.status, "deactivated")
         # can't delete study, now it's not longer in preparation
 
+    def _test_publish_empty_study_is_allowed(self, explicit: bool):
+        runs: Optional[List[int]] = [] if explicit else None
+        kind = "explicit" if explicit else "implicit"
+
+        study = openml.study.create_study(
+            name=f"empty-study-{kind}",
+            description=f"a study with no runs attached {kind}ly",
+            run_ids=runs,
+        )
+
+        study.publish()
+        TestBase._mark_entity_for_removal("study", study.id)
+        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], study.id))
+
+        self.assertGreater(study.id, 0)
+        study_downloaded = openml.study.get_study(study.id)
+        self.assertEqual(study_downloaded.main_entity_type, "run")
+        self.assertListEqual(study_downloaded.runs, [])
+        openml.study.delete_study(study.id)
+
+    def test_publish_empty_study_explicit(self):
+        self._test_publish_empty_study_is_allowed(explicit=True)
+
+    def test_publish_empty_study_implicit(self):
+        self._test_publish_empty_study_is_allowed(explicit=False)
+
     @pytest.mark.flaky()
     def test_publish_study(self):
         # get some random runs to attach

From b6784bee3205f8609d12e96e0496a613f867d3bd Mon Sep 17 00:00:00 2001
From: PGijsbers <p.gijsbers@tue.nl>
Date: Wed, 13 Oct 2021 17:04:39 +0200
Subject: [PATCH 2/4] Refactor _get_study

---
 openml/study/functions.py                | 41 +++++++-----------------
 tests/test_study/test_study_functions.py |  4 +--
 2 files changed, 13 insertions(+), 32 deletions(-)

diff --git a/openml/study/functions.py b/openml/study/functions.py
index 2e852e1b8..144c089b3 100644
--- a/openml/study/functions.py
+++ b/openml/study/functions.py
@@ -3,7 +3,6 @@
 from typing import cast, Dict, List, Optional, Union
 import warnings
 
-import dateutil.parser
 import xmltodict
 import pandas as pd
 
@@ -94,7 +93,6 @@ def _get_study(id_: Union[int, str], entity_type) -> BaseStudy:
     description = result_dict["oml:description"]
     status = result_dict["oml:status"]
     creation_date = result_dict["oml:creation_date"]
-    creation_date_as_date = dateutil.parser.parse(creation_date)
     creator = result_dict["oml:creator"]
 
     # tags is legacy. remove once no longer needed.
@@ -106,35 +104,18 @@ def _get_study(id_: Union[int, str], entity_type) -> BaseStudy:
                 current_tag["window_start"] = tag["oml:window_start"]
             tags.append(current_tag)
 
-    if "oml:data" in result_dict:
-        datasets = [int(x) for x in result_dict["oml:data"]["oml:data_id"]]
-    else:
-        raise ValueError("No datasets attached to study {}!".format(id_))
-    if "oml:tasks" in result_dict:
-        tasks = [int(x) for x in result_dict["oml:tasks"]["oml:task_id"]]
-    else:
-        raise ValueError("No tasks attached to study {}!".format(id_))
+    def get_nested_ids_from_result_dict(key: str, subkey: str) -> Optional[List]:
+        if result_dict.get(key) is not None:
+            return [int(oml_id) for oml_id in result_dict[key][subkey]]
+        return None
 
-    if main_entity_type in ["runs", "run"]:
+    datasets = get_nested_ids_from_result_dict("oml:data", "oml:data_id")
+    tasks = get_nested_ids_from_result_dict("oml:tasks", "oml:task_id")
 
-        if "oml:flows" in result_dict:
-            flows = [int(x) for x in result_dict["oml:flows"]["oml:flow_id"]]
-        else:
-            raise ValueError("No flows attached to study {}!".format(id_))
-        if "oml:setups" in result_dict:
-            setups = [int(x) for x in result_dict["oml:setups"]["oml:setup_id"]]
-        else:
-            raise ValueError("No setups attached to study {}!".format(id_))
-        if "oml:runs" in result_dict:
-            runs = [
-                int(x) for x in result_dict["oml:runs"]["oml:run_id"]
-            ]  # type: Optional[List[int]]
-        else:
-            if creation_date_as_date < dateutil.parser.parse("2019-01-01"):
-                # Legacy studies did not require runs
-                runs = None
-            else:
-                raise ValueError("No runs attached to study {}!".format(id_))
+    if main_entity_type in ["runs", "run"]:
+        flows = get_nested_ids_from_result_dict("oml:flows", "oml:flow_id")
+        setups = get_nested_ids_from_result_dict("oml:setups", "oml:setup_id")
+        runs = get_nested_ids_from_result_dict("oml:runs", "oml:run_id")
 
         study = OpenMLStudy(
             study_id=study_id,
@@ -220,7 +201,7 @@ def create_study(
         data=None,
         tasks=None,
         flows=None,
-        runs=run_ids,
+        runs=run_ids if run_ids != [] else None,
         setups=None,
     )
 
diff --git a/tests/test_study/test_study_functions.py b/tests/test_study/test_study_functions.py
index 7be1c71c0..adb46db81 100644
--- a/tests/test_study/test_study_functions.py
+++ b/tests/test_study/test_study_functions.py
@@ -132,7 +132,7 @@ def _test_publish_empty_study_is_allowed(self, explicit: bool):
         self.assertGreater(study.id, 0)
         study_downloaded = openml.study.get_study(study.id)
         self.assertEqual(study_downloaded.main_entity_type, "run")
-        self.assertListEqual(study_downloaded.runs, [])
+        self.assertIsNone(study_downloaded.runs)
         openml.study.delete_study(study.id)
 
     def test_publish_empty_study_explicit(self):
@@ -241,7 +241,7 @@ def test_study_attach_illegal(self):
 
     def test_study_list(self):
         study_list = openml.study.list_studies(status="in_preparation")
-        # might fail if server is recently resetted
+        # might fail if server is recently reset
         self.assertGreaterEqual(len(study_list), 2)
 
     def test_study_list_output_format(self):

From 47043cd4d0f7ac830d8f23ec793b194efea55e73 Mon Sep 17 00:00:00 2001
From: Pieter Gijsbers <p.gijsbers@tue.nl>
Date: Wed, 27 Oct 2021 14:55:10 +0200
Subject: [PATCH 3/4] Update doc/progress.rst

Co-authored-by: Matthias Feurer <feurerm@informatik.uni-freiburg.de>
---
 doc/progress.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/progress.rst b/doc/progress.rst
index 110eb664c..401550a4d 100644
--- a/doc/progress.rst
+++ b/doc/progress.rst
@@ -8,7 +8,7 @@ Changelog
 
 0.13.0
 ~~~~~~
- * FIX#1110: Make arguments to ``create_study`` and ``create_suite`` that are defined as optional actually optional.
+ * FIX#1110: Make arguments to ``create_study`` and ``create_benchmark_suite`` that are defined as optional by the OpenML XSD actually optional.
  * MAIN#1088: Do CI for Windows on Github Actions instead of Appveyor.
 
 

From 870e2b76ec2bf1c6e7b52cd803b37359abbc1cf0 Mon Sep 17 00:00:00 2001
From: PGijsbers <p.gijsbers@tue.nl>
Date: Wed, 27 Oct 2021 15:13:14 +0200
Subject: [PATCH 4/4] Remove explicit delete study call

The unit test cleanup mechanism should take care of that.
---
 tests/test_study/test_study_functions.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/test_study/test_study_functions.py b/tests/test_study/test_study_functions.py
index adb46db81..904df4d3a 100644
--- a/tests/test_study/test_study_functions.py
+++ b/tests/test_study/test_study_functions.py
@@ -133,7 +133,6 @@ def _test_publish_empty_study_is_allowed(self, explicit: bool):
         study_downloaded = openml.study.get_study(study.id)
         self.assertEqual(study_downloaded.main_entity_type, "run")
         self.assertIsNone(study_downloaded.runs)
-        openml.study.delete_study(study.id)
 
     def test_publish_empty_study_explicit(self):
         self._test_publish_empty_study_is_allowed(explicit=True)