From 99679d988c9cc24a459dd6a8e7ee46e12a7541a1 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Thu, 16 Sep 2021 18:13:26 +0200
Subject: [PATCH 1/4] Update function signatures for create_study|suite
Publish explicitly empty suites and downloading any empty suites not
currently supported.
---
doc/progress.rst | 2 +-
openml/study/functions.py | 24 ++++++++++++---------
tests/test_study/test_study_functions.py | 27 ++++++++++++++++++++++++
3 files changed, 42 insertions(+), 11 deletions(-)
diff --git a/doc/progress.rst b/doc/progress.rst
index 937c60eb2..110eb664c 100644
--- a/doc/progress.rst
+++ b/doc/progress.rst
@@ -8,7 +8,7 @@ Changelog
0.13.0
~~~~~~
-
+ * FIX#1110: Make arguments to ``create_study`` and ``create_suite`` that are defined as optional actually optional.
* MAIN#1088: Do CI for Windows on Github Actions instead of Appveyor.
diff --git a/openml/study/functions.py b/openml/study/functions.py
index ee877ddf2..2e852e1b8 100644
--- a/openml/study/functions.py
+++ b/openml/study/functions.py
@@ -177,9 +177,9 @@ def _get_study(id_: Union[int, str], entity_type) -> BaseStudy:
def create_study(
name: str,
description: str,
- run_ids: List[int],
- alias: Optional[str],
- benchmark_suite: Optional[int],
+ run_ids: Optional[List[int]] = None,
+ alias: Optional[str] = None,
+ benchmark_suite: Optional[int] = None,
) -> OpenMLStudy:
"""
Creates an OpenML study (collection of data, tasks, flows, setups and run),
@@ -188,16 +188,19 @@ def create_study(
Parameters
----------
- alias : str (optional)
- a string ID, unique on server (url-friendly)
benchmark_suite : int (optional)
the benchmark suite (another study) upon which this study is ran.
name : str
the name of the study (meta-info)
description : str
brief description (meta-info)
- run_ids : list
- a list of run ids associated with this study
+ run_ids : list, optional
+ a list of run ids associated with this study,
+ these can also be added later with ``attach_to_study``.
+ alias : str (optional)
+ a string ID, unique on server (url-friendly)
+ benchmark_suite: int (optional)
+ the ID of the suite for which this study contains run results
Returns
-------
@@ -223,7 +226,7 @@ def create_study(
def create_benchmark_suite(
- name: str, description: str, task_ids: List[int], alias: Optional[str],
+ name: str, description: str, task_ids: List[int], alias: Optional[str] = None,
) -> OpenMLBenchmarkSuite:
"""
Creates an OpenML benchmark suite (collection of entity types, where
@@ -231,14 +234,15 @@ def create_benchmark_suite(
Parameters
----------
- alias : str (optional)
- a string ID, unique on server (url-friendly)
name : str
the name of the study (meta-info)
description : str
brief description (meta-info)
task_ids : list
a list of task ids associated with this study
+ more can be added later with ``attach_to_suite``.
+ alias : str (optional)
+ a string ID, unique on server (url-friendly)
Returns
-------
diff --git a/tests/test_study/test_study_functions.py b/tests/test_study/test_study_functions.py
index e028ba2bd..7be1c71c0 100644
--- a/tests/test_study/test_study_functions.py
+++ b/tests/test_study/test_study_functions.py
@@ -1,4 +1,5 @@
# License: BSD 3-Clause
+from typing import Optional, List
import openml
import openml.study
@@ -114,6 +115,32 @@ def test_publish_benchmark_suite(self):
self.assertEqual(study_downloaded.status, "deactivated")
# can't delete study, now it's not longer in preparation
+ def _test_publish_empty_study_is_allowed(self, explicit: bool):
+ runs: Optional[List[int]] = [] if explicit else None
+ kind = "explicit" if explicit else "implicit"
+
+ study = openml.study.create_study(
+ name=f"empty-study-{kind}",
+ description=f"a study with no runs attached {kind}ly",
+ run_ids=runs,
+ )
+
+ study.publish()
+ TestBase._mark_entity_for_removal("study", study.id)
+ TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], study.id))
+
+ self.assertGreater(study.id, 0)
+ study_downloaded = openml.study.get_study(study.id)
+ self.assertEqual(study_downloaded.main_entity_type, "run")
+ self.assertListEqual(study_downloaded.runs, [])
+ openml.study.delete_study(study.id)
+
+ def test_publish_empty_study_explicit(self):
+ self._test_publish_empty_study_is_allowed(explicit=True)
+
+ def test_publish_empty_study_implicit(self):
+ self._test_publish_empty_study_is_allowed(explicit=False)
+
@pytest.mark.flaky()
def test_publish_study(self):
# get some random runs to attach
From b6784bee3205f8609d12e96e0496a613f867d3bd Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 13 Oct 2021 17:04:39 +0200
Subject: [PATCH 2/4] Refactor _get_study
---
openml/study/functions.py | 41 +++++++-----------------
tests/test_study/test_study_functions.py | 4 +--
2 files changed, 13 insertions(+), 32 deletions(-)
diff --git a/openml/study/functions.py b/openml/study/functions.py
index 2e852e1b8..144c089b3 100644
--- a/openml/study/functions.py
+++ b/openml/study/functions.py
@@ -3,7 +3,6 @@
from typing import cast, Dict, List, Optional, Union
import warnings
-import dateutil.parser
import xmltodict
import pandas as pd
@@ -94,7 +93,6 @@ def _get_study(id_: Union[int, str], entity_type) -> BaseStudy:
description = result_dict["oml:description"]
status = result_dict["oml:status"]
creation_date = result_dict["oml:creation_date"]
- creation_date_as_date = dateutil.parser.parse(creation_date)
creator = result_dict["oml:creator"]
# tags is legacy. remove once no longer needed.
@@ -106,35 +104,18 @@ def _get_study(id_: Union[int, str], entity_type) -> BaseStudy:
current_tag["window_start"] = tag["oml:window_start"]
tags.append(current_tag)
- if "oml:data" in result_dict:
- datasets = [int(x) for x in result_dict["oml:data"]["oml:data_id"]]
- else:
- raise ValueError("No datasets attached to study {}!".format(id_))
- if "oml:tasks" in result_dict:
- tasks = [int(x) for x in result_dict["oml:tasks"]["oml:task_id"]]
- else:
- raise ValueError("No tasks attached to study {}!".format(id_))
+ def get_nested_ids_from_result_dict(key: str, subkey: str) -> Optional[List]:
+ if result_dict.get(key) is not None:
+ return [int(oml_id) for oml_id in result_dict[key][subkey]]
+ return None
- if main_entity_type in ["runs", "run"]:
+ datasets = get_nested_ids_from_result_dict("oml:data", "oml:data_id")
+ tasks = get_nested_ids_from_result_dict("oml:tasks", "oml:task_id")
- if "oml:flows" in result_dict:
- flows = [int(x) for x in result_dict["oml:flows"]["oml:flow_id"]]
- else:
- raise ValueError("No flows attached to study {}!".format(id_))
- if "oml:setups" in result_dict:
- setups = [int(x) for x in result_dict["oml:setups"]["oml:setup_id"]]
- else:
- raise ValueError("No setups attached to study {}!".format(id_))
- if "oml:runs" in result_dict:
- runs = [
- int(x) for x in result_dict["oml:runs"]["oml:run_id"]
- ] # type: Optional[List[int]]
- else:
- if creation_date_as_date < dateutil.parser.parse("2019-01-01"):
- # Legacy studies did not require runs
- runs = None
- else:
- raise ValueError("No runs attached to study {}!".format(id_))
+ if main_entity_type in ["runs", "run"]:
+ flows = get_nested_ids_from_result_dict("oml:flows", "oml:flow_id")
+ setups = get_nested_ids_from_result_dict("oml:setups", "oml:setup_id")
+ runs = get_nested_ids_from_result_dict("oml:runs", "oml:run_id")
study = OpenMLStudy(
study_id=study_id,
@@ -220,7 +201,7 @@ def create_study(
data=None,
tasks=None,
flows=None,
- runs=run_ids,
+ runs=run_ids if run_ids != [] else None,
setups=None,
)
diff --git a/tests/test_study/test_study_functions.py b/tests/test_study/test_study_functions.py
index 7be1c71c0..adb46db81 100644
--- a/tests/test_study/test_study_functions.py
+++ b/tests/test_study/test_study_functions.py
@@ -132,7 +132,7 @@ def _test_publish_empty_study_is_allowed(self, explicit: bool):
self.assertGreater(study.id, 0)
study_downloaded = openml.study.get_study(study.id)
self.assertEqual(study_downloaded.main_entity_type, "run")
- self.assertListEqual(study_downloaded.runs, [])
+ self.assertIsNone(study_downloaded.runs)
openml.study.delete_study(study.id)
def test_publish_empty_study_explicit(self):
@@ -241,7 +241,7 @@ def test_study_attach_illegal(self):
def test_study_list(self):
study_list = openml.study.list_studies(status="in_preparation")
- # might fail if server is recently resetted
+ # might fail if server is recently reset
self.assertGreaterEqual(len(study_list), 2)
def test_study_list_output_format(self):
From 47043cd4d0f7ac830d8f23ec793b194efea55e73 Mon Sep 17 00:00:00 2001
From: Pieter Gijsbers
Date: Wed, 27 Oct 2021 14:55:10 +0200
Subject: [PATCH 3/4] Update doc/progress.rst
Co-authored-by: Matthias Feurer
---
doc/progress.rst | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/doc/progress.rst b/doc/progress.rst
index 110eb664c..401550a4d 100644
--- a/doc/progress.rst
+++ b/doc/progress.rst
@@ -8,7 +8,7 @@ Changelog
0.13.0
~~~~~~
- * FIX#1110: Make arguments to ``create_study`` and ``create_suite`` that are defined as optional actually optional.
+ * FIX#1110: Make arguments to ``create_study`` and ``create_suite`` that are defined as optional by the OpenML XSD actually optional.
* MAIN#1088: Do CI for Windows on Github Actions instead of Appveyor.
From 870e2b76ec2bf1c6e7b52cd803b37359abbc1cf0 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 27 Oct 2021 15:13:14 +0200
Subject: [PATCH 4/4] Remove explicit delete study call
Since unit test cleanup mechanism should take care of that.
---
tests/test_study/test_study_functions.py | 1 -
1 file changed, 1 deletion(-)
diff --git a/tests/test_study/test_study_functions.py b/tests/test_study/test_study_functions.py
index adb46db81..904df4d3a 100644
--- a/tests/test_study/test_study_functions.py
+++ b/tests/test_study/test_study_functions.py
@@ -133,7 +133,6 @@ def _test_publish_empty_study_is_allowed(self, explicit: bool):
study_downloaded = openml.study.get_study(study.id)
self.assertEqual(study_downloaded.main_entity_type, "run")
self.assertIsNone(study_downloaded.runs)
- openml.study.delete_study(study.id)
def test_publish_empty_study_explicit(self):
self._test_publish_empty_study_is_allowed(explicit=True)