2 changes: 1 addition & 1 deletion doc/progress.rst
@@ -8,7 +8,7 @@ Changelog

0.13.0
~~~~~~

* FIX#1110: Make arguments to ``create_study`` and ``create_suite`` that are defined as optional by the OpenML XSD actually optional.
* MAIN#1088: Do CI for Windows on GitHub Actions instead of AppVeyor.


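In practice, the fix means a study can be created from metadata alone and runs attached later. A minimal sketch of the relaxed call (the name and description are placeholders; a configured OpenML connection is assumed):

```python
import openml

# run_ids, alias and benchmark_suite may now be omitted.
study = openml.study.create_study(
    name="my-study",  # placeholder name
    description="a study with no runs attached yet",
)
```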
65 changes: 25 additions & 40 deletions openml/study/functions.py
@@ -3,7 +3,6 @@
from typing import cast, Dict, List, Optional, Union
import warnings

import dateutil.parser
import xmltodict
import pandas as pd

@@ -94,7 +93,6 @@ def _get_study(id_: Union[int, str], entity_type) -> BaseStudy:
description = result_dict["oml:description"]
status = result_dict["oml:status"]
creation_date = result_dict["oml:creation_date"]
creation_date_as_date = dateutil.parser.parse(creation_date)
creator = result_dict["oml:creator"]

# tags is legacy. remove once no longer needed.
@@ -106,35 +104,18 @@ def _get_study(id_: Union[int, str], entity_type) -> BaseStudy:
current_tag["window_start"] = tag["oml:window_start"]
tags.append(current_tag)

if "oml:data" in result_dict:
datasets = [int(x) for x in result_dict["oml:data"]["oml:data_id"]]
else:
raise ValueError("No datasets attached to study {}!".format(id_))
if "oml:tasks" in result_dict:
tasks = [int(x) for x in result_dict["oml:tasks"]["oml:task_id"]]
else:
raise ValueError("No tasks attached to study {}!".format(id_))
def get_nested_ids_from_result_dict(key: str, subkey: str) -> Optional[List]:
if result_dict.get(key) is not None:
return [int(oml_id) for oml_id in result_dict[key][subkey]]
return None

if main_entity_type in ["runs", "run"]:
datasets = get_nested_ids_from_result_dict("oml:data", "oml:data_id")
tasks = get_nested_ids_from_result_dict("oml:tasks", "oml:task_id")

if "oml:flows" in result_dict:
flows = [int(x) for x in result_dict["oml:flows"]["oml:flow_id"]]
else:
raise ValueError("No flows attached to study {}!".format(id_))
if "oml:setups" in result_dict:
setups = [int(x) for x in result_dict["oml:setups"]["oml:setup_id"]]
else:
raise ValueError("No setups attached to study {}!".format(id_))
if "oml:runs" in result_dict:
runs = [
int(x) for x in result_dict["oml:runs"]["oml:run_id"]
] # type: Optional[List[int]]
else:
if creation_date_as_date < dateutil.parser.parse("2019-01-01"):
# Legacy studies did not require runs
runs = None
else:
raise ValueError("No runs attached to study {}!".format(id_))
if main_entity_type in ["runs", "run"]:
flows = get_nested_ids_from_result_dict("oml:flows", "oml:flow_id")
setups = get_nested_ids_from_result_dict("oml:setups", "oml:setup_id")
runs = get_nested_ids_from_result_dict("oml:runs", "oml:run_id")

study = OpenMLStudy(
study_id=study_id,
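For context, ``get_nested_ids_from_result_dict`` closes over ``result_dict``, the dict that ``xmltodict`` builds from the server's XML; a missing section now yields ``None`` instead of raising ``ValueError``. A standalone sketch with a hypothetical payload:

```python
from typing import List, Optional

# Hypothetical xmltodict-style payload: repeated XML elements arrive as a
# list of strings; "oml:runs" is absent, as for a study without runs.
result_dict = {
    "oml:tasks": {"oml:task_id": ["1", "2", "3"]},
}


def get_nested_ids_from_result_dict(key: str, subkey: str) -> Optional[List[int]]:
    # Missing section -> None, instead of the old behavior of raising.
    if result_dict.get(key) is not None:
        return [int(oml_id) for oml_id in result_dict[key][subkey]]
    return None


print(get_nested_ids_from_result_dict("oml:tasks", "oml:task_id"))  # [1, 2, 3]
print(get_nested_ids_from_result_dict("oml:runs", "oml:run_id"))  # None
```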
@@ -177,9 +158,9 @@ def _get_study(id_: Union[int, str], entity_type) -> BaseStudy:
def create_study(
name: str,
description: str,
run_ids: List[int],
alias: Optional[str],
benchmark_suite: Optional[int],
run_ids: Optional[List[int]] = None,
alias: Optional[str] = None,
benchmark_suite: Optional[int] = None,
) -> OpenMLStudy:
"""
Creates an OpenML study (collection of data, tasks, flows, setups and runs),
@@ -188,16 +169,19 @@ def create_study(

Parameters
----------
alias : str (optional)
a string ID, unique on server (url-friendly)
benchmark_suite : int (optional)
the benchmark suite (another study) upon which this study is run.
name : str
the name of the study (meta-info)
description : str
brief description (meta-info)
run_ids : list
a list of run ids associated with this study
run_ids : list, optional
a list of run ids associated with this study;
these can also be added later with ``attach_to_study``.
alias : str (optional)
a string ID, unique on server (url-friendly)
benchmark_suite : int (optional)
the ID of the suite for which this study contains run results

Returns
-------
@@ -217,28 +201,29 @@ def create_study(
data=None,
tasks=None,
flows=None,
runs=run_ids,
runs=run_ids if run_ids != [] else None,
setups=None,
)
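The ``runs=run_ids if run_ids != [] else None`` normalization means an explicit empty list is treated the same as omitting the argument. A toy illustration of the expression:

```python
for run_ids in (None, [1, 2], []):
    runs = run_ids if run_ids != [] else None
    print(run_ids, "->", runs)
# None -> None
# [1, 2] -> [1, 2]
# [] -> None
```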


def create_benchmark_suite(
name: str, description: str, task_ids: List[int], alias: Optional[str],
name: str, description: str, task_ids: List[int], alias: Optional[str] = None,
) -> OpenMLBenchmarkSuite:
"""
Creates an OpenML benchmark suite (collection of entity types, where
the tasks are the linked entity)

Parameters
----------
alias : str (optional)
a string ID, unique on server (url-friendly)
name : str
the name of the study (meta-info)
description : str
brief description (meta-info)
task_ids : list
a list of task ids associated with this study;
more can be added later with ``attach_to_suite``.
alias : str (optional)
a string ID, unique on server (url-friendly)

Returns
-------
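For suites, only ``alias`` gains a default; ``task_ids`` stays required. A sketch under the same assumptions (placeholder IDs):

```python
import openml

suite = openml.study.create_benchmark_suite(
    name="my-suite",  # placeholder name
    description="a suite over a few tasks",
    task_ids=[115, 116],  # placeholder task IDs
)
# alias can still be given explicitly, e.g. alias="my-suite-v1".
```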
28 changes: 27 additions & 1 deletion tests/test_study/test_study_functions.py
@@ -1,4 +1,5 @@
# License: BSD 3-Clause
from typing import Optional, List

import openml
import openml.study
@@ -114,6 +115,31 @@ def test_publish_benchmark_suite(self):
self.assertEqual(study_downloaded.status, "deactivated")
# can't delete study, now it's not longer in preparation

def _test_publish_empty_study_is_allowed(self, explicit: bool):
runs: Optional[List[int]] = [] if explicit else None
kind = "explicit" if explicit else "implicit"

study = openml.study.create_study(
name=f"empty-study-{kind}",
description=f"a study with no runs attached {kind}ly",
run_ids=runs,
)

study.publish()
TestBase._mark_entity_for_removal("study", study.id)
TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], study.id))

self.assertGreater(study.id, 0)
study_downloaded = openml.study.get_study(study.id)
self.assertEqual(study_downloaded.main_entity_type, "run")
self.assertIsNone(study_downloaded.runs)

def test_publish_empty_study_explicit(self):
self._test_publish_empty_study_is_allowed(explicit=True)

def test_publish_empty_study_implicit(self):
self._test_publish_empty_study_is_allowed(explicit=False)

@pytest.mark.flaky()
def test_publish_study(self):
# get some random runs to attach
@@ -214,7 +240,7 @@ def test_study_attach_illegal(self):

def test_study_list(self):
study_list = openml.study.list_studies(status="in_preparation")
# might fail if server is recently resetted
# might fail if server is recently reset
self.assertGreaterEqual(len(study_list), 2)

def test_study_list_output_format(self):
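The explicit/implicit test pair above uses two thin wrappers around one helper; under plain pytest the same coverage could be written with parametrization. A sketch of that alternative (not the project's convention, and it publishes to a live test server):

```python
import pytest

import openml


@pytest.mark.parametrize("runs", [[], None], ids=["explicit", "implicit"])
def test_publish_empty_study_is_allowed(runs):
    # An explicit empty list and an omitted value should both publish cleanly.
    study = openml.study.create_study(
        name="empty-study",
        description="a study with no runs attached",
        run_ids=runs,
    )
    study.publish()
    assert study.id > 0
```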