Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/progress.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ Changelog

0.11.1
~~~~~~
* MAINT #671: Improved the performance of ``check_datasets_active`` by only querying the given list of datasets in contrast to querying all datasets. Modified the corresponding unit test.

0.11.0
~~~~~~
Expand Down
14 changes: 12 additions & 2 deletions openml/datasets/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,14 +333,23 @@ def _load_features_from_file(features_file: str) -> Dict:
return xml_dict["oml:data_features"]


def check_datasets_active(dataset_ids: List[int]) -> Dict[int, bool]:
def check_datasets_active(
dataset_ids: List[int],
raise_error_if_not_exist: bool = True,
) -> Dict[int, bool]:
"""
Check if the dataset ids provided are active.

Raises an error if a dataset_id in the given list of dataset_ids
does not exist on the server and ``raise_error_if_not_exist`` is True.

Parameters
----------
dataset_ids : List[int]
A list of integers representing dataset ids.
raise_error_if_not_exist : bool (default=True)
If True, raise an error when one or more of the given dataset ids
do not exist on the server.

Returns
-------
Expand All @@ -353,7 +362,8 @@ def check_datasets_active(dataset_ids: List[int]) -> Dict[int, bool]:
for did in dataset_ids:
dataset = dataset_list.get(did, None)
if dataset is None:
raise ValueError("Could not find dataset {} in OpenML dataset list.".format(did))
if raise_error_if_not_exist:
raise ValueError(f'Could not find dataset {did} in OpenML dataset list.')
else:
active[did] = dataset["status"] == "active"

Expand Down
6 changes: 5 additions & 1 deletion tests/test_datasets/test_dataset_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,9 +227,13 @@ def test_list_datasets_empty(self):
def test_check_datasets_active(self):
# Have to test on live because there is no deactivated dataset on the test server.
openml.config.server = self.production_server
active = openml.datasets.check_datasets_active([2, 17])
active = openml.datasets.check_datasets_active(
[2, 17, 79],
raise_error_if_not_exist=False,
)
self.assertTrue(active[2])
self.assertFalse(active[17])
self.assertIsNone(active.get(79))
self.assertRaisesRegex(
ValueError,
"Could not find dataset 79 in OpenML dataset list.",
Expand Down