doc/progress.rst (1 change: 1 addition & 0 deletions)

@@ -8,6 +8,7 @@ Changelog
 0.10.0
 ~~~~~~
+* FIX #838: Fix list_evaluations_setups to work when the number of evaluations is not a multiple of 100.
 * ADD #737: Add list_evaluations_setups to return hyperparameters along with the list of evaluations.
 * FIX #261: Test server is cleared of all files uploaded during unit testing.
 * FIX #447: All files created by unit tests no longer persist locally.
openml/evaluations/functions.py (13 changes: 8 additions & 5 deletions)

@@ -324,14 +324,17 @@ def list_evaluations_setups(
     evals = list_evaluations(function=function, offset=offset, size=size, run=run, task=task,
                              setup=setup, flow=flow, uploader=uploader, tag=tag,
                              per_fold=per_fold, sort_order=sort_order, output_format='dataframe')

-    # List setups
-    # Split setups in evals into chunks of N setups as list_setups does not support large size
+    # Listing setups by setup id does not support large sizes (the request exceeds the URL
+    # length limit), so we split the unique setup ids returned by list_evaluations into chunks of size N.
     df = pd.DataFrame()
     if len(evals) != 0:
-        N = 100
-        setup_chunks = np.split(evals['setup_id'].unique(),
-                                ((len(evals['setup_id'].unique()) - 1) // N) + 1)
+        N = 100  # maximum chunk size
+        length = len(evals['setup_id'].unique())  # length of the array we want to split
+        # np.array_split, unlike np.split, allows n sections to not equally divide the array:
+        # it yields length % n sub-arrays of size length // n + 1 and the rest of size length // n.
+        setup_chunks = np.array_split(ary=evals['setup_id'].unique(),
+                                      indices_or_sections=((length - 1) // N) + 1)
     setups = pd.DataFrame()
     for setup in setup_chunks:
         result = pd.DataFrame(openml.setups.list_setups(setup=setup, output_format='dataframe'))
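The heart of the fix is the switch from np.split to np.array_split: np.split raises a ValueError whenever the requested number of sections does not evenly divide the array, which happened whenever the count of unique setup ids was not a multiple of the chunk size. A minimal sketch of the difference, using a length of 121 to mirror the size in the updated test below:

import numpy as np

setup_ids = np.arange(121)  # e.g. 121 unique setup ids
N = 100  # chunk size
n_sections = ((len(setup_ids) - 1) // N) + 1  # ceil(121 / 100) == 2

# np.split(setup_ids, n_sections) raises
# ValueError: array split does not result in an equal division,
# because 121 is not divisible by 2.

# np.array_split tolerates the unequal division instead:
chunks = np.array_split(setup_ids, n_sections)
print([len(c) for c in chunks])  # [61, 60]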
tests/test_evaluations/test_evaluation_functions.py (14 changes: 8 additions & 6 deletions)

@@ -6,18 +6,20 @@
 class TestEvaluationFunctions(TestBase):
     _multiprocess_can_split_ = True

-    def _check_list_evaluation_setups(self, size, **kwargs):
+    def _check_list_evaluation_setups(self, **kwargs):
         evals_setups = openml.evaluations.list_evaluations_setups("predictive_accuracy",
-                                                                  **kwargs, size=size,
+                                                                  **kwargs,
                                                                   sort_order='desc',
                                                                   output_format='dataframe')
         evals = openml.evaluations.list_evaluations("predictive_accuracy",
-                                                    **kwargs, size=size,
+                                                    **kwargs,
                                                     sort_order='desc',
                                                     output_format='dataframe')

         # Check if list is non-empty
         self.assertGreater(len(evals_setups), 0)
+        # Check if length is accurate
+        self.assertEqual(len(evals_setups), len(evals))
         # Check if output from sort is sorted in the right order
         self.assertSequenceEqual(sorted(evals_setups['value'].tolist(), reverse=True),
                                  evals_setups['value'].tolist())
@@ -176,7 +178,7 @@ def test_list_evaluations_setups_filter_flow(self):
         openml.config.server = self.production_server
         flow_id = [405]
         size = 100
-        evals = self._check_list_evaluation_setups(size, flow=flow_id)
+        evals = self._check_list_evaluation_setups(flow=flow_id, size=size)
         # check if parameters in separate columns works
         evals_cols = openml.evaluations.list_evaluations_setups("predictive_accuracy",
                                                                 flow=flow_id, size=size,
@@ -191,5 +193,5 @@ def test_list_evaluations_setups_filter_flow(self):
     def test_list_evaluations_setups_filter_task(self):
         openml.config.server = self.production_server
         task_id = [6]
-        size = 100
-        self._check_list_evaluation_setups(size, task=task_id)
+        size = 121
+        self._check_list_evaluation_setups(task=task_id, size=size)
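The new size=121 is what exercises the regression: with size=100 the old np.split call happened to succeed because the chunk boundaries lined up. A rough end-to-end sketch of the fixed behavior follows; it assumes network access to the production OpenML server and that task 6 has at least 121 predictive_accuracy evaluations there:

import openml

openml.config.server = "https://www.openml.org/api/v1/xml"  # production server

# Before this fix, a result set whose unique setup ids could not be split
# evenly made the internal np.split call raise a ValueError.
df = openml.evaluations.list_evaluations_setups(
    "predictive_accuracy", task=[6], size=121,
    sort_order='desc', output_format='dataframe')
assert len(df) == 121  # assumes the server holds at least 121 such evaluations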