diff --git a/ci_scripts/test.sh b/ci_scripts/test.sh index 0a1f94df6..504d15bbd 100644 --- a/ci_scripts/test.sh +++ b/ci_scripts/test.sh @@ -19,7 +19,7 @@ run_tests() { cd $TEST_DIR if [[ "$COVERAGE" == "true" ]]; then - PYTEST_ARGS='--cov=openml' + PYTEST_ARGS='--cov=openml --long' else PYTEST_ARGS='' fi diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py index 84943b244..28bde17f6 100644 --- a/openml/datasets/functions.py +++ b/openml/datasets/functions.py @@ -347,7 +347,7 @@ def check_datasets_active(dataset_ids: List[int]) -> Dict[int, bool]: dict A dictionary with items {did: bool} """ - dataset_list = list_datasets(status="all") + dataset_list = list_datasets(status="all", data_id=dataset_ids) active = {} for did in dataset_ids: diff --git a/tests/conftest.py b/tests/conftest.py index 461a513fd..6a66d4ed9 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -25,6 +25,7 @@ import os import logging from typing import List +import pytest import openml from openml.testing import TestBase @@ -182,3 +183,17 @@ def pytest_sessionfinish() -> None: logger.info("Local files deleted") logger.info("{} is killed".format(worker)) + + +def pytest_addoption(parser): + parser.addoption( + "--long", + action="store_true", + default=False, + help="Run the long version of tests which support both short and long scenarios.", + ) + + +@pytest.fixture(scope="class") +def long_version(request): + request.cls.long_version = request.config.getoption("--long") diff --git a/tests/test_evaluations/test_evaluation_functions.py b/tests/test_evaluations/test_evaluation_functions.py index e4de9b03c..70f36ce19 100644 --- a/tests/test_evaluations/test_evaluation_functions.py +++ b/tests/test_evaluations/test_evaluation_functions.py @@ -1,10 +1,12 @@ # License: BSD 3-Clause +import pytest import openml import openml.evaluations from openml.testing import TestBase +@pytest.mark.usefixtures("long_version") class TestEvaluationFunctions(TestBase): _multiprocess_can_split_ = True @@ -27,6 +29,10 @@ def _check_list_evaluation_setups(self, **kwargs): # Check if output and order of list_evaluations is preserved self.assertSequenceEqual(evals_setups["run_id"].tolist(), evals["run_id"].tolist()) + + if not self.long_version: + evals_setups = evals_setups.head(1) + # Check if the hyper-parameter column is as accurate and flow_id for index, row in evals_setups.iterrows(): params = openml.runs.get_run(row["run_id"]).parameter_settings diff --git a/tests/test_flows/test_flow_functions.py b/tests/test_flows/test_flow_functions.py index 12af05ffe..69771ee01 100644 --- a/tests/test_flows/test_flow_functions.py +++ b/tests/test_flows/test_flow_functions.py @@ -2,18 +2,22 @@ from collections import OrderedDict import copy +import functools import unittest +from unittest.mock import patch from distutils.version import LooseVersion import sklearn from sklearn import ensemble import pandas as pd +import pytest import openml from openml.testing import TestBase import openml.extensions.sklearn +@pytest.mark.usefixtures("long_version") class TestFlowFunctions(TestBase): _multiprocess_can_split_ = True @@ -334,20 +338,27 @@ def test_get_flow_reinstantiate_model_wrong_version(self): assert "0.19.1" not in flow.dependencies def test_get_flow_id(self): - clf = sklearn.tree.DecisionTreeClassifier() - flow = openml.extensions.get_extension_by_model(clf).model_to_flow(clf).publish() - - self.assertEqual(openml.flows.get_flow_id(model=clf, exact_version=True), flow.flow_id) - flow_ids = openml.flows.get_flow_id(model=clf, 
exact_version=False) - self.assertIn(flow.flow_id, flow_ids) - self.assertGreater(len(flow_ids), 2) - - # Check that the output of get_flow_id is identical if only the name is given, no matter - # whether exact_version is set to True or False. - flow_ids_exact_version_True = openml.flows.get_flow_id(name=flow.name, exact_version=True) - flow_ids_exact_version_False = openml.flows.get_flow_id( - name=flow.name, exact_version=False, - ) - self.assertEqual(flow_ids_exact_version_True, flow_ids_exact_version_False) - self.assertIn(flow.flow_id, flow_ids_exact_version_True) - self.assertGreater(len(flow_ids_exact_version_True), 2) + if self.long_version: + list_all = openml.utils._list_all + else: + list_all = functools.lru_cache()(openml.utils._list_all) + with patch("openml.utils._list_all", list_all): + clf = sklearn.tree.DecisionTreeClassifier() + flow = openml.extensions.get_extension_by_model(clf).model_to_flow(clf).publish() + + self.assertEqual(openml.flows.get_flow_id(model=clf, exact_version=True), flow.flow_id) + flow_ids = openml.flows.get_flow_id(model=clf, exact_version=False) + self.assertIn(flow.flow_id, flow_ids) + self.assertGreater(len(flow_ids), 2) + + # Check that the output of get_flow_id is identical if only the name is given, no matter + # whether exact_version is set to True or False. + flow_ids_exact_version_True = openml.flows.get_flow_id( + name=flow.name, exact_version=True + ) + flow_ids_exact_version_False = openml.flows.get_flow_id( + name=flow.name, exact_version=False, + ) + self.assertEqual(flow_ids_exact_version_True, flow_ids_exact_version_False) + self.assertIn(flow.flow_id, flow_ids_exact_version_True) + self.assertGreater(len(flow_ids_exact_version_True), 2) diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index 89f01c72e..c4628c452 100644 --- a/tests/test_runs/test_run_functions.py +++ b/tests/test_runs/test_run_functions.py @@ -10,7 +10,6 @@ import unittest.mock import numpy as np -import pytest import openml import openml.exceptions @@ -335,7 +334,7 @@ def _check_sample_evaluations( for sample in range(num_sample_entrees): evaluation = sample_evaluations[measure][rep][fold][sample] self.assertIsInstance(evaluation, float) - if not os.environ.get("CI_WINDOWS"): + if not (os.environ.get("CI_WINDOWS") or os.name == "nt"): # Either Appveyor is much faster than Travis # and/or measurements are not as accurate. 
# Either way, windows seems to get an eval-time @@ -682,6 +681,8 @@ def test_run_and_upload_randomsearch(self): flow_expected_rsv="12172", ) self.assertEqual(len(run.trace.trace_iterations), 5) + trace = openml.runs.get_run_trace(run.run_id) + self.assertEqual(len(trace.trace_iterations), 5) def test_run_and_upload_maskedarrays(self): # This testcase is important for 2 reasons: @@ -828,31 +829,12 @@ def _test_local_evaluations(self, run): self.assertGreaterEqual(alt_scores[idx], 0) self.assertLessEqual(alt_scores[idx], 1) - @unittest.skipIf( - LooseVersion(sklearn.__version__) < "0.20", - reason="SimpleImputer doesn't handle mixed type DataFrame as input", - ) def test_local_run_swapped_parameter_order_model(self): + clf = DecisionTreeClassifier() + australian_task = 595 + task = openml.tasks.get_task(australian_task) - # construct sci-kit learn classifier - clf = Pipeline( - steps=[ - ( - "imputer", - make_pipeline( - SimpleImputer(strategy="most_frequent"), - OneHotEncoder(handle_unknown="ignore"), - ), - ), - # random forest doesn't take categoricals - ("estimator", RandomForestClassifier()), - ] - ) - - # download task - task = openml.tasks.get_task(7) - - # invoke OpenML run + # task and clf are purposely in the old order run = openml.runs.run_model_on_task( task, clf, avoid_duplicate_runs=False, upload_flow=False, ) @@ -950,55 +932,6 @@ def test_initialize_model_from_run(self): self.assertEqual(flowS.components["Imputer"].parameters["strategy"], '"most_frequent"') self.assertEqual(flowS.components["VarianceThreshold"].parameters["threshold"], "0.05") - @pytest.mark.flaky() - def test_get_run_trace(self): - # get_run_trace is already tested implicitly in test_run_and_publish - # this test is a bit additional. - num_iterations = 10 - num_folds = 1 - task_id = 119 - - task = openml.tasks.get_task(task_id) - - # IMPORTANT! Do not sentinel this flow. is faster if we don't wait - # on openml server - clf = RandomizedSearchCV( - RandomForestClassifier(random_state=42, n_estimators=5), - { - "max_depth": [3, None], - "max_features": [1, 2, 3, 4], - "bootstrap": [True, False], - "criterion": ["gini", "entropy"], - }, - num_iterations, - random_state=42, - cv=3, - ) - - # [SPEED] make unit test faster by exploiting run information - # from the past - try: - # in case the run did not exists yet - run = openml.runs.run_model_on_task(model=clf, task=task, avoid_duplicate_runs=True,) - - self.assertEqual( - len(run.trace.trace_iterations), num_iterations * num_folds, - ) - run = run.publish() - TestBase._mark_entity_for_removal("run", run.run_id) - TestBase.logger.info("collected from test_run_functions: {}".format(run.run_id)) - self._wait_for_processed_run(run.run_id, 400) - run_id = run.run_id - except openml.exceptions.OpenMLRunsExistError as e: - # The only error we expect, should fail otherwise. - run_ids = [int(run_id) for run_id in e.run_ids] - self.assertGreater(len(run_ids), 0) - run_id = random.choice(list(run_ids)) - - # now the actual unit test ... 
- run_trace = openml.runs.get_run_trace(run_id) - self.assertEqual(len(run_trace.trace_iterations), num_iterations * num_folds) - @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.20", reason="SimpleImputer doesn't handle mixed type DataFrame as input", diff --git a/tests/test_tasks/test_task_functions.py b/tests/test_tasks/test_task_functions.py index 5f9b65495..1e7642b35 100644 --- a/tests/test_tasks/test_task_functions.py +++ b/tests/test_tasks/test_task_functions.py @@ -110,7 +110,7 @@ def test_list_tasks_paginate(self): self._check_task(tasks[tid]) def test_list_tasks_per_type_paginate(self): - size = 10 + size = 40 max = 100 task_types = [ TaskType.SUPERVISED_CLASSIFICATION, diff --git a/tests/test_utils/test_utils.py b/tests/test_utils/test_utils.py index 9729100bb..b5ef7b2bf 100644 --- a/tests/test_utils/test_utils.py +++ b/tests/test_utils/test_utils.py @@ -11,7 +11,6 @@ class OpenMLTaskTest(TestBase): _multiprocess_can_split_ = True - _batch_size = 25 def mocked_perform_api_call(call, request_method): # TODO: JvR: Why is this not a staticmethod? @@ -33,7 +32,7 @@ def test_list_all_few_results_available(self, _perform_api_call): def test_list_all_for_datasets(self): required_size = 127 # default test server reset value - datasets = openml.datasets.list_datasets(batch_size=self._batch_size, size=required_size) + datasets = openml.datasets.list_datasets(batch_size=100, size=required_size) self.assertEqual(len(datasets), required_size) for did in datasets: @@ -53,13 +52,13 @@ def test_list_datasets_with_high_size_parameter(self): def test_list_all_for_tasks(self): required_size = 1068 # default test server reset value - tasks = openml.tasks.list_tasks(batch_size=self._batch_size, size=required_size) + tasks = openml.tasks.list_tasks(batch_size=1000, size=required_size) self.assertEqual(len(tasks), required_size) def test_list_all_for_flows(self): required_size = 15 # default test server reset value - flows = openml.flows.list_flows(batch_size=self._batch_size, size=required_size) + flows = openml.flows.list_flows(batch_size=25, size=required_size) self.assertEqual(len(flows), required_size) @@ -73,7 +72,7 @@ def test_list_all_for_setups(self): def test_list_all_for_runs(self): required_size = 21 - runs = openml.runs.list_runs(batch_size=self._batch_size, size=required_size) + runs = openml.runs.list_runs(batch_size=25, size=required_size) # might not be on test server after reset, please rerun test at least once if fails self.assertEqual(len(runs), required_size)
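
Editorial note (illustrative, not part of the diff): the new `--long` pytest option and the class-scoped `long_version` fixture added in tests/conftest.py let a test class run a reduced workload by default and the full workload on CI, where ci_scripts/test.sh now passes `--long` alongside `--cov=openml`. A minimal sketch of how a test class opts in follows; the class and test names are hypothetical.

    # Sketch only: TestSomethingExpensive / test_many_cases are made-up names.
    import unittest

    import pytest


    @pytest.mark.usefixtures("long_version")
    class TestSomethingExpensive(unittest.TestCase):
        # The class-scoped ``long_version`` fixture stores the value of the
        # ``--long`` command line flag on the class before any test runs.
        long_version: bool

        def test_many_cases(self):
            cases = list(range(100))
            if not self.long_version:
                # Default (short) mode: exercise a single case to keep local runs fast.
                cases = cases[:1]
            for case in cases:
                self.assertIsInstance(case, int)

Running plain `pytest` exercises the short path; `pytest --long` (as the coverage job now does via PYTEST_ARGS='--cov=openml --long') runs every case.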
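
A second illustrative note: in test_get_flow_id the short mode wraps openml.utils._list_all in functools.lru_cache and patches the memoized copy in for the duration of the test, so repeated identical listing calls hit the server only once. The sketch below shows the same pattern against a local stand-in; `_list_all` here is a fake, not the real OpenML call.

    import functools
    from unittest.mock import patch


    def _list_all(api_call: str) -> dict:
        """Stand-in for a slow, network-bound listing call."""
        print(f"hitting the server for {api_call!r}")
        return {"result": api_call}


    def query_twice(long_version: bool) -> None:
        # Long mode keeps the real function; short mode patches in a memoized copy,
        # so identical queries inside the block are answered from memory.
        list_all = _list_all if long_version else functools.lru_cache()(_list_all)
        with patch(f"{__name__}._list_all", list_all):
            _list_all("flow/list")  # always reaches the (fake) server
            _list_all("flow/list")  # cache hit when long_version is False


    if __name__ == "__main__":
        query_twice(long_version=False)  # prints the server message once
        query_twice(long_version=True)   # prints it twice

In long mode the original function is patched back in unchanged, so nothing is cached and every call exercises the real code path, which is the same trade-off the diff makes.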