diff --git a/.github/workflows/ubuntu-test.yml b/.github/workflows/ubuntu-test.yml
index c78de6445..33b57179b 100644
--- a/.github/workflows/ubuntu-test.yml
+++ b/.github/workflows/ubuntu-test.yml
@@ -51,7 +51,7 @@ jobs:
     - name: Run tests
       run: |
         if [ ${{ matrix.code-cov }} ]; then codecov='--cov=openml --long --cov-report=xml'; fi
-        pytest -n 4 --durations=20 --timeout=600 --timeout-method=thread -sv $codecov
+        pytest -n 4 --durations=20 --timeout=600 --timeout-method=thread --dist load -sv $codecov
     - name: Check for files left behind by test
       if: ${{ always() }}
       run: |
diff --git a/appveyor.yml b/appveyor.yml
index 151a5e3f7..e3fa74aaf 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -45,4 +45,4 @@ build: false
 
 test_script:
   - "cd C:\\projects\\openml-python"
-  - "%CMD_IN_ENV% pytest -n 4 --timeout=600 --timeout-method=thread -sv"
+  - "%CMD_IN_ENV% pytest -n 4 --timeout=600 --timeout-method=thread --dist load -sv"
diff --git a/openml/datasets/dataset.py b/openml/datasets/dataset.py
index a51603d8d..0d23a0a75 100644
--- a/openml/datasets/dataset.py
+++ b/openml/datasets/dataset.py
@@ -13,7 +13,6 @@
 import numpy as np
 import pandas as pd
 import scipy.sparse
-from warnings import warn
 
 from openml.base import OpenMLBase
 from .data_feature import OpenMLDataFeature
@@ -34,7 +33,7 @@ class OpenMLDataset(OpenMLBase):
         Name of the dataset.
     description : str
         Description of the dataset.
-    format : str
+    data_format : str
         Format of the dataset which can be either 'arff' or 'sparse_arff'.
     cache_format : str
         Format for caching the dataset which can be either 'feather' or 'pickle'.
@@ -103,7 +102,6 @@ def __init__(
         self,
         name,
         description,
-        format=None,
         data_format="arff",
         cache_format="pickle",
         dataset_id=None,
@@ -178,16 +176,8 @@ def find_invalid_characters(string, pattern):
                 )
 
         self.cache_format = cache_format
-        if format is None:
-            self.format = data_format
-        else:
-            warn(
-                "The format parameter in the init will be deprecated "
-                "in the future."
-                "Please use data_format instead",
-                DeprecationWarning,
-            )
-            self.format = format
+        # Has to be called format, otherwise there will be an XML upload error
+        self.format = data_format
         self.creator = creator
         self.contributor = contributor
         self.collection_date = collection_date
@@ -456,12 +446,11 @@ def _parse_data_from_arff(
                     col.append(
                         self._unpack_categories(X[column_name], categories_names[column_name])
                     )
-                elif attribute_dtype[column_name] in ('floating',
-                                                      'integer'):
+                elif attribute_dtype[column_name] in ("floating", "integer"):
                     X_col = X[column_name]
                     if X_col.min() >= 0 and X_col.max() <= 255:
                         try:
-                            X_col_uint = X_col.astype('uint8')
+                            X_col_uint = X_col.astype("uint8")
                             if (X_col == X_col_uint).all():
                                 col.append(X_col_uint)
                                 continue
diff --git a/openml/extensions/sklearn/extension.py b/openml/extensions/sklearn/extension.py
index 0339667bc..1cd979af5 100644
--- a/openml/extensions/sklearn/extension.py
+++ b/openml/extensions/sklearn/extension.py
@@ -1748,7 +1748,7 @@ def _prediction_to_probabilities(
                     proba_y.shape[1], len(task.class_labels),
                 )
                 warnings.warn(message)
-                openml.config.logger.warn(message)
+                openml.config.logger.warning(message)
 
                 for i, col in enumerate(task.class_labels):
                     # adding missing columns with 0 probability
diff --git a/openml/flows/flow.py b/openml/flows/flow.py
index 5aaf70a9d..2acbcb0d1 100644
--- a/openml/flows/flow.py
+++ b/openml/flows/flow.py
@@ -229,7 +229,7 @@ def _to_dict(self) -> "OrderedDict[str, OrderedDict]":
 
         if not self.description:
             logger = logging.getLogger(__name__)
-            logger.warn("Flow % has empty description", self.name)
+            logger.warning("Flow % has empty description", self.name)
 
         flow_parameters = []
         for key in self.parameters:
diff --git a/openml/study/functions.py b/openml/study/functions.py
index 632581022..ee877ddf2 100644
--- a/openml/study/functions.py
+++ b/openml/study/functions.py
@@ -58,7 +58,7 @@ def get_study(
             "of things have changed since then. Please use `get_suite('OpenML100')` instead."
         )
         warnings.warn(message, DeprecationWarning)
-        openml.config.logger.warn(message)
+        openml.config.logger.warning(message)
         study = _get_study(study_id, entity_type="task")
         return cast(OpenMLBenchmarkSuite, study)  # type: ignore
     else:
diff --git a/tests/conftest.py b/tests/conftest.py
index 6a66d4ed9..1b733ac19 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -126,7 +126,7 @@ def delete_remote_files(tracker) -> None:
             openml.utils._delete_entity(entity_type, entity)
             logger.info("Deleted ({}, {})".format(entity_type, entity))
         except Exception as e:
-            logger.warn("Cannot delete ({},{}): {}".format(entity_type, entity, e))
+            logger.warning("Cannot delete ({},{}): {}".format(entity_type, entity, e))
 
 
 def pytest_sessionstart() -> None:
diff --git a/tests/test_datasets/test_dataset.py b/tests/test_datasets/test_dataset.py
index 82a90154e..3d931d3cf 100644
--- a/tests/test_datasets/test_dataset.py
+++ b/tests/test_datasets/test_dataset.py
@@ -1,7 +1,6 @@
 # License: BSD 3-Clause
 
 from time import time
-from warnings import filterwarnings, catch_warnings
 
 import numpy as np
 import pandas as pd
@@ -120,11 +119,11 @@ def test_get_data_no_str_data_for_nparrays(self):
 
     def _check_expected_type(self, dtype, is_cat, col):
         if is_cat:
-            expected_type = 'category'
-        elif not col.isna().any() and (col.astype('uint8') == col).all():
-            expected_type = 'uint8'
+            expected_type = "category"
+        elif not col.isna().any() and (col.astype("uint8") == col).all():
+            expected_type = "uint8"
         else:
-            expected_type = 'float64'
+            expected_type = "float64"
 
         self.assertEqual(dtype.name, expected_type)
 
@@ -192,14 +191,6 @@ def test_get_data_with_ignore_attributes(self):
         self.assertEqual(rval.shape, (898, 38))
         self.assertEqual(len(categorical), 38)
 
-    def test_dataset_format_constructor(self):
-
-        with catch_warnings():
-            filterwarnings("error")
-            self.assertRaises(
-                DeprecationWarning, openml.OpenMLDataset, "Test", "Test", format="arff"
-            )
-
     def test_get_data_with_nonexisting_class(self):
         # This class is using the anneal dataset with labels [1, 2, 3, 4, 5, 'U']. However,
         # label 4 does not exist and we test that the features 5 and 'U' are correctly mapped to
diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py
index 5ea2dd0e1..101001599 100644
--- a/tests/test_datasets/test_dataset_functions.py
+++ b/tests/test_datasets/test_dataset_functions.py
@@ -4,6 +4,7 @@
 import random
 from itertools import product
 from unittest import mock
+import shutil
 
 import arff
 import time
@@ -373,9 +374,9 @@ def test_get_dataset_by_name(self):
     def test_get_dataset_uint8_dtype(self):
         dataset = openml.datasets.get_dataset(1)
         self.assertEqual(type(dataset), OpenMLDataset)
-        self.assertEqual(dataset.name, 'anneal')
+        self.assertEqual(dataset.name, "anneal")
         df, _, _, _ = dataset.get_data()
-        self.assertEqual(df['carbon'].dtype, 'uint8')
+        self.assertEqual(df["carbon"].dtype, "uint8")
 
     def test_get_dataset(self):
         # This is the only non-lazy load to ensure default behaviour works.
@@ -1154,27 +1155,31 @@ def test_publish_fetch_ignore_attribute(self):
         # test if publish was successful
         self.assertIsInstance(dataset.id, int)
 
+        downloaded_dataset = self._wait_for_dataset_being_processed(dataset.id)
+        self.assertEqual(downloaded_dataset.ignore_attribute, ignore_attribute)
+
+    def _wait_for_dataset_being_processed(self, dataset_id):
         downloaded_dataset = None
         # fetching from server
         # loop till timeout or fetch not successful
-        max_waiting_time_seconds = 400
+        max_waiting_time_seconds = 600
         # time.time() works in seconds
         start_time = time.time()
         while time.time() - start_time < max_waiting_time_seconds:
             try:
-                downloaded_dataset = openml.datasets.get_dataset(dataset.id)
+                downloaded_dataset = openml.datasets.get_dataset(dataset_id)
                 break
             except Exception as e:
                 # returned code 273: Dataset not processed yet
                 # returned code 362: No qualities found
                 TestBase.logger.error(
-                    "Failed to fetch dataset:{} with '{}'.".format(dataset.id, str(e))
+                    "Failed to fetch dataset:{} with '{}'.".format(dataset_id, str(e))
                 )
                 time.sleep(10)
                 continue
         if downloaded_dataset is None:
-            raise ValueError("TIMEOUT: Failed to fetch uploaded dataset - {}".format(dataset.id))
-        self.assertEqual(downloaded_dataset.ignore_attribute, ignore_attribute)
+            raise ValueError("TIMEOUT: Failed to fetch uploaded dataset - {}".format(dataset_id))
+        return downloaded_dataset
 
     def test_create_dataset_row_id_attribute_error(self):
         # meta-information
@@ -1347,7 +1352,7 @@ def test_get_dataset_cache_format_feather(self):
         self.assertEqual(len(categorical), X.shape[1])
         self.assertEqual(len(attribute_names), X.shape[1])
 
-    def test_data_edit(self):
+    def test_data_edit_non_critical_field(self):
         # Case 1
         # All users can edit non-critical fields of datasets
         desc = (
@@ -1368,14 +1373,31 @@
         edited_dataset = openml.datasets.get_dataset(did)
         self.assertEqual(edited_dataset.description, desc)
 
+    def test_data_edit_critical_field(self):
         # Case 2
         # only owners (or admin) can edit all critical fields of datasets
-        # this is a dataset created by CI, so it is editable by this test
-        did = 315
-        result = edit_dataset(did, default_target_attribute="col_1", ignore_attribute="col_2")
+        # for this, we need to first clone a dataset to do changes
+        did = fork_dataset(1)
+        self._wait_for_dataset_being_processed(did)
+        result = edit_dataset(did, default_target_attribute="shape", ignore_attribute="oil")
         self.assertEqual(did, result)
-        edited_dataset = openml.datasets.get_dataset(did)
-        self.assertEqual(edited_dataset.ignore_attribute, ["col_2"])
+
+        n_tries = 10
+        # we need to wait for the edit to be reflected on the server
+        for i in range(n_tries):
+            edited_dataset = openml.datasets.get_dataset(did)
+            try:
+                self.assertEqual(edited_dataset.default_target_attribute, "shape", edited_dataset)
+                self.assertEqual(edited_dataset.ignore_attribute, ["oil"], edited_dataset)
+                break
+            except AssertionError as e:
+                if i == n_tries - 1:
+                    raise e
+                time.sleep(10)
+                # Delete the cache dir to get the newer version of the dataset
+                shutil.rmtree(
+                    os.path.join(self.workdir, "org", "openml", "test", "datasets", str(did))
+                )
 
     def test_data_edit_errors(self):
         # Check server exception when no field to edit is provided
diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
index c4628c452..b155d6cd5 100644
--- a/tests/test_runs/test_run_functions.py
+++ b/tests/test_runs/test_run_functions.py
@@ -442,7 +442,7 @@ def determine_grid_size(param_grid):
         # suboptimal (slow), and not guaranteed to work if evaluation
         # engine is behind.
         # TODO: mock this? We have the arff already on the server
-        self._wait_for_processed_run(run.run_id, 400)
+        self._wait_for_processed_run(run.run_id, 600)
         try:
             model_prime = openml.runs.initialize_model_from_trace(
                 run_id=run.run_id, repeat=0, fold=0,
@@ -519,7 +519,7 @@ def _run_and_upload_regression(
         )
 
     def test_run_and_upload_logistic_regression(self):
-        lr = LogisticRegression(solver="lbfgs")
+        lr = LogisticRegression(solver="lbfgs", max_iter=1000)
         task_id = self.TEST_SERVER_TASK_SIMPLE[0]
         n_missing_vals = self.TEST_SERVER_TASK_SIMPLE[1]
         n_test_obs = self.TEST_SERVER_TASK_SIMPLE[2]
@@ -605,7 +605,8 @@ def get_ct_cf(nominal_indices, numeric_indices):
         LooseVersion(sklearn.__version__) < "0.20",
         reason="columntransformer introduction in 0.20.0",
     )
-    def test_run_and_upload_knn_pipeline(self):
+    @unittest.mock.patch("warnings.warn")
+    def test_run_and_upload_knn_pipeline(self, warnings_mock):
         cat_imp = make_pipeline(
             SimpleImputer(strategy="most_frequent"),
             OneHotEncoder(handle_unknown="ignore")
@@ -635,11 +636,18 @@
         n_missing_vals = self.TEST_SERVER_TASK_MISSING_VALS[1]
         n_test_obs = self.TEST_SERVER_TASK_MISSING_VALS[2]
         self._run_and_upload_classification(pipeline2, task_id, n_missing_vals, n_test_obs, "62501")
+        # The warning raised is:
+        # The total space of parameters 8 is smaller than n_iter=10.
+        # Running 8 iterations. For exhaustive searches, use GridSearchCV.'
+        # It is raised three times because we once run the model to upload something and then run
+        # it again twice to compare that the predictions are reproducible.
+        self.assertEqual(warnings_mock.call_count, 3)
 
     def test_run_and_upload_gridsearch(self):
         gridsearch = GridSearchCV(
             BaggingClassifier(base_estimator=SVC()),
             {"base_estimator__C": [0.01, 0.1, 10], "base_estimator__gamma": [0.01, 0.1, 10]},
+            cv=3,
         )
         task_id = self.TEST_SERVER_TASK_SIMPLE[0]
         n_missing_vals = self.TEST_SERVER_TASK_SIMPLE[1]
diff --git a/tests/test_tasks/test_task_methods.py b/tests/test_tasks/test_task_methods.py
index 137e29fe4..8cba6a9fe 100644
--- a/tests/test_tasks/test_task_methods.py
+++ b/tests/test_tasks/test_task_methods.py
@@ -40,9 +40,9 @@ def test_get_train_and_test_split_indices(self):
         self.assertEqual(681, train_indices[-1])
         self.assertEqual(583, test_indices[0])
         self.assertEqual(24, test_indices[-1])
-        self.assertRaisesRegexp(
+        self.assertRaisesRegex(
             ValueError, "Fold 10 not known", task.get_train_test_split_indices, 10, 0
         )
-        self.assertRaisesRegexp(
+        self.assertRaisesRegex(
             ValueError, "Repeat 10 not known", task.get_train_test_split_indices, 0, 10
         )
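
Note on the retry pattern used above: both the new `_wait_for_dataset_being_processed` helper and the loop in `test_data_edit_critical_field` poll the test server until background processing of an upload has finished, instead of asserting on the first response. A minimal sketch of the same idea outside the test suite could look as follows; the 600-second budget and 10-second interval mirror the values in the diff, `openml.datasets.get_dataset` is the real API call, and the helper name and the raised TimeoutError are illustrative only:

    import time

    import openml


    def wait_until_processed(dataset_id, timeout=600, interval=10):
        """Poll the OpenML server until a freshly uploaded dataset can be fetched.

        The server answers with errors such as "Dataset not processed yet" (code 273)
        until its background processing is done, so we retry instead of failing on
        the first attempt.
        """
        start = time.time()
        while time.time() - start < timeout:
            try:
                # Succeeds only once the server has finished processing the upload.
                return openml.datasets.get_dataset(dataset_id)
            except Exception:
                time.sleep(interval)
        raise TimeoutError(
            "Dataset {} was not processed within {} seconds".format(dataset_id, timeout)
        )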