diff --git a/doc/conf.py b/doc/conf.py index e5de2d551..f0f26318c 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -49,7 +49,7 @@ autosummary_generate = True numpydoc_show_class_members = False -autodoc_default_flags = ["members", "inherited-members"] +autodoc_default_options = {"members": True, "inherited-members": True} # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] diff --git a/doc/index.rst b/doc/index.rst index e38e4d877..b78b7c009 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -38,7 +38,7 @@ Example # Publish the experiment on OpenML (optional, requires an API key. # You can get your own API key by signing up to OpenML.org) run.publish() - print(f'View the run online: {openml.config.server}/run/{run.run_id}') + print(f'View the run online: {run.openml_url}') You can find more examples in our `examples gallery `_. diff --git a/doc/progress.rst b/doc/progress.rst index 1ca1e1d0e..f27dd1137 100644 --- a/doc/progress.rst +++ b/doc/progress.rst @@ -6,7 +6,20 @@ Changelog ========= -0.11.1 +0.12.1 +~~~~~~ + +* ADD #895/#1038: Measure runtimes of scikit-learn runs also for models which are parallelized + via the joblib. +* DOC #1050: Refer to the webpage instead of the XML file in the main example. +* DOC #1051: Document existing extensions to OpenML-Python besides the shipped scikit-learn + extension. +* FIX #1035: Render class attributes and methods again. +* FIX #1042: Fixes a rare concurrency issue with OpenML-Python and joblib which caused the joblib + worker pool to fail. +* FIX #1053: Fixes a bug which could prevent importing the package in a docker container. + +0.12.0 ~~~~~~ * ADD #964: Validate ``ignore_attribute``, ``default_target_attribute``, ``row_id_attribute`` are set to attributes that exist on the dataset when calling ``create_dataset``. * ADD #979: Dataset features and qualities are now also cached in pickle format. diff --git a/doc/usage.rst b/doc/usage.rst index 1d54baa62..23ef4ec84 100644 --- a/doc/usage.rst +++ b/doc/usage.rst @@ -145,10 +145,22 @@ obtained on. Learn how to share your datasets in the following tutorial: * `Upload a dataset `_ -~~~~~~~~~~~~~~~~~~~~~~~ +*********************** Extending OpenML-Python -~~~~~~~~~~~~~~~~~~~~~~~ +*********************** OpenML-Python provides an extension interface to connect other machine learning libraries than scikit-learn to OpenML. Please check the :ref:`api_extensions` and use the scikit-learn extension in :class:`openml.extensions.sklearn.SklearnExtension` as a starting point. + +Runtime measurement is incorporated in the OpenML sklearn-extension. Example usage and potential +usage for Hyperparameter Optimisation can be found in the example tutorial: +`HPO using OpenML `_ + + +Here is a list of currently maintained OpenML extensions: + +* `openml-keras `_ +* `openml-pytorch `_ +* `openml-tensorflow(for tensorflow 2+) `_ + diff --git a/examples/30_extended/fetch_runtimes_tutorial.py b/examples/30_extended/fetch_runtimes_tutorial.py new file mode 100644 index 000000000..3d5183613 --- /dev/null +++ b/examples/30_extended/fetch_runtimes_tutorial.py @@ -0,0 +1,479 @@ +""" + +========================================== +Measuring runtimes for Scikit-learn models +========================================== + +The runtime of machine learning models on specific datasets can be a deciding +factor on the choice of algorithms, especially for benchmarking and comparison +purposes. 
OpenML's scikit-learn extension provides runtime data from runs of +model fit and prediction on tasks or datasets, for both the CPU-clock as well +as the actual wallclock-time incurred. The objective of this example is to +illustrate how to retrieve such timing measures, and also offer some potential +means of usage and interpretation of the same. + +It should be noted that there are multiple levels at which parallelism can occur. + +* At the outermost level, OpenML tasks contain fixed data splits, on which the + defined model/flow is executed. Thus, a model can be fit on each OpenML dataset fold + in parallel using the `n_jobs` parameter to `run_model_on_task` or `run_flow_on_task` + (illustrated under Case 2 & 3 below). + +* The model/flow specified can also include scikit-learn models that perform their own + parallelization. For instance, by specifying `n_jobs` in a Random Forest model definition + (covered under Case 2 below). + +* The sklearn model can further be an HPO estimator and contain it's own parallelization. + If the base estimator used also supports `parallelization`, then there's at least a 2-level nested + definition for parallelization possible (covered under Case 3 below). + +We shall cover these 5 representative scenarios for: + +* (Case 1) Retrieving runtimes for Random Forest training and prediction on each of the + cross-validation folds + +* (Case 2) Testing the above setting in a parallel setup and monitor the difference using + runtimes retrieved + +* (Case 3) Comparing RandomSearchCV and GridSearchCV on the above task based on runtimes + +* (Case 4) Running models that don't run in parallel or models which scikit-learn doesn't + parallelize + +* (Case 5) Running models that do not release the Python Global Interpreter Lock (GIL) +""" + +############################################################################ + +# License: BSD 3-Clause + +import openml +import numpy as np +from matplotlib import pyplot as plt +from joblib.parallel import parallel_backend + +from sklearn.naive_bayes import GaussianNB +from sklearn.tree import DecisionTreeClassifier +from sklearn.neural_network import MLPClassifier +from sklearn.ensemble import RandomForestClassifier +from sklearn.model_selection import GridSearchCV, RandomizedSearchCV + + +############################################################################ +# Preparing tasks and scikit-learn models +# *************************************** + +task_id = 167119 + +task = openml.tasks.get_task(task_id) +print(task) + +# Viewing associated data +n_repeats, n_folds, n_samples = task.get_split_dimensions() +print( + "Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format( + task_id, n_repeats, n_folds, n_samples, + ) +) + +# Creating utility function +def print_compare_runtimes(measures): + for repeat, val1 in measures["usercpu_time_millis_training"].items(): + for fold, val2 in val1.items(): + print( + "Repeat #{}-Fold #{}: CPU-{:.3f} vs Wall-{:.3f}".format( + repeat, fold, val2, measures["wall_clock_time_millis_training"][repeat][fold] + ) + ) + + +############################################################################ +# Case 1: Running a Random Forest model on an OpenML task +# ******************************************************* +# We'll run a Random Forest model and obtain an OpenML run object. We can +# see the evaluations recorded per fold for the dataset and the information +# available for this run. 
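+#
+# A small optional sketch first (the ``get_extension_by_model`` helper in
+# ``openml.extensions`` is assumed here; it is not used elsewhere in this
+# example): looking up the extension that OpenML-Python selects for a
+# scikit-learn estimator shows which component is responsible for recording
+# the runtime measures discussed below.
+from openml.extensions import get_extension_by_model
+
+print(get_extension_by_model(RandomForestClassifier(n_estimators=10)))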
+ +clf = RandomForestClassifier(n_estimators=10) + +run1 = openml.runs.run_model_on_task( + model=clf, task=task, upload_flow=False, avoid_duplicate_runs=False, +) +measures = run1.fold_evaluations + +print("The timing and performance metrics available: ") +for key in measures.keys(): + print(key) +print() + +print( + "The performance metric is recorded under `predictive_accuracy` per " + "fold and can be retrieved as: " +) +for repeat, val1 in measures["predictive_accuracy"].items(): + for fold, val2 in val1.items(): + print("Repeat #{}-Fold #{}: {:.4f}".format(repeat, fold, val2)) + print() + +################################################################################ +# The remaining entries recorded in `measures` are the runtime records +# related as: +# +# usercpu_time_millis = usercpu_time_millis_training + usercpu_time_millis_testing +# +# wall_clock_time_millis = wall_clock_time_millis_training + wall_clock_time_millis_testing +# +# The timing measures recorded as `*_millis_training` contain the per +# repeat-per fold timing incurred for the execution of the `.fit()` procedure +# of the model. For `usercpu_time_*` the time recorded using `time.process_time()` +# is converted to `milliseconds` and stored. Similarly, `time.time()` is used +# to record the time entry for `wall_clock_time_*`. The `*_millis_testing` entry +# follows the same procedure but for time taken for the `.predict()` procedure. + +# Comparing the CPU and wall-clock training times of the Random Forest model +print_compare_runtimes(measures) + +###################################################################### +# Case 2: Running Scikit-learn model on an OpenML task in parallel +# **************************************************************** +# Redefining the model to allow parallelism with `n_jobs=2` (2 cores) + +clf = RandomForestClassifier(n_estimators=10, n_jobs=2) + +run2 = openml.runs.run_model_on_task( + model=clf, task=task, upload_flow=False, avoid_duplicate_runs=False +) +measures = run2.fold_evaluations +# The wall-clock time recorded per fold should be lesser than Case 1 above +print_compare_runtimes(measures) + +#################################################################################### +# Running a Random Forest model on an OpenML task in parallel (all cores available): + +# Redefining the model to use all available cores with `n_jobs=-1` +clf = RandomForestClassifier(n_estimators=10, n_jobs=-1) + +run3 = openml.runs.run_model_on_task( + model=clf, task=task, upload_flow=False, avoid_duplicate_runs=False +) +measures = run3.fold_evaluations +# The wall-clock time recorded per fold should be lesser than the case above, +# if more than 2 CPU cores are available. The speed-up is more pronounced for +# larger datasets. +print_compare_runtimes(measures) + +#################################################################################### +# We can now observe that the ratio of CPU time to wallclock time is lower +# than in case 1. This happens because joblib by default spawns subprocesses +# for the workloads for which CPU time cannot be tracked. Therefore, interpreting +# the reported CPU and wallclock time requires knowledge of the parallelization +# applied at runtime. + +#################################################################################### +# Running the same task with a different parallel backend. Joblib provides multiple +# backends: {`loky` (default), `multiprocessing`, `dask`, `threading`, `sequential`}. 
+# The backend can be explicitly set using a joblib context manager. The behaviour of +# the job distribution can change and therefore the scale of runtimes recorded too. + +with parallel_backend(backend="multiprocessing", n_jobs=-1): + run3_ = openml.runs.run_model_on_task( + model=clf, task=task, upload_flow=False, avoid_duplicate_runs=False + ) +measures = run3_.fold_evaluations +print_compare_runtimes(measures) + +#################################################################################### +# The CPU time interpretation becomes ambiguous when jobs are distributed over an +# unknown number of cores or when subprocesses are spawned for which the CPU time +# cannot be tracked, as in the examples above. It is impossible for OpenML-Python +# to capture the availability of the number of cores/threads, their eventual +# utilisation and whether workloads are executed in subprocesses, for various +# cases that can arise as demonstrated in the rest of the example. Therefore, +# the final interpretation of the runtimes is left to the `user`. + +##################################################################### +# Case 3: Running and benchmarking HPO algorithms with their runtimes +# ******************************************************************* +# We shall now optimize a similar RandomForest model for the same task using +# scikit-learn's HPO support by using GridSearchCV to optimize our earlier +# RandomForest model's hyperparameter `n_estimators`. Scikit-learn also provides a +# `refit_time_` for such HPO models, i.e., the time incurred by training +# and evaluating the model on the best found parameter setting. This is +# included in the `wall_clock_time_millis_training` measure recorded. + +from sklearn.model_selection import GridSearchCV + + +clf = RandomForestClassifier(n_estimators=10, n_jobs=2) + +# GridSearchCV model +n_iter = 5 +grid_pipe = GridSearchCV( + estimator=clf, + param_grid={"n_estimators": np.linspace(start=1, stop=50, num=n_iter).astype(int).tolist()}, + cv=2, + n_jobs=2, +) + +run4 = openml.runs.run_model_on_task( + model=grid_pipe, task=task, upload_flow=False, avoid_duplicate_runs=False, n_jobs=2 +) +measures = run4.fold_evaluations +print_compare_runtimes(measures) + +################################################################################## +# Like any optimisation problem, scikit-learn's HPO estimators also generate +# a sequence of configurations which are evaluated, using which the best found +# configuration is tracked throughout the trace. +# The OpenML run object stores these traces as OpenMLRunTrace objects accessible +# using keys of the pattern (repeat, fold, iterations). Here `fold` implies the +# outer-cross validation fold as obtained from the task data splits in OpenML. +# GridSearchCV here performs grid search over the inner-cross validation folds as +# parameterized by the `cv` parameter. Since `GridSearchCV` in this example performs a +# `2-fold` cross validation, the runtime recorded per repeat-per fold in the run object +# is for the entire `fit()` procedure of GridSearchCV thus subsuming the runtimes of +# the 2-fold (inner) CV search performed. 
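+#
+# As a short illustrative sketch, relying only on objects already created in
+# this example: listing a few of the (repeat, fold, iteration) keys stored in
+# the run trace shows which Grid Search evaluations can be queried.
+all_trace_keys = sorted(run4.trace.trace_iterations.keys())
+print("Number of trace entries stored: {}".format(len(all_trace_keys)))
+print("First few trace keys: {}".format(all_trace_keys[:n_iter]))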
+ +# We earlier extracted the number of repeats and folds for this task: +print("# repeats: {}\n# folds: {}".format(n_repeats, n_folds)) + +# To extract the training runtime of the first repeat, first fold: +print(run4.fold_evaluations["wall_clock_time_millis_training"][0][0]) + +################################################################################## +# To extract the training runtime of the 1-st repeat, 4-th (outer) fold and also +# to fetch the parameters and performance of the evaluations made during +# the 1-st repeat, 4-th fold evaluation by the Grid Search model. + +_repeat = 0 +_fold = 3 +print( + "Total runtime for repeat {}'s fold {}: {:4f} ms".format( + _repeat, _fold, run4.fold_evaluations["wall_clock_time_millis_training"][_repeat][_fold] + ) +) +for i in range(n_iter): + key = (_repeat, _fold, i) + r = run4.trace.trace_iterations[key] + print( + "n_estimators: {:>2} - score: {:.3f}".format( + r.parameters["parameter_n_estimators"], r.evaluation + ) + ) + +################################################################################## +# Scikit-learn's HPO estimators also come with an argument `refit=True` as a default. +# In our previous model definition it was set to True by default, which meant that the best +# found hyperparameter configuration was used to refit or retrain the model without any inner +# cross validation. This extra refit time measure is provided by the scikit-learn model as the +# attribute `refit_time_`. +# This time is included in the `wall_clock_time_millis_training` measure. +# +# For non-HPO estimators, `wall_clock_time_millis = wall_clock_time_millis_training + wall_clock_time_millis_testing`. +# +# For HPO estimators, `wall_clock_time_millis = wall_clock_time_millis_training + wall_clock_time_millis_testing + refit_time`. +# +# This refit time can therefore be explicitly extracted in this manner: + + +def extract_refit_time(run, repeat, fold): + refit_time = ( + run.fold_evaluations["wall_clock_time_millis"][repeat][fold] + - run.fold_evaluations["wall_clock_time_millis_training"][repeat][fold] + - run.fold_evaluations["wall_clock_time_millis_testing"][repeat][fold] + ) + return refit_time + + +for repeat in range(n_repeats): + for fold in range(n_folds): + print( + "Repeat #{}-Fold #{}: {:.4f}".format( + repeat, fold, extract_refit_time(run4, repeat, fold) + ) + ) + +############################################################################ +# Along with the GridSearchCV already used above, we demonstrate how such +# optimisation traces can be retrieved by showing an application of these +# traces - comparing the speed of finding the best configuration using +# RandomizedSearchCV and GridSearchCV available with scikit-learn. + +# RandomizedSearchCV model +rs_pipe = RandomizedSearchCV( + estimator=clf, + param_distributions={ + "n_estimators": np.linspace(start=1, stop=50, num=15).astype(int).tolist() + }, + cv=2, + n_iter=n_iter, + n_jobs=2, +) +run5 = openml.runs.run_model_on_task( + model=rs_pipe, task=task, upload_flow=False, avoid_duplicate_runs=False, n_jobs=2 +) + +################################################################################ +# Since for the call to ``openml.runs.run_model_on_task`` the parameter +# ``n_jobs`` is set to its default ``None``, the evaluations across the OpenML folds +# are not parallelized. Hence, the time recorded is agnostic to the ``n_jobs`` +# being set at both the HPO estimator ``GridSearchCV`` as well as the base +# estimator ``RandomForestClassifier`` in this case. 
The OpenML extension only records the +# time taken for the completion of the complete ``fit()`` call, per-repeat per-fold. +# +# This notion can be used to extract and plot the best found performance per +# fold by the HPO model and the corresponding time taken for search across +# that fold. Moreover, since ``n_jobs=None`` for ``openml.runs.run_model_on_task`` +# the runtimes per fold can be cumulatively added to plot the trace against time. + + +def extract_trace_data(run, n_repeats, n_folds, n_iter, key=None): + key = "wall_clock_time_millis_training" if key is None else key + data = {"score": [], "runtime": []} + for i_r in range(n_repeats): + for i_f in range(n_folds): + data["runtime"].append(run.fold_evaluations[key][i_r][i_f]) + for i_i in range(n_iter): + r = run.trace.trace_iterations[(i_r, i_f, i_i)] + if r.selected: + data["score"].append(r.evaluation) + break + return data + + +def get_incumbent_trace(trace): + best_score = 1 + inc_trace = [] + for i, r in enumerate(trace): + if i == 0 or (1 - r) < best_score: + best_score = 1 - r + inc_trace.append(best_score) + return inc_trace + + +grid_data = extract_trace_data(run4, n_repeats, n_folds, n_iter) +rs_data = extract_trace_data(run5, n_repeats, n_folds, n_iter) + +plt.clf() +plt.plot( + np.cumsum(grid_data["runtime"]), get_incumbent_trace(grid_data["score"]), label="Grid Search" +) +plt.plot( + np.cumsum(rs_data["runtime"]), get_incumbent_trace(rs_data["score"]), label="Random Search" +) +plt.xscale("log") +plt.yscale("log") +plt.xlabel("Wallclock time (in milliseconds)") +plt.ylabel("1 - Accuracy") +plt.title("Optimisation Trace Comparison") +plt.legend() +plt.show() + +################################################################################ +# Case 4: Running models that scikit-learn doesn't parallelize +# ************************************************************* +# Both scikit-learn and OpenML depend on parallelism implemented through `joblib`. +# However, there can be cases where either models cannot be parallelized or don't +# depend on joblib for its parallelism. 2 such cases are illustrated below. +# +# Running a Decision Tree model that doesn't support parallelism implicitly, but +# using OpenML to parallelize evaluations for the outer-cross validation folds. + +dt = DecisionTreeClassifier() + +run6 = openml.runs.run_model_on_task( + model=dt, task=task, upload_flow=False, avoid_duplicate_runs=False, n_jobs=2 +) +measures = run6.fold_evaluations +print_compare_runtimes(measures) + +################################################################################ +# Although the decision tree does not run in parallel, it can release the +# `Python GIL `_. +# This can result in surprising runtime measures as demonstrated below: + +with parallel_backend("threading", n_jobs=-1): + run7 = openml.runs.run_model_on_task( + model=dt, task=task, upload_flow=False, avoid_duplicate_runs=False + ) +measures = run7.fold_evaluations +print_compare_runtimes(measures) + +################################################################################ +# Running a Neural Network from scikit-learn that uses scikit-learn independent +# parallelism using libraries such as `MKL, OpenBLAS or BLIS +# `_. 
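+#
+# (An optional aside, assuming the third-party ``threadpoolctl`` package is
+# installed; it ships with recent scikit-learn versions. The BLAS/OpenMP thread
+# pools providing this implicit parallelism can be inspected, which helps when
+# interpreting the CPU and wallclock times reported below.)
+try:
+    from threadpoolctl import threadpool_info
+
+    for pool in threadpool_info():
+        print(pool.get("internal_api"), "-", pool.get("num_threads"), "threads")
+except ImportError:
+    print("threadpoolctl is not installed; skipping the thread-pool inspection")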
+ +mlp = MLPClassifier(max_iter=10) + +run8 = openml.runs.run_model_on_task( + model=mlp, task=task, upload_flow=False, avoid_duplicate_runs=False +) +measures = run8.fold_evaluations +print_compare_runtimes(measures) + +################################################################################ +# Case 5: Running Scikit-learn models that don't release GIL +# ********************************************************** +# Certain Scikit-learn models do not release the `Python GIL +# `_ and +# are also not executed in parallel via a BLAS library. In such cases, the +# CPU times and wallclock times are most likely trustworthy. Note however +# that only very few models such as naive Bayes models are of this kind. + +clf = GaussianNB() + +with parallel_backend("multiprocessing", n_jobs=-1): + run9 = openml.runs.run_model_on_task( + model=clf, task=task, upload_flow=False, avoid_duplicate_runs=False + ) +measures = run9.fold_evaluations +print_compare_runtimes(measures) + +################################################################################ +# Summmary +# ********* +# The scikit-learn extension for OpenML-Python records model runtimes for the +# CPU-clock and the wall-clock times. The above examples illustrated how these +# recorded runtimes can be extracted when using a scikit-learn model and under +# parallel setups too. To summarize, the scikit-learn extension measures the: +# +# * `CPU-time` & `wallclock-time` for the whole run +# +# * A run here corresponds to a call to `run_model_on_task` or `run_flow_on_task` +# * The recorded time is for the model fit for each of the outer-cross validations folds, +# i.e., the OpenML data splits +# +# * Python's `time` module is used to compute the runtimes +# +# * `CPU-time` is recorded using the responses of `time.process_time()` +# * `wallclock-time` is recorded using the responses of `time.time()` +# +# * The timings recorded by OpenML per outer-cross validation fold is agnostic to +# model parallelisation +# +# * The wallclock times reported in Case 2 above highlights the speed-up on using `n_jobs=-1` +# in comparison to `n_jobs=2`, since the timing recorded by OpenML is for the entire +# `fit()` procedure, whereas the parallelisation is performed inside `fit()` by scikit-learn +# * The CPU-time for models that are run in parallel can be difficult to interpret +# +# * `CPU-time` & `wallclock-time` for each search per outer fold in an HPO run +# +# * Reports the total time for performing search on each of the OpenML data split, subsuming +# any sort of parallelism that happened as part of the HPO estimator or the underlying +# base estimator +# * Also allows extraction of the `refit_time` that scikit-learn measures using `time.time()` +# for retraining the model per outer fold, for the best found configuration +# +# * `CPU-time` & `wallclock-time` for models that scikit-learn doesn't parallelize +# +# * Models like Decision Trees or naive Bayes don't parallelize and thus both the wallclock and +# CPU times are similar in runtime for the OpenML call +# * However, models implemented in Cython, such as the Decision Trees can release the GIL and +# still run in parallel if a `threading` backend is used by joblib. +# * Scikit-learn Neural Networks can undergo parallelization implicitly owing to thread-level +# parallelism involved in the linear algebraic operations and thus the wallclock-time and +# CPU-time can differ. 
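+#
+# As a final, optional illustration (using only names already introduced in
+# this example), the ratio of CPU-time to wallclock-time per fold is a quick
+# indicator of which of the cases above applies to a given run::
+#
+#     ratio = (
+#         measures["usercpu_time_millis_training"][repeat][fold]
+#         / measures["wall_clock_time_millis_training"][repeat][fold]
+#     )
+#
+# A ratio close to 1 points at a sequential, GIL-holding model (Case 5), a
+# ratio well below 1 typically indicates work done in subprocesses whose CPU
+# time is not tracked (Case 2), and a ratio well above 1 typically indicates
+# in-process multi-threading such as the BLAS-backed models of Case 4.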
+# +# Because of all the cases mentioned above it is crucial to understand which case is triggered +# when reporting runtimes for scikit-learn models measured with OpenML-Python! diff --git a/openml/__version__.py b/openml/__version__.py index ff4effa59..700e61f6a 100644 --- a/openml/__version__.py +++ b/openml/__version__.py @@ -3,4 +3,4 @@ # License: BSD 3-Clause # The following line *must* be the last in the module, exactly as formatted: -__version__ = "0.12.0" +__version__ = "0.12.1" diff --git a/openml/config.py b/openml/config.py index 9e2e697d5..4516e96e1 100644 --- a/openml/config.py +++ b/openml/config.py @@ -204,7 +204,7 @@ def _setup(config=None): # read config file, create directory for config file if not os.path.exists(config_dir): try: - os.mkdir(config_dir) + os.makedirs(config_dir, exist_ok=True) cache_exists = True except PermissionError: cache_exists = False @@ -235,7 +235,7 @@ def _get(config, key): # create the cache subdirectory if not os.path.exists(cache_directory): try: - os.mkdir(cache_directory) + os.makedirs(cache_directory, exist_ok=True) except PermissionError: openml_logger.warning( "No permission to create openml cache directory at %s! This can result in " diff --git a/openml/extensions/sklearn/extension.py b/openml/extensions/sklearn/extension.py index 3441b4a4e..a0c551e83 100644 --- a/openml/extensions/sklearn/extension.py +++ b/openml/extensions/sklearn/extension.py @@ -1455,53 +1455,6 @@ def _prevent_optimize_n_jobs(self, model): "openml-python should not be used to " "optimize the n_jobs parameter." ) - def _can_measure_cputime(self, model: Any) -> bool: - """ - Returns True if the parameter settings of model are chosen s.t. the model - will run on a single core (if so, openml-python can measure cpu-times) - - Parameters: - ----------- - model: - The model that will be fitted - - Returns: - -------- - bool: - True if all n_jobs parameters will be either set to None or 1, False otherwise - """ - if not (isinstance(model, sklearn.base.BaseEstimator) or self._is_hpo_class(model)): - raise ValueError("model should be BaseEstimator or BaseSearchCV") - - # check the parameters for n_jobs - n_jobs_vals = SklearnExtension._get_parameter_values_recursive(model.get_params(), "n_jobs") - for val in n_jobs_vals: - if val is not None and val != 1 and val != "deprecated": - return False - return True - - def _can_measure_wallclocktime(self, model: Any) -> bool: - """ - Returns True if the parameter settings of model are chosen s.t. 
the model - will run on a preset number of cores (if so, openml-python can measure wall-clock time) - - Parameters: - ----------- - model: - The model that will be fitted - - Returns: - -------- - bool: - True if no n_jobs parameters is set to -1, False otherwise - """ - if not (isinstance(model, sklearn.base.BaseEstimator) or self._is_hpo_class(model)): - raise ValueError("model should be BaseEstimator or BaseSearchCV") - - # check the parameters for n_jobs - n_jobs_vals = SklearnExtension._get_parameter_values_recursive(model.get_params(), "n_jobs") - return -1 not in n_jobs_vals - ################################################################################################ # Methods for performing runs with extension modules @@ -1725,12 +1678,8 @@ def _prediction_to_probabilities( model_copy = sklearn.base.clone(model, safe=True) # sanity check: prohibit users from optimizing n_jobs self._prevent_optimize_n_jobs(model_copy) - # Runtime can be measured if the model is run sequentially - can_measure_cputime = self._can_measure_cputime(model_copy) - can_measure_wallclocktime = self._can_measure_wallclocktime(model_copy) - + # measures and stores runtimes user_defined_measures = OrderedDict() # type: 'OrderedDict[str, float]' - try: # for measuring runtime. Only available since Python 3.3 modelfit_start_cputime = time.process_time() @@ -1742,14 +1691,11 @@ def _prediction_to_probabilities( model_copy.fit(X_train) modelfit_dur_cputime = (time.process_time() - modelfit_start_cputime) * 1000 - if can_measure_cputime: - user_defined_measures["usercpu_time_millis_training"] = modelfit_dur_cputime - modelfit_dur_walltime = (time.time() - modelfit_start_walltime) * 1000 - if hasattr(model_copy, "refit_time_"): - modelfit_dur_walltime += model_copy.refit_time_ - if can_measure_wallclocktime: - user_defined_measures["wall_clock_time_millis_training"] = modelfit_dur_walltime + + user_defined_measures["usercpu_time_millis_training"] = modelfit_dur_cputime + refit_time = model_copy.refit_time_ * 1000 if hasattr(model_copy, "refit_time_") else 0 + user_defined_measures["wall_clock_time_millis_training"] = modelfit_dur_walltime except AttributeError as e: # typically happens when training a regressor on classification task @@ -1792,20 +1738,16 @@ def _prediction_to_probabilities( else: raise ValueError(task) - if can_measure_cputime: - modelpredict_duration_cputime = ( - time.process_time() - modelpredict_start_cputime - ) * 1000 - user_defined_measures["usercpu_time_millis_testing"] = modelpredict_duration_cputime - user_defined_measures["usercpu_time_millis"] = ( - modelfit_dur_cputime + modelpredict_duration_cputime - ) - if can_measure_wallclocktime: - modelpredict_duration_walltime = (time.time() - modelpredict_start_walltime) * 1000 - user_defined_measures["wall_clock_time_millis_testing"] = modelpredict_duration_walltime - user_defined_measures["wall_clock_time_millis"] = ( - modelfit_dur_walltime + modelpredict_duration_walltime - ) + modelpredict_duration_cputime = (time.process_time() - modelpredict_start_cputime) * 1000 + user_defined_measures["usercpu_time_millis_testing"] = modelpredict_duration_cputime + user_defined_measures["usercpu_time_millis"] = ( + modelfit_dur_cputime + modelpredict_duration_cputime + ) + modelpredict_duration_walltime = (time.time() - modelpredict_start_walltime) * 1000 + user_defined_measures["wall_clock_time_millis_testing"] = modelpredict_duration_walltime + user_defined_measures["wall_clock_time_millis"] = ( + modelfit_dur_walltime + 
modelpredict_duration_walltime + refit_time + ) if isinstance(task, (OpenMLClassificationTask, OpenMLLearningCurveTask)): diff --git a/setup.py b/setup.py index dc1a58863..2d2a638b5 100644 --- a/setup.py +++ b/setup.py @@ -84,7 +84,7 @@ "seaborn", ], "examples_unix": ["fanova"], - "docs": ["sphinx", "sphinx-gallery", "sphinx_bootstrap_theme", "numpydoc"], + "docs": ["sphinx>=3", "sphinx-gallery", "sphinx_bootstrap_theme", "numpydoc",], }, test_suite="pytest", classifiers=[ diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py index c1f88bcda..e45eeea53 100644 --- a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py +++ b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py @@ -1280,19 +1280,13 @@ def test_paralizable_check(self): sklearn.model_selection.GridSearchCV(multicore_bagging, illegal_param_dist), ] - can_measure_cputime_answers = [True, False, False, True, False, False, True, False, False] - can_measure_walltime_answers = [True, True, False, True, True, False, True, True, False] if LooseVersion(sklearn.__version__) < "0.20": has_refit_time = [False, False, False, False, False, False, False, False, False] else: has_refit_time = [False, False, False, False, False, False, True, True, False] X, y = sklearn.datasets.load_iris(return_X_y=True) - for model, allowed_cputime, allowed_walltime, refit_time in zip( - legal_models, can_measure_cputime_answers, can_measure_walltime_answers, has_refit_time - ): - self.assertEqual(self.extension._can_measure_cputime(model), allowed_cputime) - self.assertEqual(self.extension._can_measure_wallclocktime(model), allowed_walltime) + for model, refit_time in zip(legal_models, has_refit_time): model.fit(X, y) self.assertEqual(refit_time, hasattr(model, "refit_time_")) diff --git a/tests/test_openml/test_config.py b/tests/test_openml/test_config.py index 5b15f781e..2e2c609db 100644 --- a/tests/test_openml/test_config.py +++ b/tests/test_openml/test_config.py @@ -26,6 +26,16 @@ def test_non_writable_home(self, log_handler_mock, warnings_mock, expanduser_moc self.assertEqual(log_handler_mock.call_count, 1) self.assertFalse(log_handler_mock.call_args_list[0][1]["create_file_handler"]) + @unittest.mock.patch("os.path.expanduser") + def test_XDG_directories_do_not_exist(self, expanduser_mock): + with tempfile.TemporaryDirectory(dir=self.workdir) as td: + + def side_effect(path_): + return os.path.join(td, str(path_).replace("~/", "")) + + expanduser_mock.side_effect = side_effect + openml.config._setup() + def test_get_config_as_dict(self): """ Checks if the current configuration is returned accurately as a dict. 
""" config = openml.config.get_config_as_dict() diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index 4534f26a4..c8f1729b7 100644 --- a/tests/test_runs/test_run_functions.py +++ b/tests/test_runs/test_run_functions.py @@ -1635,13 +1635,13 @@ def test_joblib_backends(self, parallel_mock): line_length = 6 + len(task.class_labels) backend_choice = "loky" if LooseVersion(joblib.__version__) > "0.11" else "multiprocessing" - for n_jobs, backend, len_time_stats, call_count in [ - (1, backend_choice, 7, 10), - (2, backend_choice, 4, 10), - (-1, backend_choice, 1, 10), - (1, "threading", 7, 20), - (-1, "threading", 1, 30), - (1, "sequential", 7, 40), + for n_jobs, backend, call_count in [ + (1, backend_choice, 10), + (2, backend_choice, 10), + (-1, backend_choice, 10), + (1, "threading", 20), + (-1, "threading", 30), + (1, "sequential", 40), ]: clf = sklearn.model_selection.RandomizedSearchCV( estimator=sklearn.ensemble.RandomForestClassifier(n_estimators=5), @@ -1674,8 +1674,6 @@ def test_joblib_backends(self, parallel_mock): self.assertEqual(len(res[0][0]), line_length) # usercpu_time_millis_* not recorded when n_jobs > 1 # *_time_millis_* not recorded when n_jobs = -1 - self.assertEqual(len(res[2]), len_time_stats) - self.assertEqual(len(res[3]), len_time_stats) self.assertEqual(len(res[2]["predictive_accuracy"][0]), 10) self.assertEqual(len(res[3]["predictive_accuracy"][0]), 10) self.assertEqual(parallel_mock.call_count, call_count)