diff --git a/.all-contributorsrc b/.all-contributorsrc new file mode 100644 index 000000000..3e16fe084 --- /dev/null +++ b/.all-contributorsrc @@ -0,0 +1,36 @@ +{ + "files": [ + "README.md" + ], + "imageSize": 100, + "commit": false, + "contributors": [ + { + "login": "a-moadel", + "name": "a-moadel", + "avatar_url": "https://avatars0.githubusercontent.com/u/46557866?v=4", + "profile": "https://github.com/a-moadel", + "contributions": [ + "doc", + "example" + ] + }, + { + "login": "Neeratyoy", + "name": "Neeratyoy Mallik", + "avatar_url": "https://avatars2.githubusercontent.com/u/3191233?v=4", + "profile": "https://github.com/Neeratyoy", + "contributions": [ + "code", + "doc", + "example" + ] + } + ], + "contributorsPerLine": 7, + "projectName": "openml-python", + "projectOwner": "openml", + "repoType": "github", + "repoHost": "https://github.com", + "skipCi": true +} diff --git a/.github/workflows/dist.yaml b/.github/workflows/dist.yaml new file mode 100644 index 000000000..51ffe03d5 --- /dev/null +++ b/.github/workflows/dist.yaml @@ -0,0 +1,30 @@ +name: dist-check + +on: [push, pull_request] + +jobs: + dist: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: 3.8 + - name: Build dist + run: | + python setup.py sdist + - name: Twine check + run: | + pip install twine + last_dist=$(ls -t dist/openml-*.tar.gz | head -n 1) + twine check $last_dist + - name: Install dist + run: | + last_dist=$(ls -t dist/openml-*.tar.gz | head -n 1) + pip install $last_dist + - name: PEP 561 Compliance + run: | + pip install mypy + cd .. # required to use the installed version of openml + if ! python -m mypy -c "import openml"; then exit 1; fi diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml new file mode 100644 index 000000000..2219c7fac --- /dev/null +++ b/.github/workflows/docs.yaml @@ -0,0 +1,43 @@ +name: Docs +on: [pull_request, push] + +jobs: + build-and-deploy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: 3.8 + - name: Install dependencies + run: | + pip install -e .[docs,examples,examples_unix] + - name: Make docs + run: | + cd doc + make html + - name: Pull latest gh-pages + if: (contains(github.ref, 'develop') || contains(github.ref, 'master')) && github.event_name == 'push' + run: | + cd .. 
+ git clone https://github.com/openml/openml-python.git --branch gh-pages --single-branch gh-pages + - name: Copy new doc into gh-pages + if: (contains(github.ref, 'develop') || contains(github.ref, 'master')) && github.event_name == 'push' + run: | + branch_name=${GITHUB_REF##*/} + cd ../gh-pages + rm -rf $branch_name + cp -r ../openml-python/doc/build/html $branch_name + - name: Push to gh-pages + if: (contains(github.ref, 'develop') || contains(github.ref, 'master')) && github.event_name == 'push' + run: | + last_commit=$(git log --pretty=format:"%an: %s") + cd ../gh-pages + branch_name=${GITHUB_REF##*/} + git add $branch_name/ + git config --global user.name 'Github Actions' + git config --global user.email 'not@mail.com' + git remote set-url origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }} + git commit -am "$last_commit" + git push diff --git a/.github/workflows/pre-commit.yaml b/.github/workflows/pre-commit.yaml new file mode 100644 index 000000000..6132b2de2 --- /dev/null +++ b/.github/workflows/pre-commit.yaml @@ -0,0 +1,20 @@ +name: pre-commit + +on: [push] + +jobs: + run-all-files: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Setup Python 3.7 + uses: actions/setup-python@v2 + with: + python-version: 3.7 + - name: Install pre-commit + run: | + pip install pre-commit + pre-commit install + - name: Run pre-commit + run: | + pre-commit run --all-files diff --git a/.github/workflows/ubuntu-test.yml b/.github/workflows/ubuntu-test.yml new file mode 100644 index 000000000..41cc155ac --- /dev/null +++ b/.github/workflows/ubuntu-test.yml @@ -0,0 +1,74 @@ +name: Tests + +on: [push, pull_request] + +jobs: + ubuntu: + + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [3.6, 3.7, 3.8] + scikit-learn: [0.21.2, 0.22.2, 0.23.1, 0.24] + exclude: # no scikit-learn 0.21.2 release for Python 3.8 + - python-version: 3.8 + scikit-learn: 0.21.2 + include: + - python-version: 3.6 + scikit-learn: 0.18.2 + scipy: 1.2.0 + - python-version: 3.6 + scikit-learn: 0.19.2 + - python-version: 3.6 + scikit-learn: 0.20.2 + - python-version: 3.8 + scikit-learn: 0.23.1 + code-cov: true + fail-fast: false + max-parallel: 4 + + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 2 + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install test dependencies + run: | + python -m pip install --upgrade pip + pip install -e .[test] + - name: Install scikit-learn ${{ matrix.scikit-learn }} + run: | + pip install scikit-learn==${{ matrix.scikit-learn }} + - name: Install scipy ${{ matrix.scipy }} + if: ${{ matrix.scipy }} + run: | + pip install scipy==${{ matrix.scipy }} + - name: Store repository status + id: status-before + run: | + echo "::set-output name=BEFORE::$(git status --porcelain -b)" + - name: Run tests + run: | + if [ ${{ matrix.code-cov }} ]; then codecov='--cov=openml --long --cov-report=xml'; fi + pytest -n 4 --durations=20 --timeout=600 --timeout-method=thread --dist load -sv $codecov --reruns 5 --reruns-delay 1 + - name: Check for files left behind by test + if: ${{ always() }} + run: | + before="${{ steps.status-before.outputs.BEFORE }}" + after="$(git status --porcelain -b)" + if [[ "$before" != "$after" ]]; then + echo "git status from before: $before" + echo "git status from after: $after" + echo "Not all generated files have been deleted!" 
+ exit 1 + fi + - name: Upload coverage + if: matrix.code-cov && always() + uses: codecov/codecov-action@v1 + with: + files: coverage.xml + fail_ci_if_error: true + verbose: true \ No newline at end of file diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 9fd33403c..000000000 --- a/.travis.yml +++ /dev/null @@ -1,58 +0,0 @@ -language: python - -sudo: false - -cache: - apt: true - # We use three different cache directory - # to work around a Travis bug with multi-platform cache - directories: - - $HOME/.cache/pip - - $HOME/download -env: - global: - # Directory where tests are run from - - TEST_DIR=/tmp/test_dir/ - - MODULE=openml - matrix: - - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.23.1" COVERAGE="true" DOCPUSH="true" SKIP_TESTS="true" - - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.23.1" RUN_FLAKE8="true" SKIP_TESTS="true" - - DISTRIB="conda" PYTHON_VERSION="3.8" SKLEARN_VERSION="0.23.1" TEST_DIST="true" - - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.23.1" TEST_DIST="true" - - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.23.1" TEST_DIST="true" - - DISTRIB="conda" PYTHON_VERSION="3.8" SKLEARN_VERSION="0.22.2" TEST_DIST="true" - - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.22.2" TEST_DIST="true" - - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.22.2" TEST_DIST="true" - - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.21.2" TEST_DIST="true" - - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.21.2" TEST_DIST="true" - - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.20.2" - # Checks for older scikit-learn versions (which also don't nicely work with - # Python3.7) - - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.19.2" - - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.18.2" SCIPY_VERSION=1.2.0 - -# Travis issue -# https://github.com/travis-ci/travis-ci/issues/8920 -before_install: - - python -c "import fcntl; fcntl.fcntl(1, fcntl.F_SETFL, 0)" - -install: source ci_scripts/install.sh -script: bash ci_scripts/test.sh -after_success: source ci_scripts/success.sh && source ci_scripts/create_doc.sh $TRAVIS_BRANCH "doc_result" - -# travis will check the deploy on condition, before actually running before_deploy -# before_deploy: source ci_scripts/create_doc.sh $TRAVIS_BRANCH "doc_result" - -# For more info regarding the deploy process and the github token look at: -# https://docs.travis-ci.com/user/deployment/pages/ - -deploy: - provider: pages - skip_cleanup: true - github_token: $GITHUB_TOKEN - keep-history: true - committer-from-gh: true - on: - all_branches: true - condition: $doc_result = "success" - local_dir: doc/$TRAVIS_BRANCH diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 6b7cffad3..6fe4fd605 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -260,14 +260,9 @@ The resulting HTML files will be placed in ``build/html/`` and are viewable in a web browser. See the ``README`` file in the ``doc/`` directory for more information. -For building the documentation, you will need -[sphinx](http://sphinx.pocoo.org/), -[sphinx-bootstrap-theme](https://ryan-roemer.github.io/sphinx-bootstrap-theme/), -[sphinx-gallery](https://sphinx-gallery.github.io/) -and -[numpydoc](https://numpydoc.readthedocs.io/en/latest/). 
+For building the documentation, you will need to install a few additional dependencies: ```bash -$ pip install sphinx sphinx-bootstrap-theme sphinx-gallery numpydoc +$ pip install -e .[docs] ``` When dependencies are installed, run ```bash diff --git a/README.md b/README.md index 732085697..55bab368d 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,7 @@ # OpenML-Python + +[![All Contributors](https://img.shields.io/badge/all_contributors-2-orange.svg?style=flat-square)](#contributors-) + A python interface for [OpenML](http://openml.org), an online platform for open science collaboration in machine learning. It can be used to download or upload OpenML data such as datasets and machine learning experiment results. @@ -40,3 +43,23 @@ Bibtex entry: year = {2019}, } ``` + +## Contributors ✨ + +Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/docs/en/emoji-key)): + + + + + + + + + +

+a-moadel: 📖 💡
+Neeratyoy Mallik: 💻 📖 💡
+ + + + + +This project follows the [all-contributors](https://github.com/all-contributors/all-contributors) specification. Contributions of any kind welcome! \ No newline at end of file diff --git a/appveyor.yml b/appveyor.yml index 151a5e3f7..e3fa74aaf 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -45,4 +45,4 @@ build: false test_script: - "cd C:\\projects\\openml-python" - - "%CMD_IN_ENV% pytest -n 4 --timeout=600 --timeout-method=thread -sv" + - "%CMD_IN_ENV% pytest -n 4 --timeout=600 --timeout-method=thread --dist load -sv" diff --git a/ci_scripts/create_doc.sh b/ci_scripts/create_doc.sh deleted file mode 100644 index 83afaa26b..000000000 --- a/ci_scripts/create_doc.sh +++ /dev/null @@ -1,61 +0,0 @@ -# License: BSD 3-Clause - -set -euo pipefail - -# Check if DOCPUSH is set -if ! [[ -z ${DOCPUSH+x} ]]; then - - if [[ "$DOCPUSH" == "true" ]]; then - - # install documentation building dependencies - pip install matplotlib seaborn sphinx pillow sphinx-gallery sphinx_bootstrap_theme cython numpydoc nbformat nbconvert - - # $1 is the branch name - # $2 is the global variable where we set the script status - - if ! { [ $1 = "master" ] || [ $1 = "develop" ]; }; then - { echo "Not one of the allowed branches"; exit 0; } - fi - - # delete any previous documentation folder - if [ -d doc/$1 ]; then - rm -rf doc/$1 - fi - - # create the documentation - cd doc && make html 2>&1 - - # create directory with branch name - # the documentation for dev/stable from git will be stored here - mkdir $1 - - # get previous documentation from github - git clone https://github.com/openml/openml-python.git --branch gh-pages --single-branch - - # copy previous documentation - cp -r openml-python/. $1 - rm -rf openml-python - - # if the documentation for the branch exists, remove it - if [ -d $1/$1 ]; then - rm -rf $1/$1 - fi - - # copy the updated documentation for this branch - mkdir $1/$1 - cp -r build/html/. $1/$1 - - # takes a variable name as an argument and assigns the script outcome to a - # variable with the given name. If it got this far, the script was successful - function set_return() { - # $1 is the variable where we save the script outcome - local __result=$1 - local status='success' - eval $__result="'$status'" - } - - set_return "$2" - fi -fi -# Workaround for travis failure -set +u diff --git a/ci_scripts/install.sh b/ci_scripts/install.sh deleted file mode 100755 index 67530af53..000000000 --- a/ci_scripts/install.sh +++ /dev/null @@ -1,81 +0,0 @@ -# License: BSD 3-Clause - -set -e - -# Deactivate the travis-provided virtual environment and setup a -# conda-based environment instead -deactivate - -# Use the miniconda installer for faster download / install of conda -# itself -pushd . -cd -mkdir -p download -cd download -echo "Cached in $HOME/download :" -ls -l -echo -if [[ ! -f miniconda.sh ]] - then - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \ - -O miniconda.sh - fi -chmod +x miniconda.sh && ./miniconda.sh -b -p $HOME/miniconda -cd .. 
-export PATH=/home/travis/miniconda/bin:$PATH -conda update --yes conda -popd - -# Configure the conda environment and put it in the path using the -# provided versions -conda create -n testenv --yes python=$PYTHON_VERSION pip -source activate testenv - -if [[ -v SCIPY_VERSION ]]; then - conda install --yes scipy=$SCIPY_VERSION -fi -python --version - -if [[ "$TEST_DIST" == "true" ]]; then - pip install twine nbconvert jupyter_client matplotlib pyarrow pytest pytest-xdist pytest-timeout \ - nbformat oslo.concurrency flaky mypy - python setup.py sdist - # Find file which was modified last as done in https://stackoverflow.com/a/4561987 - dist=`find dist -type f -printf '%T@ %p\n' | sort -n | tail -1 | cut -f2- -d" "` - echo "Installing $dist" - pip install "$dist" - twine check "$dist" -else - pip install -e '.[test]' -fi - -python -c "import numpy; print('numpy %s' % numpy.__version__)" -python -c "import scipy; print('scipy %s' % scipy.__version__)" - - -if [[ "$DOCPUSH" == "true" ]]; then - conda install --yes gxx_linux-64 gcc_linux-64 swig - pip install -e '.[examples,examples_unix]' -fi -if [[ "$COVERAGE" == "true" ]]; then - pip install codecov pytest-cov -fi -if [[ "$RUN_FLAKE8" == "true" ]]; then - pip install pre-commit - pre-commit install -fi - -# PEP 561 compliance check -# Assumes mypy relies solely on the PEP 561 standard -if ! python -m mypy -c "import openml"; then - echo "Failed: PEP 561 compliance" - exit 1 -else - echo "Success: PEP 561 compliant" -fi - -# Install scikit-learn last to make sure the openml package installation works -# from a clean environment without scikit-learn. -pip install scikit-learn==$SKLEARN_VERSION - -conda list diff --git a/ci_scripts/success.sh b/ci_scripts/success.sh deleted file mode 100644 index dad97d54e..000000000 --- a/ci_scripts/success.sh +++ /dev/null @@ -1,15 +0,0 @@ -# License: BSD 3-Clause - -set -e - -if [[ "$COVERAGE" == "true" ]]; then - # Need to run coveralls from a git checkout, so we copy .coverage - # from TEST_DIR where pytest has been run - cp $TEST_DIR/.coverage $TRAVIS_BUILD_DIR - cd $TRAVIS_BUILD_DIR - # Ignore coveralls failures as the coveralls server is not - # very reliable but we don't want travis to report a failure - # in the github UI just because the coverage report failed to - # be published. 
- codecov || echo "Codecov upload failed" -fi diff --git a/ci_scripts/test.sh b/ci_scripts/test.sh deleted file mode 100644 index 0a1f94df6..000000000 --- a/ci_scripts/test.sh +++ /dev/null @@ -1,48 +0,0 @@ -# License: BSD 3-Clause - -set -e - -# check status and branch before running the unit tests -before="`git status --porcelain -b`" -before="$before" -# storing current working directory -curr_dir=`pwd` - -run_tests() { - # Get into a temp directory to run test from the installed scikit learn and - # check if we do not leave artifacts - mkdir -p $TEST_DIR - - cwd=`pwd` - test_dir=$cwd/tests - - cd $TEST_DIR - - if [[ "$COVERAGE" == "true" ]]; then - PYTEST_ARGS='--cov=openml' - else - PYTEST_ARGS='' - fi - - pytest -n 4 --durations=20 --timeout=600 --timeout-method=thread -sv $PYTEST_ARGS $test_dir -} - -if [[ "$RUN_FLAKE8" == "true" ]]; then - pre-commit run --all-files -fi - -if [[ "$SKIP_TESTS" != "true" ]]; then - run_tests -fi - -# changing directory to stored working directory -cd $curr_dir -# check status and branch after running the unit tests -# compares with $before to check for remaining files -after="`git status --porcelain -b`" -if [[ "$before" != "$after" ]]; then - echo 'git status from before: '$before - echo 'git status from after: '$after - echo "All generated files have not been deleted!" - exit 1 -fi diff --git a/doc/conf.py b/doc/conf.py index 9c4606143..e5de2d551 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -64,10 +64,8 @@ master_doc = "index" # General information about the project. -project = u"OpenML" -copyright = u"2014-{}, the OpenML-Python team.".format( - time.strftime("%Y,%m,%d,%H,%M,%S").split(",")[0] -) +project = "OpenML" +copyright = f"2014-{time.localtime().tm_year}, the OpenML-Python team" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -263,7 +261,7 @@ # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - ("index", "OpenML.tex", u"OpenML Documentation", u"Matthias Feurer", "manual"), + ("index", "OpenML.tex", "OpenML Documentation", "Matthias Feurer", "manual"), ] # The name of an image file (relative to this directory) to place at the top of @@ -291,7 +289,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [("index", "openml", u"OpenML Documentation", [u"Matthias Feurer"], 1)] +man_pages = [("index", "openml", "OpenML Documentation", ["Matthias Feurer"], 1)] # If true, show URL addresses after external links. # man_show_urls = False @@ -306,8 +304,8 @@ ( "index", "OpenML", - u"OpenML Documentation", - u"Matthias Feurer", + "OpenML Documentation", + "Matthias Feurer", "OpenML", "One line description of project.", "Miscellaneous", diff --git a/doc/index.rst b/doc/index.rst index 789979023..e38e4d877 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -32,7 +32,7 @@ Example ) # Download the OpenML task for the german credit card dataset with 10-fold # cross-validation. - task = openml.tasks.get_task(31) + task = openml.tasks.get_task(32) # Run the scikit-learn model on the task. run = openml.runs.run_model_on_task(clf, task) # Publish the experiment on OpenML (optional, requires an API key. 
diff --git a/doc/progress.rst b/doc/progress.rst index 1956fcb42..1ca1e1d0e 100644 --- a/doc/progress.rst +++ b/doc/progress.rst @@ -6,6 +6,35 @@ Changelog ========= +0.11.1 +~~~~~~ +* ADD #964: Validate ``ignore_attribute``, ``default_target_attribute``, ``row_id_attribute`` are set to attributes that exist on the dataset when calling ``create_dataset``. +* ADD #979: Dataset features and qualities are now also cached in pickle format. +* ADD #982: Add helper functions for column transformers. +* ADD #989: ``run_model_on_task`` will now warn the user that the model passed has already been fitted. +* ADD #1009: Add the option to not download the dataset qualities. The cached version is used even if the download attribute is False. +* ADD #1016: Add scikit-learn 0.24 support. +* ADD #1020: Add option to parallelize evaluation of tasks with joblib. +* ADD #1022: Allow minimum version of dependencies to be listed for a flow, use more accurate minimum versions for scikit-learn dependencies. +* ADD #1023: Add admin-only calls for adding topics to datasets. +* ADD #1029: Add support for fetching a dataset from a MinIO server in parquet format. +* ADD #1031: Generally improve runtime measurements, add them for some previously unsupported flows (e.g. BaseSearchCV derived flows). +* DOC #973: Change the task used in the welcome page example so it no longer fails when using a numerical dataset. +* MAINT #671: Improved the performance of ``check_datasets_active`` by only querying the given list of datasets in contrast to querying all datasets. Modified the corresponding unit test. +* MAINT #891: Changed the way that numerical features are stored. Numerical features that range from 0 to 255 are now stored as uint8, which reduces the storage space required as well as storing and loading times. +* MAINT #975, #988: Add CI through GitHub Actions. +* MAINT #977: Allow ``short`` and ``long`` scenarios for unit tests. Reduce the workload for some unit tests. +* MAINT #985, #1000: Improve unit test stability and output readability, and add load balancing. +* MAINT #1018: Refactor data loading and storage. Data is now compressed on the first call to ``get_data``. +* MAINT #1024: Remove flaky decorator for study unit test. +* FIX #883 #884 #906 #972: Various improvements to the caching system. +* FIX #980: Speed up ``check_datasets_active``. +* FIX #984: Add a retry mechanism when the server encounters a database issue. +* FIX #1004: Fixed an issue that prevented installation on some systems (e.g. Ubuntu). +* FIX #1013: Fixes a bug where ``OpenMLRun.setup_string`` was not uploaded to the server, prepares for ``run_details`` being sent from the server. +* FIX #1021: Fixes an issue that could occur when running unit tests and openml-python was not in PATH. +* FIX #1037: Fixes a bug where a dataset could not be loaded if a nan-like value was listed as a possible category of a categorical feature. + 0.11.0 ~~~~~~ * ADD #753: Allows uploading custom flows to OpenML via OpenML-Python. diff --git a/examples/30_extended/custom_flow_tutorial.py b/examples/30_extended/custom_flow_.py similarity index 98% rename from examples/30_extended/custom_flow_tutorial.py rename to examples/30_extended/custom_flow_.py index 3b918e108..02aef9c5c 100644 --- a/examples/30_extended/custom_flow_tutorial.py +++ b/examples/30_extended/custom_flow_.py @@ -82,10 +82,10 @@ # This allows people to specify auto-sklearn hyperparameters used in this flow. # In general, using a subflow is not required.
# -# Note: flow 15275 is not actually the right flow on the test server, +# Note: flow 9313 is not actually the right flow on the test server, # but that does not matter for this demonstration. -autosklearn_flow = openml.flows.get_flow(15275) # auto-sklearn 0.5.1 +autosklearn_flow = openml.flows.get_flow(9313) # auto-sklearn 0.5.1 subflow = dict(components=OrderedDict(automl_tool=autosklearn_flow),) #################################################################################################### @@ -120,7 +120,7 @@ OrderedDict([("oml:name", "time"), ("oml:value", 120), ("oml:component", flow_id)]), ] -task_id = 1408 # Iris Task +task_id = 1965 # Iris Task task = openml.tasks.get_task(task_id) dataset_id = task.get_dataset().dataset_id diff --git a/examples/30_extended/datasets_tutorial.py b/examples/30_extended/datasets_tutorial.py index 594a58930..7a51cce70 100644 --- a/examples/30_extended/datasets_tutorial.py +++ b/examples/30_extended/datasets_tutorial.py @@ -112,7 +112,7 @@ ############################################################################ # Edit a created dataset -# ================================================= +# ====================== # This example uses the test server, to avoid editing a dataset on the main server. openml.config.start_using_configuration_for_example() ############################################################################ @@ -143,18 +143,23 @@ # tasks associated with it. To edit critical fields of a dataset (without tasks) owned by you, # configure the API key: # openml.config.apikey = 'FILL_IN_OPENML_API_KEY' -data_id = edit_dataset(564, default_target_attribute="y") -print(f"Edited dataset ID: {data_id}") - +# This example here only shows a failure when trying to work on a dataset not owned by you: +try: + data_id = edit_dataset(1, default_target_attribute="shape") +except openml.exceptions.OpenMLServerException as e: + print(e) ############################################################################ # Fork dataset +# ============ # Used to create a copy of the dataset with you as the owner. # Use this API only if you are unable to edit the critical fields (default_target_attribute, # ignore_attribute, row_id_attribute) of a dataset through the edit_dataset API. # After the dataset is forked, you can edit the new version of the dataset using edit_dataset. -data_id = fork_dataset(564) +data_id = fork_dataset(1) +print(data_id) +data_id = edit_dataset(data_id, default_target_attribute="shape") print(f"Forked dataset ID: {data_id}") openml.config.stop_using_configuration_for_example() diff --git a/examples/30_extended/flows_and_runs_tutorial.py b/examples/30_extended/flows_and_runs_tutorial.py index 76eb2f219..9f8c89375 100644 --- a/examples/30_extended/flows_and_runs_tutorial.py +++ b/examples/30_extended/flows_and_runs_tutorial.py @@ -53,7 +53,7 @@ task = openml.tasks.get_task(403) # Build any classifier or pipeline -clf = tree.ExtraTreeClassifier() +clf = tree.DecisionTreeClassifier() # Run the flow run = openml.runs.run_model_on_task(clf, task) @@ -82,13 +82,14 @@ # ############################ # # When you need to handle 'dirty' data, build pipelines to model then automatically. 
-task = openml.tasks.get_task(1) -features = task.get_dataset().features -nominal_feature_indices = [ - i - for i in range(len(features)) - if features[i].name != task.target_name and features[i].data_type == "nominal" -] +# To demonstrate this using the dataset `credit-a `_ via +# `task `_ as it contains both numerical and categorical +# variables and missing values in both. +task = openml.tasks.get_task(96) + +# OpenML helper functions for sklearn can be plugged in directly for complicated pipelines +from openml.extensions.sklearn import cat, cont + pipe = pipeline.Pipeline( steps=[ ( @@ -96,20 +97,15 @@ compose.ColumnTransformer( [ ( - "Nominal", - pipeline.Pipeline( - [ - ("Imputer", impute.SimpleImputer(strategy="most_frequent")), - ( - "Encoder", - preprocessing.OneHotEncoder( - sparse=False, handle_unknown="ignore", - ), - ), - ] - ), - nominal_feature_indices, + "categorical", + preprocessing.OneHotEncoder(sparse=False, handle_unknown="ignore"), + cat, # returns the categorical feature indices ), + ( + "continuous", + impute.SimpleImputer(strategy="median"), + cont, + ), # returns the numeric feature indices ] ), ), @@ -121,6 +117,50 @@ myrun = run.publish() print("Uploaded to http://test.openml.org/r/" + str(myrun.run_id)) + +# The above pipeline works with the helper functions that internally deal with pandas DataFrame. +# In the case, pandas is not available, or a NumPy based data processing is the requirement, the +# above pipeline is presented below to work with NumPy. + +# Extracting the indices of the categorical columns +features = task.get_dataset().features +categorical_feature_indices = [] +numeric_feature_indices = [] +for i in range(len(features)): + if features[i].name == task.target_name: + continue + if features[i].data_type == "nominal": + categorical_feature_indices.append(i) + else: + numeric_feature_indices.append(i) + +pipe = pipeline.Pipeline( + steps=[ + ( + "Preprocessing", + compose.ColumnTransformer( + [ + ( + "categorical", + preprocessing.OneHotEncoder(sparse=False, handle_unknown="ignore"), + categorical_feature_indices, + ), + ( + "continuous", + impute.SimpleImputer(strategy="median"), + numeric_feature_indices, + ), + ] + ), + ), + ("Classifier", ensemble.RandomForestClassifier(n_estimators=10)), + ] +) + +run = openml.runs.run_model_on_task(pipe, task, avoid_duplicate_runs=False, dataset_format="array") +myrun = run.publish() +print("Uploaded to http://test.openml.org/r/" + str(myrun.run_id)) + ############################################################################### # Running flows on tasks offline for later upload # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -132,7 +172,9 @@ task = openml.tasks.get_task(6) # The following lines can then be executed offline: -run = openml.runs.run_model_on_task(pipe, task, avoid_duplicate_runs=False, upload_flow=False) +run = openml.runs.run_model_on_task( + pipe, task, avoid_duplicate_runs=False, upload_flow=False, dataset_format="array", +) # The run may be stored offline, and the flow will be stored along with it: run.to_filesystem(directory="myrun") diff --git a/examples/30_extended/run_setup_tutorial.py b/examples/30_extended/run_setup_tutorial.py index a46bf9699..8579d1d38 100644 --- a/examples/30_extended/run_setup_tutorial.py +++ b/examples/30_extended/run_setup_tutorial.py @@ -34,14 +34,14 @@ import numpy as np import openml -import sklearn.ensemble -import sklearn.impute -import sklearn.preprocessing +from openml.extensions.sklearn import cat, cont + from sklearn.pipeline import 
make_pipeline, Pipeline from sklearn.compose import ColumnTransformer from sklearn.impute import SimpleImputer from sklearn.preprocessing import OneHotEncoder, FunctionTransformer -from sklearn.experimental import enable_hist_gradient_boosting +from sklearn.ensemble import RandomForestClassifier +from sklearn.decomposition import TruncatedSVD openml.config.start_using_configuration_for_example() @@ -58,37 +58,20 @@ # many potential hyperparameters. Of course, the model can be as complex and as # easy as you want it to be -from sklearn.ensemble import HistGradientBoostingClassifier -from sklearn.decomposition import TruncatedSVD - - -# Helper functions to return required columns for ColumnTransformer -def cont(X): - return X.dtypes != "category" - - -def cat(X): - return X.dtypes == "category" - -cat_imp = make_pipeline( - SimpleImputer(strategy="most_frequent"), - OneHotEncoder(handle_unknown="ignore", sparse=False), - TruncatedSVD(), -) -ct = ColumnTransformer([("cat", cat_imp, cat), ("cont", "passthrough", cont)]) -model_original = sklearn.pipeline.Pipeline( - steps=[("transform", ct), ("estimator", HistGradientBoostingClassifier()),] -) +cat_imp = make_pipeline(OneHotEncoder(handle_unknown="ignore", sparse=False), TruncatedSVD(),) +cont_imp = SimpleImputer(strategy="median") +ct = ColumnTransformer([("cat", cat_imp, cat), ("cont", cont_imp, cont)]) +model_original = Pipeline(steps=[("transform", ct), ("estimator", RandomForestClassifier()),]) # Let's change some hyperparameters. Of course, in any good application we # would tune them using, e.g., Random Search or Bayesian Optimization, but for # the purpose of this tutorial we set them to some specific values that might # or might not be optimal hyperparameters_original = { - "estimator__loss": "auto", - "estimator__learning_rate": 0.15, - "estimator__max_iter": 50, + "estimator__criterion": "gini", + "estimator__n_estimators": 50, + "estimator__max_depth": 10, "estimator__min_samples_leaf": 1, } model_original.set_params(**hyperparameters_original) diff --git a/examples/30_extended/study_tutorial.py b/examples/30_extended/study_tutorial.py index c02a5c038..3c93a7e81 100644 --- a/examples/30_extended/study_tutorial.py +++ b/examples/30_extended/study_tutorial.py @@ -15,13 +15,7 @@ import uuid -import numpy as np -import sklearn.tree -from sklearn.pipeline import make_pipeline, Pipeline -from sklearn.compose import ColumnTransformer -from sklearn.impute import SimpleImputer -from sklearn.decomposition import TruncatedSVD -from sklearn.preprocessing import OneHotEncoder, FunctionTransformer +from sklearn.ensemble import RandomForestClassifier import openml @@ -71,45 +65,25 @@ ) print(evaluations.head()) -###########################################################from openml.testing import cat, cont################# +############################################################################ # Uploading studies # ================= # # Creating a study is as simple as creating any kind of other OpenML entity. # In this examples we'll create a few runs for the OpenML-100 benchmark # suite which is available on the OpenML test server. 
- openml.config.start_using_configuration_for_example() -# Model that can handle missing values -from sklearn.experimental import enable_hist_gradient_boosting -from sklearn.ensemble import HistGradientBoostingClassifier - - -# Helper functions to return required columns for ColumnTransformer -def cont(X): - return X.dtypes != "category" - - -def cat(X): - return X.dtypes == "category" +# Model to be used +clf = RandomForestClassifier() +# We'll create a study with one run on 3 datasets present in the suite +tasks = [115, 259, 307] -cat_imp = make_pipeline( - SimpleImputer(strategy="most_frequent"), - OneHotEncoder(handle_unknown="ignore", sparse=False), - TruncatedSVD(), -) -ct = ColumnTransformer( - [("cat", cat_imp, cat), ("cont", FunctionTransformer(lambda x: x, validate=False), cont)] -) -clf = sklearn.pipeline.Pipeline( - steps=[("transform", ct), ("estimator", HistGradientBoostingClassifier()),] -) - +# To verify suite = openml.study.get_suite(1) -# We'll create a study with one run on three random datasets each -tasks = np.random.choice(suite.tasks, size=3, replace=False) +print(all([t_id in suite.tasks for t_id in tasks])) + run_ids = [] for task_id in tasks: task = openml.tasks.get_task(task_id) diff --git a/examples/30_extended/task_manual_iteration_tutorial.py b/examples/30_extended/task_manual_iteration_tutorial.py index c879e9fea..533f645b2 100644 --- a/examples/30_extended/task_manual_iteration_tutorial.py +++ b/examples/30_extended/task_manual_iteration_tutorial.py @@ -61,11 +61,11 @@ #################################################################################################### # And then split the data based on this: -X, y, _, _ = task.get_dataset().get_data(task.target_name) -X_train = X.loc[train_indices] -y_train = y[train_indices] -X_test = X.loc[test_indices] -y_test = y[test_indices] +X, y = task.get_X_and_y(dataset_format="dataframe") +X_train = X.iloc[train_indices] +y_train = y.iloc[train_indices] +X_test = X.iloc[test_indices] +y_test = y.iloc[test_indices] print( "X_train.shape: {}, y_train.shape: {}, X_test.shape: {}, y_test.shape: {}".format( @@ -78,6 +78,7 @@ task_id = 3 task = openml.tasks.get_task(task_id) +X, y = task.get_X_and_y(dataset_format="dataframe") n_repeats, n_folds, n_samples = task.get_split_dimensions() print( "Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format( @@ -93,10 +94,10 @@ train_indices, test_indices = task.get_train_test_split_indices( repeat=repeat_idx, fold=fold_idx, sample=sample_idx, ) - X_train = X.loc[train_indices] - y_train = y[train_indices] - X_test = X.loc[test_indices] - y_test = y[test_indices] + X_train = X.iloc[train_indices] + y_train = y.iloc[train_indices] + X_test = X.iloc[test_indices] + y_test = y.iloc[test_indices] print( "Repeat #{}, fold #{}, samples {}: X_train.shape: {}, " @@ -116,6 +117,7 @@ task_id = 1767 task = openml.tasks.get_task(task_id) +X, y = task.get_X_and_y(dataset_format="dataframe") n_repeats, n_folds, n_samples = task.get_split_dimensions() print( "Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format( @@ -131,10 +133,10 @@ train_indices, test_indices = task.get_train_test_split_indices( repeat=repeat_idx, fold=fold_idx, sample=sample_idx, ) - X_train = X.loc[train_indices] - y_train = y[train_indices] - X_test = X.loc[test_indices] - y_test = y[test_indices] + X_train = X.iloc[train_indices] + y_train = y.iloc[train_indices] + X_test = X.iloc[test_indices] + y_test = y.iloc[test_indices] print( "Repeat #{}, fold #{}, samples 
{}: X_train.shape: {}, " @@ -154,6 +156,7 @@ task_id = 1702 task = openml.tasks.get_task(task_id) +X, y = task.get_X_and_y(dataset_format="dataframe") n_repeats, n_folds, n_samples = task.get_split_dimensions() print( "Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format( @@ -169,10 +172,10 @@ train_indices, test_indices = task.get_train_test_split_indices( repeat=repeat_idx, fold=fold_idx, sample=sample_idx, ) - X_train = X.loc[train_indices] - y_train = y[train_indices] - X_test = X.loc[test_indices] - y_test = y[test_indices] + X_train = X.iloc[train_indices] + y_train = y.iloc[train_indices] + X_test = X.iloc[test_indices] + y_test = y.iloc[test_indices] print( "Repeat #{}, fold #{}, samples {}: X_train.shape: {}, " diff --git a/examples/40_paper/2018_neurips_perrone_example.py b/examples/40_paper/2018_neurips_perrone_example.py index 60d212116..5ae339ae2 100644 --- a/examples/40_paper/2018_neurips_perrone_example.py +++ b/examples/40_paper/2018_neurips_perrone_example.py @@ -177,18 +177,14 @@ def list_categorical_attributes(flow_type="svm"): cat_cols = list_categorical_attributes(flow_type=flow_type) num_cols = list(set(X.columns) - set(cat_cols)) -# Missing value imputers -cat_imputer = SimpleImputer(missing_values=np.nan, strategy="constant", fill_value="None") +# Missing value imputers for numeric columns num_imputer = SimpleImputer(missing_values=np.nan, strategy="constant", fill_value=-1) -# Creating the one-hot encoder +# Creating the one-hot encoder for numerical representation of categorical columns enc = OneHotEncoder(handle_unknown="ignore") -# Pipeline to handle categorical column transformations -cat_transforms = Pipeline(steps=[("impute", cat_imputer), ("encode", enc)]) - # Combining column transformers -ct = ColumnTransformer([("cat", cat_transforms, cat_cols), ("num", num_imputer, num_cols)]) +ct = ColumnTransformer([("cat", enc, cat_cols), ("num", num_imputer, num_cols)]) # Creating the full pipeline with the surrogate model clf = RandomForestRegressor(n_estimators=50) diff --git a/openml/__version__.py b/openml/__version__.py index 07c9a950d..ff4effa59 100644 --- a/openml/__version__.py +++ b/openml/__version__.py @@ -3,4 +3,4 @@ # License: BSD 3-Clause # The following line *must* be the last in the module, exactly as formatted: -__version__ = "0.11.0" +__version__ = "0.12.0" diff --git a/openml/_api_calls.py b/openml/_api_calls.py index 57599b912..aee67d8c6 100644 --- a/openml/_api_calls.py +++ b/openml/_api_calls.py @@ -3,9 +3,14 @@ import time import hashlib import logging +import pathlib import requests +import urllib.parse +import xml import xmltodict -from typing import Dict, Optional +from typing import Dict, Optional, Union + +import minio from . import config from .exceptions import ( @@ -55,7 +60,7 @@ def _perform_api_call(call, request_method, data=None, file_elements=None): if file_elements is not None: if request_method != "post": raise ValueError("request method must be post when file elements are present") - response = __read_url_files(url, data=data, file_elements=file_elements) + response = _read_url_files(url, data=data, file_elements=file_elements) else: response = __read_url(url, request_method, data) @@ -67,6 +72,45 @@ def _perform_api_call(call, request_method, data=None, file_elements=None): return response.text +def _download_minio_file( + source: str, destination: Union[str, pathlib.Path], exists_ok: bool = True, +) -> None: + """ Download file ``source`` from a MinIO Bucket and store it at ``destination``. 
+ + Parameters + ---------- + source : Union[str, pathlib.Path] + URL to a file in a MinIO bucket. + destination : str + Path to store the file to, if a directory is provided the original filename is used. + exists_ok : bool, optional (default=True) + If False, raise FileExists if a file already exists in ``destination``. + + """ + destination = pathlib.Path(destination) + parsed_url = urllib.parse.urlparse(source) + + # expect path format: /BUCKET/path/to/file.ext + bucket, object_name = parsed_url.path[1:].split("/", maxsplit=1) + if destination.is_dir(): + destination = pathlib.Path(destination, object_name) + if destination.is_file() and not exists_ok: + raise FileExistsError(f"File already exists in {destination}.") + + client = minio.Minio(endpoint=parsed_url.netloc, secure=False) + + try: + client.fget_object( + bucket_name=bucket, object_name=object_name, file_path=str(destination), + ) + except minio.error.S3Error as e: + if e.message.startswith("Object does not exist"): + raise FileNotFoundError(f"Object at '{source}' does not exist.") from e + # e.g. permission error, or a bucket does not exist (which is also interpreted as a + # permission error on minio level). + raise FileNotFoundError("Bucket does not exist or is private.") from e + + def _download_text_file( source: str, output_path: Optional[str] = None, @@ -105,21 +149,9 @@ def _download_text_file( logging.info("Starting [%s] request for the URL %s", "get", source) start = time.time() - response = __read_url(source, request_method="get") - __check_response(response, source, None) + response = __read_url(source, request_method="get", md5_checksum=md5_checksum) downloaded_file = response.text - if md5_checksum is not None: - md5 = hashlib.md5() - md5.update(downloaded_file.encode("utf-8")) - md5_checksum_download = md5.hexdigest() - if md5_checksum != md5_checksum_download: - raise OpenMLHashException( - "Checksum {} of downloaded file is unequal to the expected checksum {}.".format( - md5_checksum_download, md5_checksum - ) - ) - if output_path is None: logging.info( "%.7fs taken for [%s] request for the URL %s", time.time() - start, "get", source, @@ -138,15 +170,6 @@ def _download_text_file( return None -def __check_response(response, url, file_elements): - if response.status_code != 200: - raise __parse_server_exception(response, url, file_elements=file_elements) - elif ( - "Content-Encoding" not in response.headers or response.headers["Content-Encoding"] != "gzip" - ): - logging.warning("Received uncompressed content from OpenML for {}.".format(url)) - - def _file_id_to_url(file_id, filename=None): """ Presents the URL how to download a given file id @@ -159,7 +182,7 @@ def _file_id_to_url(file_id, filename=None): return url -def __read_url_files(url, data=None, file_elements=None): +def _read_url_files(url, data=None, file_elements=None): """do a post request to url with data and sending file_elements as files""" @@ -169,26 +192,37 @@ def __read_url_files(url, data=None, file_elements=None): file_elements = {} # Using requests.post sets header 'Accept-encoding' automatically to # 'gzip,deflate' - response = __send_request(request_method="post", url=url, data=data, files=file_elements,) + response = _send_request(request_method="post", url=url, data=data, files=file_elements,) return response -def __read_url(url, request_method, data=None): +def __read_url(url, request_method, data=None, md5_checksum=None): data = {} if data is None else data - if config.apikey is not None: + if config.apikey: data["api_key"] = 
config.apikey + return _send_request( + request_method=request_method, url=url, data=data, md5_checksum=md5_checksum + ) + + +def __is_checksum_equal(downloaded_file, md5_checksum=None): + if md5_checksum is None: + return True + md5 = hashlib.md5() + md5.update(downloaded_file.encode("utf-8")) + md5_checksum_download = md5.hexdigest() + if md5_checksum == md5_checksum_download: + return True + return False - return __send_request(request_method=request_method, url=url, data=data) +def _send_request(request_method, url, data, files=None, md5_checksum=None): + n_retries = max(1, min(config.connection_n_retries, config.max_retries)) -def __send_request( - request_method, url, data, files=None, -): - n_retries = config.connection_n_retries response = None with requests.Session() as session: # Start at one to have a non-zero multiplier for the sleep - for i in range(1, n_retries + 1): + for retry_counter in range(1, n_retries + 1): try: if request_method == "get": response = session.get(url, params=data) @@ -198,17 +232,51 @@ def __send_request( response = session.post(url, data=data, files=files) else: raise NotImplementedError() + __check_response(response=response, url=url, file_elements=files) + if request_method == "get" and not __is_checksum_equal(response.text, md5_checksum): + raise OpenMLHashException( + "Checksum of downloaded file is unequal to the expected checksum {} " + "when downloading {}.".format(md5_checksum, url) + ) break - except (requests.exceptions.ConnectionError, requests.exceptions.SSLError,) as e: - if i == n_retries: - raise e + except ( + requests.exceptions.ConnectionError, + requests.exceptions.SSLError, + OpenMLServerException, + xml.parsers.expat.ExpatError, + OpenMLHashException, + ) as e: + if isinstance(e, OpenMLServerException): + if e.code not in [107, 500]: + # 107: database connection error + # 500: internal server error + raise + elif isinstance(e, xml.parsers.expat.ExpatError): + if request_method != "get" or retry_counter >= n_retries: + raise OpenMLServerError( + "Unexpected server error when calling {}. Please contact the " + "developers!\nStatus code: {}\n{}".format( + url, response.status_code, response.text, + ) + ) + if retry_counter >= n_retries: + raise else: - time.sleep(0.1 * i) + time.sleep(retry_counter) if response is None: raise ValueError("This should never happen!") return response +def __check_response(response, url, file_elements): + if response.status_code != 200: + raise __parse_server_exception(response, url, file_elements=file_elements) + elif ( + "Content-Encoding" not in response.headers or response.headers["Content-Encoding"] != "gzip" + ): + logging.warning("Received uncompressed content from OpenML for {}.".format(url)) + + def __parse_server_exception( response: requests.Response, url: str, file_elements: Dict, ) -> OpenMLServerError: @@ -217,6 +285,8 @@ def __parse_server_exception( raise OpenMLServerError("URI too long! ({})".format(url)) try: server_exception = xmltodict.parse(response.text) + except xml.parsers.expat.ExpatError: + raise except Exception: # OpenML has a sophisticated error system # where information about failures is provided. 
try to parse this diff --git a/openml/config.py b/openml/config.py index 296b71663..9e2e697d5 100644 --- a/openml/config.py +++ b/openml/config.py @@ -7,6 +7,8 @@ import logging import logging.handlers import os +from pathlib import Path +import platform from typing import Tuple, cast from io import StringIO @@ -19,7 +21,7 @@ file_handler = None -def _create_log_handlers(): +def _create_log_handlers(create_file_handler=True): """ Creates but does not attach the log handlers. """ global console_handler, file_handler if console_handler is not None or file_handler is not None: @@ -32,12 +34,13 @@ def _create_log_handlers(): console_handler = logging.StreamHandler() console_handler.setFormatter(output_formatter) - one_mb = 2 ** 20 - log_path = os.path.join(cache_directory, "openml_python.log") - file_handler = logging.handlers.RotatingFileHandler( - log_path, maxBytes=one_mb, backupCount=1, delay=True - ) - file_handler.setFormatter(output_formatter) + if create_file_handler: + one_mb = 2 ** 20 + log_path = os.path.join(cache_directory, "openml_python.log") + file_handler = logging.handlers.RotatingFileHandler( + log_path, maxBytes=one_mb, backupCount=1, delay=True + ) + file_handler.setFormatter(output_formatter) def _convert_log_levels(log_level: int) -> Tuple[int, int]: @@ -83,14 +86,18 @@ def set_file_log_level(file_output_level: int): # Default values (see also https://github.com/openml/OpenML/wiki/Client-API-Standards) _defaults = { - "apikey": None, + "apikey": "", "server": "https://www.openml.org/api/v1/xml", - "cachedir": os.path.expanduser(os.path.join("~", ".openml", "cache")), + "cachedir": ( + os.environ.get("XDG_CACHE_HOME", os.path.join("~", ".cache", "openml",)) + if platform.system() == "Linux" + else os.path.join("~", ".openml") + ), "avoid_duplicate_runs": "True", - "connection_n_retries": 2, + "connection_n_retries": "10", + "max_retries": "20", } -config_file = os.path.expanduser(os.path.join("~", ".openml", "config")) # Default values are actually added here in the _setup() function which is # called at the end of this module @@ -115,7 +122,8 @@ def get_server_base_url() -> str: avoid_duplicate_runs = True if _defaults["avoid_duplicate_runs"] == "True" else False # Number of retries if the connection breaks -connection_n_retries = _defaults["connection_n_retries"] +connection_n_retries = int(_defaults["connection_n_retries"]) +max_retries = int(_defaults["max_retries"]) class ConfigurationForExamples: @@ -169,7 +177,7 @@ def stop_using_configuration_for_example(cls): cls._start_last_called = False -def _setup(): +def _setup(config=None): """Setup openml package. Called on first import. Reads the config file and sets up apikey, server, cache appropriately. 
@@ -183,62 +191,102 @@ def _setup(): global cache_directory global avoid_duplicate_runs global connection_n_retries + global max_retries - # read config file, create cache directory - try: - os.mkdir(os.path.expanduser(os.path.join("~", ".openml"))) - except FileExistsError: - # For other errors, we want to propagate the error as openml does not work without cache - pass + if platform.system() == "Linux": + config_dir = Path(os.environ.get("XDG_CONFIG_HOME", Path("~") / ".config" / "openml")) + else: + config_dir = Path("~") / ".openml" + # Still use os.path.expanduser to trigger the mock in the unit test + config_dir = Path(os.path.expanduser(config_dir)) + config_file = config_dir / "config" + + # read config file, create directory for config file + if not os.path.exists(config_dir): + try: + os.mkdir(config_dir) + cache_exists = True + except PermissionError: + cache_exists = False + else: + cache_exists = True - config = _parse_config() - apikey = config.get("FAKE_SECTION", "apikey") - server = config.get("FAKE_SECTION", "server") + if config is None: + config = _parse_config(config_file) - short_cache_dir = config.get("FAKE_SECTION", "cachedir") - cache_directory = os.path.expanduser(short_cache_dir) + def _get(config, key): + return config.get("FAKE_SECTION", key) + + avoid_duplicate_runs = config.getboolean("FAKE_SECTION", "avoid_duplicate_runs") + else: + + def _get(config, key): + return config.get(key) + + avoid_duplicate_runs = config.get("avoid_duplicate_runs") + apikey = _get(config, "apikey") + server = _get(config, "server") + short_cache_dir = _get(config, "cachedir") + connection_n_retries = int(_get(config, "connection_n_retries")) + max_retries = int(_get(config, "max_retries")) + + cache_directory = os.path.expanduser(short_cache_dir) # create the cache subdirectory - try: - os.mkdir(cache_directory) - except FileExistsError: - # For other errors, we want to propagate the error as openml does not work without cache - pass - - avoid_duplicate_runs = config.getboolean("FAKE_SECTION", "avoid_duplicate_runs") - connection_n_retries = config.get("FAKE_SECTION", "connection_n_retries") - if connection_n_retries > 20: + if not os.path.exists(cache_directory): + try: + os.mkdir(cache_directory) + except PermissionError: + openml_logger.warning( + "No permission to create openml cache directory at %s! This can result in " + "OpenML-Python not working properly." % cache_directory + ) + + if cache_exists: + _create_log_handlers() + else: + _create_log_handlers(create_file_handler=False) + openml_logger.warning( + "No permission to create OpenML directory at %s! This can result in OpenML-Python " + "not working properly." % config_dir + ) + + if connection_n_retries > max_retries: raise ValueError( - "A higher number of retries than 20 is not allowed to keep the " - "server load reasonable" + "A higher number of retries than {} is not allowed to keep the " + "server load reasonable".format(max_retries) ) -def _parse_config(): +def _parse_config(config_file: str): """ Parse the config file, set up defaults. """ config = configparser.RawConfigParser(defaults=_defaults) - if not os.path.exists(config_file): - # Create an empty config file if there was none so far - fh = open(config_file, "w") - fh.close() - logger.info( - "Could not find a configuration file at %s. Going to " - "create an empty file there." % config_file - ) - + # The ConfigParser requires a [SECTION_HEADER], which we do not expect in our config file. 
+ # Cheat the ConfigParser module by adding a fake section header + config_file_ = StringIO() + config_file_.write("[FAKE_SECTION]\n") try: - # The ConfigParser requires a [SECTION_HEADER], which we do not expect in our config file. - # Cheat the ConfigParser module by adding a fake section header - config_file_ = StringIO() - config_file_.write("[FAKE_SECTION]\n") with open(config_file) as fh: for line in fh: config_file_.write(line) - config_file_.seek(0) - config.read_file(config_file_) + except FileNotFoundError: + logger.info("No config file found at %s, using default configuration.", config_file) except OSError as e: - logger.info("Error opening file %s: %s", config_file, e.message) + logger.info("Error opening file %s: %s", config_file, e.args[0]) + config_file_.seek(0) + config.read_file(config_file_) + return config + + +def get_config_as_dict(): + config = dict() + config["apikey"] = apikey + config["server"] = server + config["cachedir"] = cache_directory + config["avoid_duplicate_runs"] = avoid_duplicate_runs + config["connection_n_retries"] = connection_n_retries + config["max_retries"] = max_retries return config @@ -253,11 +301,7 @@ def get_cache_directory(): """ url_suffix = urlparse(server).netloc reversed_url_suffix = os.sep.join(url_suffix.split(".")[::-1]) - if not cache_directory: - _cachedir = _defaults(cache_directory) - else: - _cachedir = cache_directory - _cachedir = os.path.join(_cachedir, reversed_url_suffix) + _cachedir = os.path.join(cache_directory, reversed_url_suffix) return _cachedir @@ -285,12 +329,13 @@ def set_cache_directory(cachedir): ) stop_using_configuration_for_example = ConfigurationForExamples.stop_using_configuration_for_example + __all__ = [ "get_cache_directory", "set_cache_directory", "start_using_configuration_for_example", "stop_using_configuration_for_example", + "get_config_as_dict", ] _setup() -_create_log_handlers() diff --git a/openml/datasets/data_feature.py b/openml/datasets/data_feature.py index eb727b000..a1e2556be 100644 --- a/openml/datasets/data_feature.py +++ b/openml/datasets/data_feature.py @@ -1,5 +1,7 @@ # License: BSD 3-Clause +from typing import List + class OpenMLDataFeature(object): """ @@ -20,7 +22,14 @@ class OpenMLDataFeature(object): LEGAL_DATA_TYPES = ["nominal", "numeric", "string", "date"] - def __init__(self, index, name, data_type, nominal_values, number_missing_values): + def __init__( + self, + index: int, + name: str, + data_type: str, + nominal_values: List[str], + number_missing_values: int, + ): if type(index) != int: raise ValueError("Index is of wrong datatype") if data_type not in self.LEGAL_DATA_TYPES: diff --git a/openml/datasets/dataset.py b/openml/datasets/dataset.py index 8c366dfb8..0c065b855 100644 --- a/openml/datasets/dataset.py +++ b/openml/datasets/dataset.py @@ -3,7 +3,6 @@ from collections import OrderedDict import re import gzip -import io import logging import os import pickle @@ -13,7 +12,7 @@ import numpy as np import pandas as pd import scipy.sparse -from warnings import warn +import xmltodict from openml.base import OpenMLBase from .data_feature import OpenMLDataFeature @@ -34,7 +33,7 @@ class OpenMLDataset(OpenMLBase): Name of the dataset. description : str Description of the dataset. - format : str + data_format : str Format of the dataset which can be either 'arff' or 'sparse_arff'. cache_format : str Format for caching the dataset which can be either 'feather' or 'pickle'. @@ -97,13 +96,16 @@ class OpenMLDataset(OpenMLBase): which maps a quality name to a quality value. 
dataset: string, optional Serialized arff dataset string. + minio_url: string, optional + URL to the MinIO bucket with dataset files + parquet_file: string, optional + Path to the local parquet file. """ def __init__( self, name, description, - format=None, data_format="arff", cache_format="pickle", dataset_id=None, @@ -127,9 +129,11 @@ def __init__( update_comment=None, md5_checksum=None, data_file=None, - features=None, - qualities=None, + features_file: Optional[str] = None, + qualities_file: Optional[str] = None, dataset=None, + minio_url: Optional[str] = None, + parquet_file: Optional[str] = None, ): def find_invalid_characters(string, pattern): invalid_chars = set() @@ -178,16 +182,8 @@ def find_invalid_characters(string, pattern): ) self.cache_format = cache_format - if format is None: - self.format = data_format - else: - warn( - "The format parameter in the init will be deprecated " - "in the future." - "Please use data_format instead", - DeprecationWarning, - ) - self.format = format + # Has to be called format, otherwise there will be an XML upload error + self.format = data_format self.creator = creator self.contributor = contributor self.collection_date = collection_date @@ -198,7 +194,7 @@ def find_invalid_characters(string, pattern): self.default_target_attribute = default_target_attribute self.row_id_attribute = row_id_attribute if isinstance(ignore_attribute, str): - self.ignore_attribute = [ignore_attribute] + self.ignore_attribute = [ignore_attribute] # type: Optional[List[str]] elif isinstance(ignore_attribute, list) or ignore_attribute is None: self.ignore_attribute = ignore_attribute else: @@ -212,39 +208,31 @@ def find_invalid_characters(string, pattern): self.update_comment = update_comment self.md5_checksum = md5_checksum self.data_file = data_file - self.features = None - self.qualities = None + self.parquet_file = parquet_file self._dataset = dataset + self._minio_url = minio_url - if features is not None: - self.features = {} - for idx, xmlfeature in enumerate(features["oml:feature"]): - nr_missing = xmlfeature.get("oml:number_of_missing_values", 0) - feature = OpenMLDataFeature( - int(xmlfeature["oml:index"]), - xmlfeature["oml:name"], - xmlfeature["oml:data_type"], - xmlfeature.get("oml:nominal_value"), - int(nr_missing), - ) - if idx != feature.index: - raise ValueError("Data features not provided " "in right order") - self.features[feature.index] = feature + if features_file is not None: + self.features = _read_features( + features_file + ) # type: Optional[Dict[int, OpenMLDataFeature]] + else: + self.features = None - self.qualities = _check_qualities(qualities) + if qualities_file: + self.qualities = _read_qualities(qualities_file) # type: Optional[Dict[str, float]] + else: + self.qualities = None if data_file is not None: - ( - self.data_pickle_file, - self.data_feather_file, - self.feather_attribute_file, - ) = self._create_pickle_in_cache(data_file) + rval = self._compressed_cache_file_paths(data_file) + self.data_pickle_file = rval[0] if os.path.exists(rval[0]) else None + self.data_feather_file = rval[1] if os.path.exists(rval[1]) else None + self.feather_attribute_file = rval[2] if os.path.exists(rval[2]) else None else: - self.data_pickle_file, self.data_feather_file, self.feather_attribute_file = ( - None, - None, - None, - ) + self.data_pickle_file = None + self.data_feather_file = None + self.feather_attribute_file = None @property def id(self) -> Optional[int]: @@ -311,9 +299,11 @@ def __eq__(self, other): def _download_data(self) -> None: """ 
Download ARFF data file to standard cache directory. Set `self.data_file`. """ # import required here to avoid circular import. - from .functions import _get_dataset_arff + from .functions import _get_dataset_arff, _get_dataset_parquet self.data_file = _get_dataset_arff(self) + if self._minio_url is not None: + self.parquet_file = _get_dataset_parquet(self) def _get_arff(self, format: str) -> Dict: """Read ARFF file and return decoded arff. @@ -367,7 +357,7 @@ def decode_arff(fh): with gzip.open(filename) as fh: return decode_arff(fh) else: - with io.open(filename, encoding="utf8") as fh: + with open(filename, encoding="utf8") as fh: return decode_arff(fh) def _parse_data_from_arff( @@ -407,7 +397,7 @@ def _parse_data_from_arff( categories_names = {} categorical = [] for i, (name, type_) in enumerate(data["attributes"]): - # if the feature is nominal and the a sparse matrix is + # if the feature is nominal and a sparse matrix is # requested, the categories need to be numeric if isinstance(type_, list) and self.format.lower() == "sparse_arff": try: @@ -415,12 +405,10 @@ def _parse_data_from_arff( # can be encoded into integers pd.factorize(type_)[0] except ValueError: - raise ValueError( - "Categorical data needs to be numeric when " "using sparse ARFF." - ) + raise ValueError("Categorical data needs to be numeric when using sparse ARFF.") # string can only be supported with pandas DataFrame elif type_ == "STRING" and self.format.lower() == "sparse_arff": - raise ValueError("Dataset containing strings is not supported " "with sparse ARFF.") + raise ValueError("Dataset containing strings is not supported with sparse ARFF.") # infer the dtype from the ARFF header if isinstance(type_, list): @@ -456,6 +444,17 @@ def _parse_data_from_arff( col.append( self._unpack_categories(X[column_name], categories_names[column_name]) ) + elif attribute_dtype[column_name] in ("floating", "integer"): + X_col = X[column_name] + if X_col.min() >= 0 and X_col.max() <= 255: + try: + X_col_uint = X_col.astype("uint8") + if (X_col == X_col_uint).all(): + col.append(X_col_uint) + continue + except ValueError: + pass + col.append(X[column_name]) else: col.append(X[column_name]) X = pd.concat(col, axis=1) @@ -464,152 +463,117 @@ def _parse_data_from_arff( return X, categorical, attribute_names - def _create_pickle_in_cache(self, data_file: str) -> Tuple[str, str, str]: - """ Parse the arff and pickle the result. Update any old pickle objects. """ - data_pickle_file = data_file.replace(".arff", ".pkl.py3") - data_feather_file = data_file.replace(".arff", ".feather") - feather_attribute_file = data_file.replace(".arff", ".feather.attributes.pkl.py3") - if os.path.exists(data_pickle_file) and self.cache_format == "pickle": - # Load the data to check if the pickle file is outdated (i.e. contains numpy array) - with open(data_pickle_file, "rb") as fh: - try: - data, categorical, attribute_names = pickle.load(fh) - except EOFError: - # The file is likely corrupt, see #780. - # We deal with this when loading the data in `_load_data`. - return data_pickle_file, data_feather_file, feather_attribute_file - except ModuleNotFoundError: - # There was some issue loading the file, see #918 - # We deal with this when loading the data in `_load_data`. - return data_pickle_file, data_feather_file, feather_attribute_file - except ValueError as e: - if "unsupported pickle protocol" in e.args[0]: - # There was some issue loading the file, see #898 - # We deal with this when loading the data in `_load_data`. 
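The integer-downcasting branch added to `_parse_data_from_arff` above reduces memory use for integer columns whose values fit into a byte. A standalone sketch of the same lossless check (the function name is illustrative, not part of the library):

import pandas as pd

def maybe_downcast_to_uint8(col: pd.Series) -> pd.Series:
    # Only columns whose values all fall in [0, 255] are candidates.
    if col.min() >= 0 and col.max() <= 255:
        try:
            col_uint = col.astype("uint8")
            # Keep the downcast only when it is lossless.
            if (col == col_uint).all():
                return col_uint
        except ValueError:
            pass
    return col

maybe_downcast_to_uint8(pd.Series([0, 17, 255])).dtype  # uint8
maybe_downcast_to_uint8(pd.Series([0.5, 17.0])).dtype   # float64, left unchanged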
- return data_pickle_file, data_feather_file, feather_attribute_file - else: - raise - - # Between v0.8 and v0.9 the format of pickled data changed from - # np.ndarray to pd.DataFrame. This breaks some backwards compatibility, - # e.g. for `run_model_on_task`. If a local file still exists with - # np.ndarray data, we reprocess the data file to store a pickled - # pd.DataFrame blob. See also #646. - if isinstance(data, pd.DataFrame) or scipy.sparse.issparse(data): - logger.debug("Data pickle file already exists and is up to date.") - return data_pickle_file, data_feather_file, feather_attribute_file - elif os.path.exists(data_feather_file) and self.cache_format == "feather": - # Load the data to check if the pickle file is outdated (i.e. contains numpy array) - try: - data = pd.read_feather(data_feather_file) - except EOFError: - # The file is likely corrupt, see #780. - # We deal with this when loading the data in `_load_data`. - return data_pickle_file, data_feather_file, feather_attribute_file - except ModuleNotFoundError: - # There was some issue loading the file, see #918 - # We deal with this when loading the data in `_load_data`. - return data_pickle_file, data_feather_file, feather_attribute_file - except ValueError as e: - if "unsupported pickle protocol" in e.args[0]: - # There was some issue loading the file, see #898 - # We deal with this when loading the data in `_load_data`. - return data_pickle_file, data_feather_file, feather_attribute_file - else: - raise + def _compressed_cache_file_paths(self, data_file: str) -> Tuple[str, str, str]: + ext = f".{data_file.split('.')[-1]}" + data_pickle_file = data_file.replace(ext, ".pkl.py3") + data_feather_file = data_file.replace(ext, ".feather") + feather_attribute_file = data_file.replace(ext, ".feather.attributes.pkl.py3") + return data_pickle_file, data_feather_file, feather_attribute_file + + def _cache_compressed_file_from_file( + self, data_file: str + ) -> Tuple[Union[pd.DataFrame, scipy.sparse.csr_matrix], List[bool], List[str]]: + """ Store data from the local file in compressed format. - logger.debug("Data feather file already exists and is up to date.") - return data_pickle_file, data_feather_file, feather_attribute_file + If a local parquet file is present it will be used instead of the arff file. + Sets cache_format to 'pickle' if data is sparse. + """ + ( + data_pickle_file, + data_feather_file, + feather_attribute_file, + ) = self._compressed_cache_file_paths(data_file) + + if data_file.endswith(".arff"): + data, categorical, attribute_names = self._parse_data_from_arff(data_file) + elif data_file.endswith(".pq"): + try: + data = pd.read_parquet(data_file) + except Exception as e: + raise Exception(f"File: {data_file}") from e - # At this point either the pickle file does not exist, or it had outdated formatting. - # We parse the data from arff again and populate the cache with a recent pickle file. 
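For reference, the new `_compressed_cache_file_paths` helper above only swaps the extension of the downloaded data file; for a hypothetical cache path the derived names look as follows (illustration only, not library code):

data_file = "/cache/datasets/61/dataset.arff"            # could equally be dataset.pq
ext = "." + data_file.split(".")[-1]
data_file.replace(ext, ".pkl.py3")                       # .../dataset.pkl.py3
data_file.replace(ext, ".feather")                       # .../dataset.feather
data_file.replace(ext, ".feather.attributes.pkl.py3")    # .../dataset.feather.attributes.pkl.py3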
- X, categorical, attribute_names = self._parse_data_from_arff(data_file) + categorical = [data[c].dtype.name == "category" for c in data.columns] + attribute_names = list(data.columns) + else: + raise ValueError(f"Unknown file type for file '{data_file}'.") # Feather format does not work for sparse datasets, so we use pickle for sparse datasets + if scipy.sparse.issparse(data): + self.cache_format = "pickle" - if self.cache_format == "feather" and not scipy.sparse.issparse(X): - logger.info("feather write {}".format(self.name)) - X.to_feather(data_feather_file) + logger.info(f"{self.cache_format} write {self.name}") + if self.cache_format == "feather": + data.to_feather(data_feather_file) with open(feather_attribute_file, "wb") as fh: pickle.dump((categorical, attribute_names), fh, pickle.HIGHEST_PROTOCOL) + self.data_feather_file = data_feather_file + self.feather_attribute_file = feather_attribute_file else: - logger.info("pickle write {}".format(self.name)) - self.cache_format = "pickle" with open(data_pickle_file, "wb") as fh: - pickle.dump((X, categorical, attribute_names), fh, pickle.HIGHEST_PROTOCOL) - logger.debug( - "Saved dataset {did}: {name} to file {path}".format( - did=int(self.dataset_id or -1), name=self.name, path=data_pickle_file - ) - ) - return data_pickle_file, data_feather_file, feather_attribute_file + pickle.dump((data, categorical, attribute_names), fh, pickle.HIGHEST_PROTOCOL) + self.data_pickle_file = data_pickle_file + + data_file = data_pickle_file if self.cache_format == "pickle" else data_feather_file + logger.debug(f"Saved dataset {int(self.dataset_id or -1)}: {self.name} to file {data_file}") + + return data, categorical, attribute_names def _load_data(self): - """ Load data from pickle or arff. Download data first if not present on disk. """ - if (self.cache_format == "pickle" and self.data_pickle_file is None) or ( - self.cache_format == "feather" and self.data_feather_file is None - ): + """ Load data from compressed format or arff. Download data if not present on disk. """ + need_to_create_pickle = self.cache_format == "pickle" and self.data_pickle_file is None + need_to_create_feather = self.cache_format == "feather" and self.data_feather_file is None + + if need_to_create_pickle or need_to_create_feather: if self.data_file is None: self._download_data() - ( - self.data_pickle_file, - self.data_feather_file, - self.feather_attribute_file, - ) = self._create_pickle_in_cache(self.data_file) + file_to_load = self.data_file if self.parquet_file is None else self.parquet_file + return self._cache_compressed_file_from_file(file_to_load) + + # helper variable to help identify where errors occur + fpath = self.data_feather_file if self.cache_format == "feather" else self.data_pickle_file + logger.info(f"{self.cache_format} load data {self.name}") try: if self.cache_format == "feather": - logger.info("feather load data {}".format(self.name)) data = pd.read_feather(self.data_feather_file) - + fpath = self.feather_attribute_file with open(self.feather_attribute_file, "rb") as fh: categorical, attribute_names = pickle.load(fh) else: - logger.info("pickle load data {}".format(self.name)) with open(self.data_pickle_file, "rb") as fh: data, categorical, attribute_names = pickle.load(fh) - except EOFError: - logger.warning( - "Detected a corrupt cache file loading dataset %d: '%s'. " - "We will continue loading data from the arff-file, " - "but this will be much slower for big datasets. 
" - "Please manually delete the cache file if you want OpenML-Python " - "to attempt to reconstruct it." - "" % (self.dataset_id, self.data_pickle_file) - ) - data, categorical, attribute_names = self._parse_data_from_arff(self.data_file) except FileNotFoundError: - raise ValueError( - "Cannot find a pickle file for dataset {} at " - "location {} ".format(self.name, self.data_pickle_file) - ) - except ModuleNotFoundError as e: + raise ValueError(f"Cannot find file for dataset {self.name} at location '{fpath}'.") + except (EOFError, ModuleNotFoundError, ValueError) as e: + error_message = e.message if hasattr(e, "message") else e.args[0] + hint = "" + + if isinstance(e, EOFError): + readable_error = "Detected a corrupt cache file" + elif isinstance(e, ModuleNotFoundError): + readable_error = "Detected likely dependency issues" + hint = "This is most likely due to https://github.com/openml/openml-python/issues/918. " # noqa: 501 + elif isinstance(e, ValueError) and "unsupported pickle protocol" in e.args[0]: + readable_error = "Encountered unsupported pickle protocol" + else: + raise # an unknown ValueError is raised, should crash and file bug report + logger.warning( - "Encountered error message when loading cached dataset %d: '%s'. " - "Error message was: %s. " - "This is most likely due to https://github.com/openml/openml-python/issues/918. " + f"{readable_error} when loading dataset {self.id} from '{fpath}'. " + f"{hint}" + f"Error message was: {error_message}. " "We will continue loading data from the arff-file, " "but this will be much slower for big datasets. " "Please manually delete the cache file if you want OpenML-Python " "to attempt to reconstruct it." - "" % (self.dataset_id, self.data_pickle_file, e.args[0]), ) data, categorical, attribute_names = self._parse_data_from_arff(self.data_file) - except ValueError as e: - if "unsupported pickle protocol" in e.args[0]: - logger.warning( - "Encountered unsupported pickle protocol when loading cached dataset %d: '%s'. " - "Error message was: %s. " - "We will continue loading data from the arff-file, " - "but this will be much slower for big datasets. " - "Please manually delete the cache file if you want OpenML-Python " - "to attempt to reconstruct it." 
- "" % (self.dataset_id, self.data_pickle_file, e.args[0]), - ) - data, categorical, attribute_names = self._parse_data_from_arff(self.data_file) - else: - raise + data_up_to_date = isinstance(data, pd.DataFrame) or scipy.sparse.issparse(data) + if self.cache_format == "pickle" and not data_up_to_date: + logger.info("Updating outdated pickle file.") + file_to_load = self.data_file if self.parquet_file is None else self.parquet_file + return self._cache_compressed_file_from_file(file_to_load) return data, categorical, attribute_names @staticmethod @@ -675,6 +639,11 @@ def _encode_if_category(column): @staticmethod def _unpack_categories(series, categories): + # nan-likes can not be explicitly specified as a category + def valid_category(cat): + return isinstance(cat, str) or (cat is not None and not np.isnan(cat)) + + filtered_categories = [c for c in categories if valid_category(c)] col = [] for x in series: try: @@ -683,7 +652,7 @@ def _unpack_categories(series, categories): col.append(np.nan) # We require two lines to create a series of categories as detailed here: # https://pandas.pydata.org/pandas-docs/version/0.24/user_guide/categorical.html#series-creation # noqa E501 - raw_cat = pd.Categorical(col, ordered=True, categories=categories) + raw_cat = pd.Categorical(col, ordered=True, categories=filtered_categories) return pd.Series(raw_cat, index=series.index, name=series.name) def get_data( @@ -742,7 +711,7 @@ def get_data( to_exclude.extend(self.ignore_attribute) if len(to_exclude) > 0: - logger.info("Going to remove the following attributes:" " %s" % to_exclude) + logger.info("Going to remove the following attributes: %s" % to_exclude) keep = np.array( [True if column not in to_exclude else False for column in attribute_names] ) @@ -809,6 +778,10 @@ def retrieve_class_labels(self, target_name: str = "class") -> Union[None, List[ ------- list """ + if self.features is None: + raise ValueError( + "retrieve_class_labels can only be called if feature information is available." 
+ ) for feature in self.features.values(): if (feature.name == target_name) and (feature.data_type == "nominal"): return feature.nominal_values @@ -937,18 +910,73 @@ def _to_dict(self) -> "OrderedDict[str, OrderedDict]": return data_container -def _check_qualities(qualities): - if qualities is not None: - qualities_ = {} - for xmlquality in qualities: - name = xmlquality["oml:name"] - if xmlquality.get("oml:value", None) is None: - value = float("NaN") - elif xmlquality["oml:value"] == "null": - value = float("NaN") - else: - value = float(xmlquality["oml:value"]) - qualities_[name] = value - return qualities_ - else: - return None +def _read_features(features_file: str) -> Dict[int, OpenMLDataFeature]: + features_pickle_file = _get_features_pickle_file(features_file) + try: + with open(features_pickle_file, "rb") as fh_binary: + features = pickle.load(fh_binary) + except: # noqa E722 + with open(features_file, encoding="utf8") as fh: + features_xml_string = fh.read() + xml_dict = xmltodict.parse( + features_xml_string, force_list=("oml:feature", "oml:nominal_value") + ) + features_xml = xml_dict["oml:data_features"] + + features = {} + for idx, xmlfeature in enumerate(features_xml["oml:feature"]): + nr_missing = xmlfeature.get("oml:number_of_missing_values", 0) + feature = OpenMLDataFeature( + int(xmlfeature["oml:index"]), + xmlfeature["oml:name"], + xmlfeature["oml:data_type"], + xmlfeature.get("oml:nominal_value"), + int(nr_missing), + ) + if idx != feature.index: + raise ValueError("Data features not provided in right order") + features[feature.index] = feature + + with open(features_pickle_file, "wb") as fh_binary: + pickle.dump(features, fh_binary) + return features + + +def _get_features_pickle_file(features_file: str) -> str: + """This function only exists so it can be mocked during unit testing""" + return features_file + ".pkl" + + +def _read_qualities(qualities_file: str) -> Dict[str, float]: + qualities_pickle_file = _get_qualities_pickle_file(qualities_file) + try: + with open(qualities_pickle_file, "rb") as fh_binary: + qualities = pickle.load(fh_binary) + except: # noqa E722 + with open(qualities_file, encoding="utf8") as fh: + qualities_xml = fh.read() + xml_as_dict = xmltodict.parse(qualities_xml, force_list=("oml:quality",)) + qualities = xml_as_dict["oml:data_qualities"]["oml:quality"] + qualities = _check_qualities(qualities) + with open(qualities_pickle_file, "wb") as fh_binary: + pickle.dump(qualities, fh_binary) + return qualities + + +def _get_qualities_pickle_file(qualities_file: str) -> str: + """This function only exists so it can be mocked during unit testing""" + return qualities_file + ".pkl" + + +def _check_qualities(qualities: List[Dict[str, str]]) -> Dict[str, float]: + qualities_ = {} + for xmlquality in qualities: + name = xmlquality["oml:name"] + if xmlquality.get("oml:value", None) is None: + value = float("NaN") + elif xmlquality["oml:value"] == "null": + value = float("NaN") + else: + value = float(xmlquality["oml:value"]) + qualities_[name] = value + return qualities_ diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py index 84943b244..746285650 100644 --- a/openml/datasets/functions.py +++ b/openml/datasets/functions.py @@ -3,8 +3,7 @@ import io import logging import os -import re -from typing import List, Dict, Union, Optional +from typing import List, Dict, Union, Optional, cast import numpy as np import arff @@ -18,13 +17,11 @@ import openml._api_calls from .dataset import OpenMLDataset from ..exceptions import ( - 
OpenMLCacheException, OpenMLHashException, OpenMLServerException, OpenMLPrivateDatasetError, ) from ..utils import ( - _create_cache_directory, _remove_cache_dir_for_id, _create_cache_directory_for_id, ) @@ -37,118 +34,6 @@ # Local getters/accessors to the cache directory -def _list_cached_datasets(): - """ Return list with ids of all cached datasets. - - Returns - ------- - list - List with IDs of all cached datasets. - """ - datasets = [] - - dataset_cache_dir = _create_cache_directory(DATASETS_CACHE_DIR_NAME) - directory_content = os.listdir(dataset_cache_dir) - directory_content.sort() - - # Find all dataset ids for which we have downloaded the dataset - # description - for directory_name in directory_content: - # First check if the directory name could be an OpenML dataset id - if not re.match(r"[0-9]*", directory_name): - continue - - dataset_id = int(directory_name) - - directory_name = os.path.join(dataset_cache_dir, directory_name) - dataset_directory_content = os.listdir(directory_name) - - if ( - "dataset.arff" in dataset_directory_content - and "description.xml" in dataset_directory_content - ): - if dataset_id not in datasets: - datasets.append(dataset_id) - - datasets.sort() - return datasets - - -def _get_cached_datasets(): - """Searches for all OpenML datasets in the OpenML cache dir. - - Return a dictionary which maps dataset ids to dataset objects""" - dataset_list = _list_cached_datasets() - datasets = OrderedDict() - - for dataset_id in dataset_list: - datasets[dataset_id] = _get_cached_dataset(dataset_id) - - return datasets - - -def _get_cached_dataset(dataset_id: int) -> OpenMLDataset: - """Get cached dataset for ID. - - Returns - ------- - OpenMLDataset - """ - description = _get_cached_dataset_description(dataset_id) - arff_file = _get_cached_dataset_arff(dataset_id) - features = _get_cached_dataset_features(dataset_id) - qualities = _get_cached_dataset_qualities(dataset_id) - dataset = _create_dataset_from_description(description, features, qualities, arff_file) - - return dataset - - -def _get_cached_dataset_description(dataset_id): - did_cache_dir = _create_cache_directory_for_id(DATASETS_CACHE_DIR_NAME, dataset_id,) - description_file = os.path.join(did_cache_dir, "description.xml") - try: - with io.open(description_file, encoding="utf8") as fh: - dataset_xml = fh.read() - return xmltodict.parse(dataset_xml)["oml:data_set_description"] - except (IOError, OSError): - raise OpenMLCacheException( - "Dataset description for dataset id %d not " "cached" % dataset_id - ) - - -def _get_cached_dataset_features(dataset_id): - did_cache_dir = _create_cache_directory_for_id(DATASETS_CACHE_DIR_NAME, dataset_id,) - features_file = os.path.join(did_cache_dir, "features.xml") - try: - return _load_features_from_file(features_file) - except (IOError, OSError): - raise OpenMLCacheException("Dataset features for dataset id %d not " "cached" % dataset_id) - - -def _get_cached_dataset_qualities(dataset_id): - did_cache_dir = _create_cache_directory_for_id(DATASETS_CACHE_DIR_NAME, dataset_id,) - qualities_file = os.path.join(did_cache_dir, "qualities.xml") - try: - with io.open(qualities_file, encoding="utf8") as fh: - qualities_xml = fh.read() - qualities_dict = xmltodict.parse(qualities_xml) - return qualities_dict["oml:data_qualities"]["oml:quality"] - except (IOError, OSError): - raise OpenMLCacheException("Dataset qualities for dataset id %d not " "cached" % dataset_id) - - -def _get_cached_dataset_arff(dataset_id): - did_cache_dir = 
_create_cache_directory_for_id(DATASETS_CACHE_DIR_NAME, dataset_id,) - output_file = os.path.join(did_cache_dir, "dataset.arff") - - try: - with io.open(output_file, encoding="utf8"): - pass - return output_file - except (OSError, IOError): - raise OpenMLCacheException("ARFF file for dataset id %d not " "cached" % dataset_id) - - def _get_cache_directory(dataset: OpenMLDataset) -> str: """ Return the cache directory of the OpenMLDataset """ return _create_cache_directory_for_id(DATASETS_CACHE_DIR_NAME, dataset.dataset_id) @@ -183,7 +68,7 @@ def list_datasets( status: Optional[str] = None, tag: Optional[str] = None, output_format: str = "dict", - **kwargs + **kwargs, ) -> Union[Dict, pd.DataFrame]: """ @@ -251,7 +136,7 @@ def list_datasets( size=size, status=status, tag=tag, - **kwargs + **kwargs, ) @@ -326,34 +211,59 @@ def __list_datasets(api_call, output_format="dict"): return datasets -def _load_features_from_file(features_file: str) -> Dict: - with io.open(features_file, encoding="utf8") as fh: - features_xml = fh.read() - xml_dict = xmltodict.parse(features_xml, force_list=("oml:feature", "oml:nominal_value")) - return xml_dict["oml:data_features"] +def _expand_parameter(parameter: Union[str, List[str]]) -> List[str]: + expanded_parameter = [] + if isinstance(parameter, str): + expanded_parameter = [x.strip() for x in parameter.split(",")] + elif isinstance(parameter, list): + expanded_parameter = parameter + return expanded_parameter + + +def _validated_data_attributes( + attributes: List[str], data_attributes: List[str], parameter_name: str +) -> None: + for attribute_ in attributes: + is_attribute_a_data_attribute = any([attr[0] == attribute_ for attr in data_attributes]) + if not is_attribute_a_data_attribute: + raise ValueError( + "all attribute of '{}' should be one of the data attribute. " + " Got '{}' while candidates are {}.".format( + parameter_name, attribute_, [attr[0] for attr in data_attributes] + ) + ) -def check_datasets_active(dataset_ids: List[int]) -> Dict[int, bool]: +def check_datasets_active( + dataset_ids: List[int], raise_error_if_not_exist: bool = True, +) -> Dict[int, bool]: """ Check if the dataset ids provided are active. + Raises an error if a dataset_id in the given list + of dataset_ids does not exist on the server. + Parameters ---------- dataset_ids : List[int] A list of integers representing dataset ids. + raise_error_if_not_exist : bool (default=True) + Flag that if activated can raise an error, if one or more of the + given dataset ids do not exist on the server. Returns ------- dict A dictionary with items {did: bool} """ - dataset_list = list_datasets(status="all") + dataset_list = list_datasets(status="all", data_id=dataset_ids) active = {} for did in dataset_ids: dataset = dataset_list.get(did, None) if dataset is None: - raise ValueError("Could not find dataset {} in OpenML dataset list.".format(did)) + if raise_error_if_not_exist: + raise ValueError(f"Could not find dataset {did} in OpenML dataset list.") else: active[did] = dataset["status"] == "active" @@ -380,6 +290,8 @@ def _name_to_id( error_if_multiple : bool (default=False) If `False`, if multiple datasets match, return the least recent active dataset. If `True`, if multiple datasets match, raise an error. + download_qualities : bool, optional (default=True) + If `True`, also download qualities.xml file. If False it skip the qualities.xml. 
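With the new `raise_error_if_not_exist` flag, `check_datasets_active` can probe a mixed list of ids without a try/except. A short usage sketch (the ids are purely illustrative):

import openml

status = openml.datasets.check_datasets_active(
    dataset_ids=[2, 123456789], raise_error_if_not_exist=False
)
# e.g. {2: True} -- ids unknown to the server are simply omitted instead of raising a ValueError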
Returns ------- @@ -400,7 +312,7 @@ def _name_to_id( def get_datasets( - dataset_ids: List[Union[str, int]], download_data: bool = True, + dataset_ids: List[Union[str, int]], download_data: bool = True, download_qualities: bool = True ) -> List[OpenMLDataset]: """Download datasets. @@ -416,6 +328,8 @@ def get_datasets( make the operation noticeably slower. Metadata is also still retrieved. If False, create the OpenMLDataset and only populate it with the metadata. The data may later be retrieved through the `OpenMLDataset.get_data` method. + download_qualities : bool, optional (default=True) + If True, also download qualities.xml file. If False it skip the qualities.xml. Returns ------- @@ -424,7 +338,9 @@ def get_datasets( """ datasets = [] for dataset_id in dataset_ids: - datasets.append(get_dataset(dataset_id, download_data)) + datasets.append( + get_dataset(dataset_id, download_data, download_qualities=download_qualities) + ) return datasets @@ -435,6 +351,7 @@ def get_dataset( version: int = None, error_if_multiple: bool = False, cache_format: str = "pickle", + download_qualities: bool = True, ) -> OpenMLDataset: """ Download the OpenML dataset representation, optionally also download actual data file. @@ -489,21 +406,28 @@ def get_dataset( did_cache_dir = _create_cache_directory_for_id(DATASETS_CACHE_DIR_NAME, dataset_id,) + remove_dataset_cache = True try: - remove_dataset_cache = True description = _get_dataset_description(did_cache_dir, dataset_id) - features = _get_dataset_features(did_cache_dir, dataset_id) + features_file = _get_dataset_features_file(did_cache_dir, dataset_id) try: - qualities = _get_dataset_qualities(did_cache_dir, dataset_id) + if download_qualities: + qualities_file = _get_dataset_qualities_file(did_cache_dir, dataset_id) + else: + qualities_file = "" except OpenMLServerException as e: if e.code == 362 and str(e) == "No qualities found - None": logger.warning("No qualities found for dataset {}".format(dataset_id)) - qualities = None + qualities_file = None else: raise arff_file = _get_dataset_arff(description) if download_data else None + if "oml:minio_url" in description and download_data: + parquet_file = _get_dataset_parquet(description) + else: + parquet_file = None remove_dataset_cache = False except OpenMLServerException as e: # if there was an exception, @@ -517,7 +441,7 @@ def get_dataset( _remove_cache_dir_for_id(DATASETS_CACHE_DIR_NAME, did_cache_dir) dataset = _create_dataset_from_description( - description, features, qualities, arff_file, cache_format + description, features_file, qualities_file, arff_file, parquet_file, cache_format ) return dataset @@ -636,6 +560,7 @@ def create_dataset( ignore_attribute : str | list Attributes that should be excluded in modelling, such as identifiers and indexes. + Can have multiple values, comma separated. citation : str Reference(s) that should be cited when building on this data. 
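A usage sketch of the new `download_qualities` switch on `get_dataset`/`get_datasets` introduced above (the dataset id is chosen only for illustration):

import openml

# Fetch only the description and feature metadata; skip qualities.xml and the data file.
dataset = openml.datasets.get_dataset(61, download_data=False, download_qualities=False)
print(dataset.qualities)  # None, because qualities were never downloaded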
version_label : str, optional @@ -687,6 +612,11 @@ def create_dataset( attributes_[attr_idx] = (attr_name, attributes[attr_name]) else: attributes_ = attributes + ignore_attributes = _expand_parameter(ignore_attribute) + _validated_data_attributes(ignore_attributes, attributes_, "ignore_attribute") + + default_target_attributes = _expand_parameter(default_target_attribute) + _validated_data_attributes(default_target_attributes, attributes_, "default_target_attribute") if row_id_attribute is not None: is_row_id_an_attribute = any([attr[0] == row_id_attribute for attr in attributes_]) @@ -943,6 +873,47 @@ def fork_dataset(data_id: int) -> int: return int(data_id) +def _topic_add_dataset(data_id: int, topic: str): + """ + Adds a topic for a dataset. + This API is not available for all OpenML users and is accessible only by admins. + Parameters + ---------- + data_id : int + id of the dataset for which the topic needs to be added + topic : str + Topic to be added for the dataset + """ + if not isinstance(data_id, int): + raise TypeError("`data_id` must be of type `int`, not {}.".format(type(data_id))) + form_data = {"data_id": data_id, "topic": topic} + result_xml = openml._api_calls._perform_api_call("data/topicadd", "post", data=form_data) + result = xmltodict.parse(result_xml) + data_id = result["oml:data_topic"]["oml:id"] + return int(data_id) + + +def _topic_delete_dataset(data_id: int, topic: str): + """ + Removes a topic from a dataset. + This API is not available for all OpenML users and is accessible only by admins. + Parameters + ---------- + data_id : int + id of the dataset to be forked + topic : str + Topic to be deleted + + """ + if not isinstance(data_id, int): + raise TypeError("`data_id` must be of type `int`, not {}.".format(type(data_id))) + form_data = {"data_id": data_id, "topic": topic} + result_xml = openml._api_calls._perform_api_call("data/topicdelete", "post", data=form_data) + result = xmltodict.parse(result_xml) + data_id = result["oml:data_topic"]["oml:id"] + return int(data_id) + + def _get_dataset_description(did_cache_dir, dataset_id): """Get the dataset description as xml dictionary. @@ -969,8 +940,9 @@ def _get_dataset_description(did_cache_dir, dataset_id): description_file = os.path.join(did_cache_dir, "description.xml") try: - return _get_cached_dataset_description(dataset_id) - except OpenMLCacheException: + with io.open(description_file, encoding="utf8") as fh: + dataset_xml = fh.read() + except Exception: url_extension = "data/{}".format(dataset_id) dataset_xml = openml._api_calls._perform_api_call(url_extension, "get") with io.open(description_file, "w", encoding="utf8") as fh: @@ -981,6 +953,55 @@ def _get_dataset_description(did_cache_dir, dataset_id): return description +def _get_dataset_parquet( + description: Union[Dict, OpenMLDataset], cache_directory: str = None +) -> Optional[str]: + """ Return the path to the local parquet file of the dataset. If is not cached, it is downloaded. + + Checks if the file is in the cache, if yes, return the path to the file. + If not, downloads the file and caches it, then returns the file path. + The cache directory is generated based on dataset information, but can also be specified. + + This function is NOT thread/multiprocessing safe. + Unlike the ARFF equivalent, checksums are not available/used (for now). + + Parameters + ---------- + description : dictionary or OpenMLDataset + Either a dataset description as dict or OpenMLDataset. 
+ + cache_directory: str, optional (default=None) + Folder to store the parquet file in. + If None, use the default cache directory for the dataset. + + Returns + ------- + output_filename : string, optional + Location of the Parquet file if successfully downloaded, None otherwise. + """ + if isinstance(description, dict): + url = description.get("oml:minio_url") + did = description.get("oml:id") + elif isinstance(description, OpenMLDataset): + url = description._minio_url + did = description.dataset_id + else: + raise TypeError("`description` should be either OpenMLDataset or Dict.") + + if cache_directory is None: + cache_directory = _create_cache_directory_for_id(DATASETS_CACHE_DIR_NAME, did) + output_file_path = os.path.join(cache_directory, "dataset.pq") + + if not os.path.isfile(output_file_path): + try: + openml._api_calls._download_minio_file( + source=cast(str, url), destination=output_file_path + ) + except FileNotFoundError: + return None + return output_file_path + + def _get_dataset_arff(description: Union[Dict, OpenMLDataset], cache_directory: str = None) -> str: """ Return the path to the local arff file of the dataset. If is not cached, it is downloaded. @@ -1031,8 +1052,8 @@ def _get_dataset_arff(description: Union[Dict, OpenMLDataset], cache_directory: return output_file_path -def _get_dataset_features(did_cache_dir, dataset_id): - """API call to get dataset features (cached) +def _get_dataset_features_file(did_cache_dir: str, dataset_id: int) -> str: + """API call to load dataset features. Loads from cache or downloads them. Features are feature descriptions for each column. (name, index, categorical, ...) @@ -1049,8 +1070,8 @@ def _get_dataset_features(did_cache_dir, dataset_id): Returns ------- - features : dict - Dictionary containing dataset feature descriptions, parsed from XML. + str + Path of the cached dataset feature file """ features_file = os.path.join(did_cache_dir, "features.xml") @@ -1061,11 +1082,11 @@ def _get_dataset_features(did_cache_dir, dataset_id): with io.open(features_file, "w", encoding="utf8") as fh: fh.write(features_xml) - return _load_features_from_file(features_file) + return features_file -def _get_dataset_qualities(did_cache_dir, dataset_id): - """API call to get dataset qualities (cached) +def _get_dataset_qualities_file(did_cache_dir, dataset_id): + """API call to load dataset qualities. Loads from cache or downloads them. Features are metafeatures (number of features, number of classes, ...) @@ -1079,10 +1100,12 @@ def _get_dataset_qualities(did_cache_dir, dataset_id): dataset_id : int Dataset ID + download_qualities : bool + whether to download or use the cached version. Returns ------- - qualities : dict - Dictionary containing dataset qualities, parsed from XML.
+ str + Path of the cached qualities file """ # Dataset qualities are subject to change and must be fetched every time qualities_file = os.path.join(did_cache_dir, "qualities.xml") @@ -1092,21 +1115,17 @@ def _get_dataset_qualities(did_cache_dir, dataset_id): except (OSError, IOError): url_extension = "data/qualities/{}".format(dataset_id) qualities_xml = openml._api_calls._perform_api_call(url_extension, "get") - with io.open(qualities_file, "w", encoding="utf8") as fh: fh.write(qualities_xml) - - xml_as_dict = xmltodict.parse(qualities_xml, force_list=("oml:quality",)) - qualities = xml_as_dict["oml:data_qualities"]["oml:quality"] - - return qualities + return qualities_file def _create_dataset_from_description( description: Dict[str, str], - features: Dict, - qualities: List, + features_file: str, + qualities_file: str, arff_file: str = None, + parquet_file: str = None, cache_format: str = "pickle", ) -> OpenMLDataset: """Create a dataset object from a description dict. @@ -1115,12 +1134,14 @@ def _create_dataset_from_description( ---------- description : dict Description of a dataset in xml dict. - features : dict - Description of a dataset features. + featuresfile : str + Path of the dataset features as xml file. qualities : list - Description of a dataset qualities. + Path of the dataset qualities as xml file. arff_file : string, optional Path of dataset ARFF file. + parquet_file : string, optional + Path of dataset Parquet file. cache_format: string, optional Caching option for datasets (feather/pickle) @@ -1155,8 +1176,10 @@ def _create_dataset_from_description( md5_checksum=description.get("oml:md5_checksum"), data_file=arff_file, cache_format=cache_format, - features=features, - qualities=qualities, + features_file=features_file, + qualities_file=qualities_file, + minio_url=description.get("oml:minio_url"), + parquet_file=parquet_file, ) diff --git a/openml/extensions/extension_interface.py b/openml/extensions/extension_interface.py index 2d06b69e0..4529ad163 100644 --- a/openml/extensions/extension_interface.py +++ b/openml/extensions/extension_interface.py @@ -229,6 +229,19 @@ def obtain_parameter_values( - ``oml:component`` : int: flow id to which the parameter belongs """ + @abstractmethod + def check_if_model_fitted(self, model: Any) -> bool: + """Returns True/False denoting if the model has already been fitted/trained. + + Parameters + ---------- + model : Any + + Returns + ------- + bool + """ + ################################################################################################ # Abstract methods for hyperparameter optimization diff --git a/openml/extensions/sklearn/__init__.py b/openml/extensions/sklearn/__init__.py index 2003934db..135e5ccf6 100644 --- a/openml/extensions/sklearn/__init__.py +++ b/openml/extensions/sklearn/__init__.py @@ -7,3 +7,31 @@ __all__ = ["SklearnExtension"] register_extension(SklearnExtension) + + +def cont(X): + """Returns True for all non-categorical columns, False for the rest. + + This is a helper function for OpenML datasets encoded as DataFrames simplifying the handling + of mixed data types. To build sklearn models on mixed data types, a ColumnTransformer is + required to process each type of columns separately. + This function allows transformations meant for continuous/numeric columns to access the + continuous/numeric columns given the dataset as DataFrame. 
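The `cont` helper above, together with its companion `cat` defined just below, is meant to be passed as a callable column selector to scikit-learn's ColumnTransformer when the task data is requested as a pandas DataFrame. A minimal sketch of the intended usage (the estimator choice is arbitrary):

from sklearn.compose import ColumnTransformer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.tree import DecisionTreeClassifier
from openml.extensions.sklearn import cat, cont

# cat/cont receive the DataFrame and return a boolean mask over its columns,
# so each transformer only sees the column types it can handle.
preprocessing = ColumnTransformer(
    transformers=[
        ("categorical", OneHotEncoder(handle_unknown="ignore"), cat),
        ("continuous", StandardScaler(), cont),
    ]
)
clf = make_pipeline(preprocessing, DecisionTreeClassifier())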
+ """ + if not hasattr(X, "dtypes"): + raise AttributeError("Not a Pandas DataFrame with 'dtypes' as attribute!") + return X.dtypes != "category" + + +def cat(X): + """Returns True for all categorical columns, False for the rest. + + This is a helper function for OpenML datasets encoded as DataFrames simplifying the handling + of mixed data types. To build sklearn models on mixed data types, a ColumnTransformer is + required to process each type of columns separately. + This function allows transformations meant for categorical columns to access the + categorical columns given the dataset as DataFrame. + """ + if not hasattr(X, "dtypes"): + raise AttributeError("Not a Pandas DataFrame with 'dtypes' as attribute!") + return X.dtypes == "category" diff --git a/openml/extensions/sklearn/extension.py b/openml/extensions/sklearn/extension.py index edb14487b..3441b4a4e 100644 --- a/openml/extensions/sklearn/extension.py +++ b/openml/extensions/sklearn/extension.py @@ -52,7 +52,10 @@ SIMPLE_NUMPY_TYPES = [ - nptype for type_cat, nptypes in np.sctypes.items() for nptype in nptypes if type_cat != "others" + nptype + for type_cat, nptypes in np.sctypes.items() + for nptype in nptypes # type: ignore + if type_cat != "others" ] SIMPLE_TYPES = tuple([bool, int, float, str] + SIMPLE_NUMPY_TYPES) @@ -211,6 +214,61 @@ def remove_all_in_parentheses(string: str) -> str: return short_name.format(pipeline) + @classmethod + def _min_dependency_str(cls, sklearn_version: str) -> str: + """ Returns a string containing the minimum dependencies for the sklearn version passed. + + Parameters + ---------- + sklearn_version : str + A version string of the xx.xx.xx + + Returns + ------- + str + """ + openml_major_version = int(LooseVersion(openml.__version__).version[1]) + # This explicit check is necessary to support existing entities on the OpenML servers + # that used the fixed dependency string (in the else block) + if openml_major_version > 11: + # OpenML v0.11 onwards supports sklearn>=0.24 + # assumption: 0.24 onwards sklearn should contain a _min_dependencies.py file with + # variables declared for extracting minimum dependency for that version + if LooseVersion(sklearn_version) >= "0.24": + from sklearn import _min_dependencies as _mindep + + dependency_list = { + "numpy": "{}".format(_mindep.NUMPY_MIN_VERSION), + "scipy": "{}".format(_mindep.SCIPY_MIN_VERSION), + "joblib": "{}".format(_mindep.JOBLIB_MIN_VERSION), + "threadpoolctl": "{}".format(_mindep.THREADPOOLCTL_MIN_VERSION), + } + elif LooseVersion(sklearn_version) >= "0.23": + dependency_list = { + "numpy": "1.13.3", + "scipy": "0.19.1", + "joblib": "0.11", + "threadpoolctl": "2.0.0", + } + if LooseVersion(sklearn_version).version[2] == 0: + dependency_list.pop("threadpoolctl") + elif LooseVersion(sklearn_version) >= "0.21": + dependency_list = {"numpy": "1.11.0", "scipy": "0.17.0", "joblib": "0.11"} + elif LooseVersion(sklearn_version) >= "0.19": + dependency_list = {"numpy": "1.8.2", "scipy": "0.13.3"} + else: + dependency_list = {"numpy": "1.6.1", "scipy": "0.9"} + else: + # this is INCORRECT for sklearn versions >= 0.19 and < 0.24 + # given that OpenML has existing flows uploaded with such dependency information, + # we change no behaviour for older sklearn version, however from 0.24 onwards + # the dependency list will be accurately updated for any flow uploaded to OpenML + dependency_list = {"numpy": "1.6.1", "scipy": "0.9"} + + sklearn_dep = "sklearn=={}".format(sklearn_version) + dep_str = "\n".join(["{}>={}".format(k, v) for k, v in 
dependency_list.items()]) + return "\n".join([sklearn_dep, dep_str]) + ################################################################################################ # Methods for flow serialization and de-serialization @@ -491,7 +549,7 @@ def get_version_information(self) -> List[str]: major, minor, micro, _, _ = sys.version_info python_version = "Python_{}.".format(".".join([str(major), str(minor), str(micro)])) sklearn_version = "Sklearn_{}.".format(sklearn.__version__) - numpy_version = "NumPy_{}.".format(numpy.__version__) + numpy_version = "NumPy_{}.".format(numpy.__version__) # type: ignore scipy_version = "SciPy_{}.".format(scipy.__version__) return [python_version, sklearn_version, numpy_version, scipy_version] @@ -508,8 +566,7 @@ def create_setup_string(self, model: Any) -> str: str """ run_environment = " ".join(self.get_version_information()) - # fixme str(model) might contain (...) - return run_environment + " " + str(model) + return run_environment def _is_cross_validator(self, o: Any) -> bool: return isinstance(o, sklearn.model_selection.BaseCrossValidator) @@ -769,20 +826,13 @@ def _serialize_model(self, model: Any) -> OpenMLFlow: tags=tags, extension=self, language="English", - # TODO fill in dependencies! dependencies=dependencies, ) return flow def _get_dependencies(self) -> str: - dependencies = "\n".join( - [ - self._format_external_version("sklearn", sklearn.__version__,), - "numpy>=1.6.1", - "scipy>=0.9", - ] - ) + dependencies = self._min_dependency_str(sklearn.__version__) return dependencies def _get_tags(self) -> List[str]: @@ -1189,11 +1239,11 @@ def _check_dependencies(self, dependencies: str, strict_version: bool = True) -> def _serialize_type(self, o: Any) -> "OrderedDict[str, str]": mapping = { float: "float", - np.float: "np.float", + np.float: "np.float", # type: ignore np.float32: "np.float32", np.float64: "np.float64", int: "int", - np.int: "np.int", + np.int: "np.int", # type: ignore np.int32: "np.int32", np.int64: "np.int64", } @@ -1205,11 +1255,11 @@ def _serialize_type(self, o: Any) -> "OrderedDict[str, str]": def _deserialize_type(self, o: str) -> Any: mapping = { "float": float, - "np.float": np.float, + "np.float": np.float, # type: ignore "np.float32": np.float32, "np.float64": np.float64, "int": int, - "np.int": np.int, + "np.int": np.int, # type: ignore "np.int32": np.int32, "np.int64": np.int64, } @@ -1537,6 +1587,37 @@ def _seed_current_object(current_value): model.set_params(**random_states) return model + def check_if_model_fitted(self, model: Any) -> bool: + """Returns True/False denoting if the model has already been fitted/trained + + Parameters + ---------- + model : Any + + Returns + ------- + bool + """ + try: + # check if model is fitted + from sklearn.exceptions import NotFittedError + + # Creating random dummy data of arbitrary size + dummy_data = np.random.uniform(size=(10, 3)) + # Using 'predict' instead of 'sklearn.utils.validation.check_is_fitted' for a more + # robust check that works across sklearn versions and models. 
Internally, 'predict' + # should call 'check_is_fitted' for every concerned attribute, thus offering a more + # assured check than explicit calls to 'check_is_fitted' + model.predict(dummy_data) + # Will reach here if the model was fit on a dataset with 3 features + return True + except NotFittedError: # needs to be the first exception to be caught + # Model is not fitted, as is required + return False + except ValueError: + # Will reach here if the model was fit on a dataset with more or less than 3 features + return True + def _run_model_on_fold( self, model: Any, @@ -1546,7 +1627,9 @@ def _run_model_on_fold( fold_no: int, y_train: Optional[np.ndarray] = None, X_test: Optional[Union[np.ndarray, scipy.sparse.spmatrix, pd.DataFrame]] = None, - ) -> Tuple[np.ndarray, pd.DataFrame, "OrderedDict[str, float]", Optional[OpenMLRunTrace]]: + ) -> Tuple[ + np.ndarray, Optional[pd.DataFrame], "OrderedDict[str, float]", Optional[OpenMLRunTrace] + ]: """Run a model on a repeat,fold,subsample triplet of the task and return prediction information. @@ -1581,19 +1664,21 @@ def _run_model_on_fold( ------- pred_y : np.ndarray Predictions on the training/test set, depending on the task type. - For supervised tasks, predicitons are on the test set. - For unsupervised tasks, predicitons are on the training set. - proba_y : pd.DataFrame + For supervised tasks, predictions are on the test set. + For unsupervised tasks, predictions are on the training set. + proba_y : pd.DataFrame, optional Predicted probabilities for the test set. None, if task is not Classification or Learning Curve prediction. user_defined_measures : OrderedDict[str, float] User defined measures that were generated on this fold - trace : Optional[OpenMLRunTrace]] + trace : OpenMLRunTrace, optional arff trace object from a fitted model and the trace content obtained by repeatedly calling ``run_model_on_task`` """ - def _prediction_to_probabilities(y: np.ndarray, model_classes: List[Any]) -> pd.DataFrame: + def _prediction_to_probabilities( + y: Union[np.ndarray, List], model_classes: List[Any], class_labels: Optional[List[str]] + ) -> pd.DataFrame: """Transforms predicted probabilities to match with OpenML class indices. Parameters @@ -1603,28 +1688,26 @@ def _prediction_to_probabilities(y: np.ndarray, model_classes: List[Any]) -> pd. training data). model_classes : list List of classes known_predicted by the model, ordered by their index. + class_labels : list + List of classes as stored in the task object fetched from server. 
Returns ------- pd.DataFrame """ + if class_labels is None: + raise ValueError("The task has no class labels") - if isinstance(task, (OpenMLClassificationTask, OpenMLLearningCurveTask)): - if task.class_labels is not None: - if isinstance(y_train, np.ndarray) and isinstance(task.class_labels[0], str): - # mapping (decoding) the predictions to the categories - # creating a separate copy to not change the expected pred_y type - y = [task.class_labels[pred] for pred in y] - else: - raise ValueError("The task has no class labels") - else: - return None + if isinstance(y_train, np.ndarray) and isinstance(class_labels[0], str): + # mapping (decoding) the predictions to the categories + # creating a separate copy to not change the expected pred_y type + y = [class_labels[pred] for pred in y] # list or numpy array of predictions - # y: list or numpy array of predictions # model_classes: sklearn classifier mapping from original array id to # prediction index id if not isinstance(model_classes, list): raise ValueError("please convert model classes to list prior to calling this fn") + # DataFrame allows more accurate mapping of classes as column names result = pd.DataFrame( 0, index=np.arange(len(y)), columns=model_classes, dtype=np.float32 @@ -1639,10 +1722,6 @@ def _prediction_to_probabilities(y: np.ndarray, model_classes: List[Any]) -> pd. if X_test is None: raise TypeError("argument X_test must not be of type None") - # TODO: if possible, give a warning if model is already fitted (acceptable - # in case of custom experimentation, - # but not desirable if we want to upload to OpenML). - model_copy = sklearn.base.clone(model, safe=True) # sanity check: prohibit users from optimizing n_jobs self._prevent_optimize_n_jobs(model_copy) @@ -1667,6 +1746,8 @@ def _prediction_to_probabilities(y: np.ndarray, model_classes: List[Any]) -> pd. user_defined_measures["usercpu_time_millis_training"] = modelfit_dur_cputime modelfit_dur_walltime = (time.time() - modelfit_start_walltime) * 1000 + if hasattr(model_copy, "refit_time_"): + modelfit_dur_walltime += model_copy.refit_time_ if can_measure_wallclocktime: user_defined_measures["wall_clock_time_millis_training"] = modelfit_dur_walltime @@ -1732,10 +1813,7 @@ def _prediction_to_probabilities(y: np.ndarray, model_classes: List[Any]) -> pd. proba_y = model_copy.predict_proba(X_test) proba_y = pd.DataFrame(proba_y, columns=model_classes) # handles X_test as numpy except AttributeError: # predict_proba is not available when probability=False - if task.class_labels is not None: - proba_y = _prediction_to_probabilities(pred_y, model_classes) - else: - raise ValueError("The task has no class labels") + proba_y = _prediction_to_probabilities(pred_y, model_classes, task.class_labels) if task.class_labels is not None: if proba_y.shape[1] != len(task.class_labels): @@ -1753,12 +1831,13 @@ def _prediction_to_probabilities(y: np.ndarray, model_classes: List[Any]) -> pd. proba_y.shape[1], len(task.class_labels), ) warnings.warn(message) - openml.config.logger.warn(message) + openml.config.logger.warning(message) for i, col in enumerate(task.class_labels): # adding missing columns with 0 probability if col not in model_classes: proba_y[col] = 0 + # We re-order the columns to move possibly added missing columns into place. 
proba_y = proba_y[task.class_labels] else: raise ValueError("The task has no class labels") diff --git a/openml/flows/flow.py b/openml/flows/flow.py index 5aaf70a9d..2acbcb0d1 100644 --- a/openml/flows/flow.py +++ b/openml/flows/flow.py @@ -229,7 +229,7 @@ def _to_dict(self) -> "OrderedDict[str, OrderedDict]": if not self.description: logger = logging.getLogger(__name__) - logger.warn("Flow % has empty description", self.name) + logger.warning("Flow % has empty description", self.name) flow_parameters = [] for key in self.parameters: diff --git a/openml/runs/functions.py b/openml/runs/functions.py index a08c84df8..92044a1b4 100644 --- a/openml/runs/functions.py +++ b/openml/runs/functions.py @@ -10,7 +10,9 @@ import sklearn.metrics import xmltodict +import numpy as np import pandas as pd +from joblib.parallel import Parallel, delayed import openml import openml.utils @@ -53,6 +55,7 @@ def run_model_on_task( upload_flow: bool = False, return_flow: bool = False, dataset_format: str = "dataframe", + n_jobs: Optional[int] = None, ) -> Union[OpenMLRun, Tuple[OpenMLRun, OpenMLFlow]]: """Run the model on the dataset defined by the task. @@ -83,6 +86,10 @@ def run_model_on_task( dataset_format : str (default='dataframe') If 'array', the dataset is passed to the model as a numpy array. If 'dataframe', the dataset is passed to the model as a pandas dataframe. + n_jobs : int (default=None) + The number of processes/threads to distribute the evaluation asynchronously. + If `None` or `1`, then the evaluation is treated as synchronous and processed sequentially. + If `-1`, then the job uses as many cores available. Returns ------- @@ -130,6 +137,7 @@ def get_task_and_type_conversion(task: Union[int, str, OpenMLTask]) -> OpenMLTas add_local_measures=add_local_measures, upload_flow=upload_flow, dataset_format=dataset_format, + n_jobs=n_jobs, ) if return_flow: return run, flow @@ -145,6 +153,7 @@ def run_flow_on_task( add_local_measures: bool = True, upload_flow: bool = False, dataset_format: str = "dataframe", + n_jobs: Optional[int] = None, ) -> OpenMLRun: """Run the model provided by the flow on the dataset defined by task. @@ -180,6 +189,10 @@ def run_flow_on_task( dataset_format : str (default='dataframe') If 'array', the dataset is passed to the model as a numpy array. If 'dataframe', the dataset is passed to the model as a pandas dataframe. + n_jobs : int (default=None) + The number of processes/threads to distribute the evaluation asynchronously. + If `None` or `1`, then the evaluation is treated as synchronous and processed sequentially. + If `-1`, then the job uses as many cores available. Returns ------- @@ -250,14 +263,20 @@ def run_flow_on_task( run_environment = flow.extension.get_version_information() tags = ["openml-python", run_environment[1]] + if flow.extension.check_if_model_fitted(flow.model): + warnings.warn( + "The model is already fitted!" + " This might cause inconsistency in comparison of results." 
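The warning above is driven by the new `check_if_model_fitted` extension method shown earlier. A quick sketch of its behaviour (the data shapes are arbitrary):

import numpy as np
from sklearn.tree import DecisionTreeClassifier
from openml.extensions.sklearn import SklearnExtension

extension = SklearnExtension()
clf = DecisionTreeClassifier()
extension.check_if_model_fitted(clf)  # False: predict() raises NotFittedError

X, y = np.random.uniform(size=(20, 5)), np.random.randint(0, 2, size=20)
# True: predict() on the internal dummy data now raises a feature-count ValueError,
# which the check interprets as "already fitted".
extension.check_if_model_fitted(clf.fit(X, y))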
+ ) + # execute the run res = _run_task_get_arffcontent( - flow=flow, model=flow.model, task=task, extension=flow.extension, add_local_measures=add_local_measures, dataset_format=dataset_format, + n_jobs=n_jobs, ) data_content, trace, fold_evaluations, sample_evaluations = res @@ -412,12 +431,12 @@ def run_exists(task_id: int, setup_id: int) -> Set[int]: def _run_task_get_arffcontent( - flow: OpenMLFlow, model: Any, task: OpenMLTask, extension: "Extension", add_local_measures: bool, dataset_format: str, + n_jobs: int = None, ) -> Tuple[ List[List], Optional[OpenMLRunTrace], @@ -440,55 +459,36 @@ def _run_task_get_arffcontent( # methods, less maintenance, less confusion) num_reps, num_folds, num_samples = task.get_split_dimensions() + jobs = [] for n_fit, (rep_no, fold_no, sample_no) in enumerate( itertools.product(range(num_reps), range(num_folds), range(num_samples),), start=1 ): - - train_indices, test_indices = task.get_train_test_split_indices( - repeat=rep_no, fold=fold_no, sample=sample_no - ) - if isinstance(task, OpenMLSupervisedTask): - x, y = task.get_X_and_y(dataset_format=dataset_format) - if dataset_format == "dataframe": - train_x = x.iloc[train_indices] - train_y = y.iloc[train_indices] - test_x = x.iloc[test_indices] - test_y = y.iloc[test_indices] - else: - train_x = x[train_indices] - train_y = y[train_indices] - test_x = x[test_indices] - test_y = y[test_indices] - elif isinstance(task, OpenMLClusteringTask): - x = task.get_X(dataset_format=dataset_format) - if dataset_format == "dataframe": - train_x = x.iloc[train_indices] - else: - train_x = x[train_indices] - train_y = None - test_x = None - test_y = None - else: - raise NotImplementedError(task.task_type) - - config.logger.info( - "Going to execute flow '%s' on task %d for repeat %d fold %d sample %d.", - flow.name, - task.task_id, - rep_no, - fold_no, - sample_no, - ) - - pred_y, proba_y, user_defined_measures_fold, trace = extension._run_model_on_fold( + jobs.append((n_fit, rep_no, fold_no, sample_no)) + + # The forked child process may not copy the configuration state of OpenML from the parent. + # Current configuration setup needs to be copied and passed to the child processes. 
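Putting the new `n_jobs` option together: the sketch below evaluates all repeats/folds in parallel, with each joblib worker re-initialising the OpenML configuration from the snapshot mentioned above. The task id is purely illustrative; datasets with categorical features additionally need an encoder such as the `cat`/`cont`-based ColumnTransformer shown earlier.

import openml
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.tree import DecisionTreeClassifier
from openml.extensions.sklearn import cat, cont

task = openml.tasks.get_task(31)  # assumed to be a supervised classification task
preprocessing = ColumnTransformer(
    transformers=[
        ("categorical", make_pipeline(SimpleImputer(strategy="most_frequent"),
                                      OneHotEncoder(handle_unknown="ignore")), cat),
        ("continuous", make_pipeline(SimpleImputer(), StandardScaler()), cont),
    ]
)
clf = make_pipeline(preprocessing, DecisionTreeClassifier())

# n_jobs=-1 distributes the folds over all cores via joblib; n_jobs=None keeps
# the previous sequential behaviour. Pass an unfitted estimator to avoid the
# "model is already fitted" warning added above.
run = openml.runs.run_model_on_task(clf, task, avoid_duplicate_runs=False, n_jobs=-1)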
+ _config = config.get_config_as_dict() + # Execute runs in parallel + # assuming the same number of tasks as workers (n_jobs), the total compute time for this + # statement will be similar to the slowest run + job_rvals = Parallel(verbose=0, n_jobs=n_jobs)( + delayed(_run_task_get_arffcontent_parallel_helper)( + extension=extension, + fold_no=fold_no, model=model, - task=task, - X_train=train_x, - y_train=train_y, rep_no=rep_no, - fold_no=fold_no, - X_test=test_x, + sample_no=sample_no, + task=task, + dataset_format=dataset_format, + configuration=_config, ) + for n_fit, rep_no, fold_no, sample_no in jobs + ) # job_rvals contain the output of all the runs with one-to-one correspondence with `jobs` + + for n_fit, rep_no, fold_no, sample_no in jobs: + pred_y, proba_y, test_indices, test_y, trace, user_defined_measures_fold = job_rvals[ + n_fit - 1 + ] if trace is not None: traces.append(trace) @@ -502,7 +502,9 @@ def _calculate_local_measure(sklearn_fn, openml_name): for i, tst_idx in enumerate(test_indices): if task.class_labels is not None: prediction = ( - task.class_labels[pred_y[i]] if isinstance(pred_y[i], int) else pred_y[i] + task.class_labels[pred_y[i]] + if isinstance(pred_y[i], (int, np.integer)) + else pred_y[i] ) if isinstance(test_y, pd.Series): test_prediction = ( @@ -513,7 +515,7 @@ def _calculate_local_measure(sklearn_fn, openml_name): else: test_prediction = ( task.class_labels[test_y[i]] - if isinstance(test_y[i], int) + if isinstance(test_y[i], (int, np.integer)) else test_y[i] ) pred_prob = proba_y.iloc[i] if isinstance(proba_y, pd.DataFrame) else proba_y[i] @@ -606,6 +608,75 @@ def _calculate_local_measure(sklearn_fn, openml_name): ) +def _run_task_get_arffcontent_parallel_helper( + extension: "Extension", + fold_no: int, + model: Any, + rep_no: int, + sample_no: int, + task: OpenMLTask, + dataset_format: str, + configuration: Dict = None, +) -> Tuple[ + np.ndarray, + Optional[pd.DataFrame], + np.ndarray, + Optional[pd.DataFrame], + Optional[OpenMLRunTrace], + "OrderedDict[str, float]", +]: + # Sets up the OpenML instantiated in the child process to match that of the parent's + # if configuration=None, loads the default + config._setup(configuration) + + train_indices, test_indices = task.get_train_test_split_indices( + repeat=rep_no, fold=fold_no, sample=sample_no + ) + + if isinstance(task, OpenMLSupervisedTask): + x, y = task.get_X_and_y(dataset_format=dataset_format) + if dataset_format == "dataframe": + train_x = x.iloc[train_indices] + train_y = y.iloc[train_indices] + test_x = x.iloc[test_indices] + test_y = y.iloc[test_indices] + else: + train_x = x[train_indices] + train_y = y[train_indices] + test_x = x[test_indices] + test_y = y[test_indices] + elif isinstance(task, OpenMLClusteringTask): + x = task.get_X(dataset_format=dataset_format) + if dataset_format == "dataframe": + train_x = x.iloc[train_indices] + else: + train_x = x[train_indices] + train_y = None + test_x = None + test_y = None + else: + raise NotImplementedError(task.task_type) + config.logger.info( + "Going to run model {} on dataset {} for repeat {} fold {} sample {}".format( + str(model), + openml.datasets.get_dataset(task.dataset_id).name, + rep_no, + fold_no, + sample_no, + ) + ) + pred_y, proba_y, user_defined_measures_fold, trace, = extension._run_model_on_fold( + model=model, + task=task, + X_train=train_x, + y_train=train_y, + rep_no=rep_no, + fold_no=fold_no, + X_test=test_x, + ) + return pred_y, proba_y, test_indices, test_y, trace, user_defined_measures_fold + + def get_runs(run_ids): 
"""Gets all runs in run_ids list. @@ -734,6 +805,9 @@ def obtain_field(xml_obj, fieldname, from_server, cast=None): flow_name = obtain_field(run, "oml:flow_name", from_server) setup_id = obtain_field(run, "oml:setup_id", from_server, cast=int) setup_string = obtain_field(run, "oml:setup_string", from_server) + # run_details is currently not sent by the server, so we need to retrieve it safely. + # whenever that's resolved, we can enforce it being present (OpenML#1087) + run_details = obtain_field(run, "oml:run_details", from_server=False) if "oml:input_data" in run: dataset_id = int(run["oml:input_data"]["oml:dataset"]["oml:did"]) @@ -756,6 +830,7 @@ def obtain_field(xml_obj, fieldname, from_server, cast=None): if "oml:output_data" not in run: if from_server: raise ValueError("Run does not contain output_data " "(OpenML server error?)") + predictions_url = None else: output_data = run["oml:output_data"] predictions_url = None @@ -840,6 +915,7 @@ def obtain_field(xml_obj, fieldname, from_server, cast=None): sample_evaluations=sample_evaluations, tags=tags, predictions_url=predictions_url, + run_details=run_details, ) diff --git a/openml/runs/run.py b/openml/runs/run.py index 0311272b2..4c1c9907d 100644 --- a/openml/runs/run.py +++ b/openml/runs/run.py @@ -57,7 +57,9 @@ class OpenMLRun(OpenMLBase): run_id: int description_text: str, optional Description text to add to the predictions file. - If left None, + If left None, is set to the time the arff file is generated. + run_details: str, optional (default=None) + Description of the run stored in the run meta-data. """ def __init__( @@ -86,6 +88,7 @@ def __init__( flow=None, run_id=None, description_text=None, + run_details=None, ): self.uploader = uploader self.uploader_name = uploader_name @@ -112,6 +115,7 @@ def __init__( self.tags = tags self.predictions_url = predictions_url self.description_text = description_text + self.run_details = run_details @property def id(self) -> Optional[int]: @@ -543,11 +547,15 @@ def _to_dict(self) -> "OrderedDict[str, OrderedDict]": description["oml:run"]["@xmlns:oml"] = "http://openml.org/openml" description["oml:run"]["oml:task_id"] = self.task_id description["oml:run"]["oml:flow_id"] = self.flow_id + if self.setup_string is not None: + description["oml:run"]["oml:setup_string"] = self.setup_string if self.error_message is not None: description["oml:run"]["oml:error_message"] = self.error_message + if self.run_details is not None: + description["oml:run"]["oml:run_details"] = self.run_details description["oml:run"]["oml:parameter_setting"] = self.parameter_settings if self.tags is not None: - description["oml:run"]["oml:tag"] = self.tags # Tags describing the run + description["oml:run"]["oml:tag"] = self.tags if (self.fold_evaluations is not None and len(self.fold_evaluations) > 0) or ( self.sample_evaluations is not None and len(self.sample_evaluations) > 0 ): diff --git a/openml/study/functions.py b/openml/study/functions.py index 632581022..ee877ddf2 100644 --- a/openml/study/functions.py +++ b/openml/study/functions.py @@ -58,7 +58,7 @@ def get_study( "of things have changed since then. Please use `get_suite('OpenML100')` instead." 
) warnings.warn(message, DeprecationWarning) - openml.config.logger.warn(message) + openml.config.logger.warning(message) study = _get_study(study_id, entity_type="task") return cast(OpenMLBenchmarkSuite, study) # type: ignore else: diff --git a/openml/testing.py b/openml/testing.py index da07b0ed7..f8e22bb4c 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -6,18 +6,13 @@ import shutil import sys import time -from typing import Dict +from typing import Dict, Union, cast import unittest -import warnings - -# Currently, importing oslo raises a lot of warning that it will stop working -# under python3.8; remove this once they disappear -with warnings.catch_warnings(): - warnings.simplefilter("ignore") - from oslo_concurrency import lockutils +import pandas as pd import openml from openml.tasks import TaskType +from openml.exceptions import OpenMLServerException import logging @@ -98,13 +93,6 @@ def setUp(self, n_levels: int = 1): openml.config.avoid_duplicate_runs = False openml.config.cache_directory = self.workdir - # If we're on travis, we save the api key in the config file to allow - # the notebook tests to read them. - if os.environ.get("TRAVIS") or os.environ.get("APPVEYOR"): - with lockutils.external_lock("config", lock_path=self.workdir): - with open(openml.config.config_file, "w") as fh: - fh.write("apikey = %s" % openml.config.apikey) - # Increase the number of retries to avoid spurious server failures self.connection_n_retries = openml.config.connection_n_retries openml.config.connection_n_retries = 10 @@ -252,6 +240,55 @@ def _check_fold_timing_evaluations( self.assertLessEqual(evaluation, max_val) + +def check_task_existence( + task_type: TaskType, dataset_id: int, target_name: str, **kwargs +) -> Union[int, None]: + """Checks if a task exists on the test server that matches the given meta-data.
+ + Parameters + ---------- + task_type : openml.tasks.TaskType + dataset_id : int + target_name : str + + Returns + ------- + int or None + """ + return_val = None + tasks = openml.tasks.list_tasks(task_type=task_type, output_format="dataframe") + if len(tasks) == 0: + return None + tasks = cast(pd.DataFrame, tasks).loc[tasks["did"] == dataset_id] + if len(tasks) == 0: + return None + tasks = tasks.loc[tasks["target_feature"] == target_name] + if len(tasks) == 0: + return None + task_match = [] + for task_id in tasks["tid"].to_list(): + task_match.append(task_id) + try: + task = openml.tasks.get_task(task_id) + except OpenMLServerException: + # can fail if the task was deleted by another concurrently running unit test + task_match.pop(-1) + return_val = None + continue + for k, v in kwargs.items(): + if getattr(task, k) != v: + # if any of the meta-data keys mismatches, then task_id is not a match + task_match.pop(-1) + break + # if task_id is still in the task_match list, it passed all meta-data key-value checks + if len(task_match) == 1: + return_val = task_id + break + if len(task_match) == 0: + return_val = None + return return_val + + try: from sklearn.impute import SimpleImputer except ImportError: @@ -267,12 +304,4 @@ class CustomImputer(SimpleImputer): pass -def cont(X): - return X.dtypes != "category" - - -def cat(X): - return X.dtypes == "category" - - -__all__ = ["TestBase", "SimpleImputer", "CustomImputer", "cat", "cont"] +__all__ = ["TestBase", "SimpleImputer", "CustomImputer", "check_task_existence"] diff --git a/openml/utils.py b/openml/utils.py index a402564f9..a482bf0bc 100644 --- a/openml/utils.py +++ b/openml/utils.py @@ -9,6 +9,7 @@ from functools import wraps import collections +import openml import openml._api_calls import openml.exceptions from . import config @@ -243,7 +244,7 @@ def _list_all(listing_call, output_format="dict", *args, **filters): limit=batch_size, offset=current_offset, output_format=output_format, - **active_filters + **active_filters, ) except openml.exceptions.OpenMLServerNoResult: # we want to return an empty dict in this case @@ -276,9 +277,11 @@ def _create_cache_directory(key): cache = config.get_cache_directory() cache_dir = os.path.join(cache, key) try: - os.makedirs(cache_dir) - except OSError: - pass + os.makedirs(cache_dir, exist_ok=True) + except Exception as e: + raise openml.exceptions.OpenMLCacheException( + f"Cannot create cache directory {cache_dir}." + ) from e return cache_dir @@ -304,9 +307,9 @@ def _create_cache_directory_for_id(key, id_): Path of the created dataset cache directory. """ cache_dir = os.path.join(_create_cache_directory(key), str(id_)) - if os.path.exists(cache_dir) and os.path.isdir(cache_dir): + if os.path.isdir(cache_dir): pass - elif os.path.exists(cache_dir) and not os.path.isdir(cache_dir): + elif os.path.exists(cache_dir): raise ValueError("%s cache dir exists but is not a directory!"
% key) else: os.makedirs(cache_dir) diff --git a/setup.py b/setup.py index 9e9a093e4..dc1a58863 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ ) ) -with open(os.path.join("README.md")) as fid: +with open(os.path.join("README.md"), encoding="utf-8") as fid: README = fid.read() setuptools.setup( @@ -53,6 +53,8 @@ "pandas>=1.0.0", "scipy>=0.13.3", "numpy>=1.6.2", + "minio", + "pyarrow", ], extras_require={ "test": [ @@ -65,9 +67,9 @@ "nbformat", "oslo.concurrency", "flaky", - "pyarrow", "pre-commit", "pytest-cov", + "pytest-rerunfailures", "mypy", ], "examples": [ @@ -81,7 +83,8 @@ "ipykernel", "seaborn", ], - "examples_unix": ["fanova",], + "examples_unix": ["fanova"], + "docs": ["sphinx", "sphinx-gallery", "sphinx_bootstrap_theme", "numpydoc"], }, test_suite="pytest", classifiers=[ diff --git a/tests/conftest.py b/tests/conftest.py index 461a513fd..c1f728a72 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -25,6 +25,7 @@ import os import logging from typing import List +import pytest import openml from openml.testing import TestBase @@ -34,16 +35,6 @@ logger.setLevel(logging.DEBUG) file_list = [] -directory = None - -# finding the root directory of conftest.py and going up to OpenML main directory -# exploiting the fact that conftest.py always resides in the root directory for tests -static_dir = os.path.dirname(os.path.abspath(__file__)) -logger.info("static directory: {}".format(static_dir)) -while True: - if "openml" in os.listdir(static_dir): - break - static_dir = os.path.join(static_dir, "..") def worker_id() -> str: @@ -65,12 +56,11 @@ def read_file_list() -> List[str]: :return: List[str] """ - directory = os.path.join(static_dir, "tests/files/") - if worker_id() == "master": - logger.info("Collecting file lists from: {}".format(directory)) - files = os.walk(directory) + this_dir = os.path.abspath(os.path.dirname(os.path.abspath(__file__))) + directory = os.path.join(this_dir, "..") + logger.info("Collecting file lists from: {}".format(directory)) file_list = [] - for root, _, filenames in files: + for root, _, filenames in os.walk(directory): for filename in filenames: file_list.append(os.path.join(root, filename)) return file_list @@ -125,7 +115,7 @@ def delete_remote_files(tracker) -> None: openml.utils._delete_entity(entity_type, entity) logger.info("Deleted ({}, {})".format(entity_type, entity)) except Exception as e: - logger.warn("Cannot delete ({},{}): {}".format(entity_type, entity, e)) + logger.warning("Cannot delete ({},{}): {}".format(entity_type, entity, e)) def pytest_sessionstart() -> None: @@ -182,3 +172,17 @@ def pytest_sessionfinish() -> None: logger.info("Local files deleted") logger.info("{} is killed".format(worker)) + + +def pytest_addoption(parser): + parser.addoption( + "--long", + action="store_true", + default=False, + help="Run the long version of tests which support both short and long scenarios.", + ) + + +@pytest.fixture(scope="class") +def long_version(request): + request.cls.long_version = request.config.getoption("--long") diff --git a/tests/files/org/openml/test/datasets/30/dataset.pq b/tests/files/org/openml/test/datasets/30/dataset.pq new file mode 100644 index 000000000..b35597281 Binary files /dev/null and b/tests/files/org/openml/test/datasets/30/dataset.pq differ diff --git a/tests/test_datasets/test_dataset.py b/tests/test_datasets/test_dataset.py index 73dbfa133..416fce534 100644 --- a/tests/test_datasets/test_dataset.py +++ b/tests/test_datasets/test_dataset.py @@ -1,7 +1,8 @@ # License: BSD 3-Clause +import os from time 
import time -from warnings import filterwarnings, catch_warnings +import unittest.mock import numpy as np import pandas as pd @@ -49,6 +50,17 @@ def test_init_string_validation(self): name="somename", description="a description", citation="Something by Müller" ) + def test__unpack_categories_with_nan_likes(self): + # unpack_categories decodes numeric categorical values according to the header + # Containing a 'non' category in the header shouldn't lead to failure. + categories = ["a", "b", None, float("nan"), np.nan] + series = pd.Series([0, 1, None, float("nan"), np.nan, 1, 0]) + clean_series = OpenMLDataset._unpack_categories(series, categories) + + expected_values = ["a", "b", np.nan, np.nan, np.nan, "b", "a"] + self.assertListEqual(list(clean_series.values), expected_values) + self.assertListEqual(list(clean_series.cat.categories.values), list("ab")) + def test_get_data_array(self): # Basic usage rval, _, categorical, attribute_names = self.dataset.get_data(dataset_format="array") @@ -72,13 +84,13 @@ def test_get_data_pandas(self): self.assertEqual(data.shape[1], len(self.titanic.features)) self.assertEqual(data.shape[0], 1309) col_dtype = { - "pclass": "float64", + "pclass": "uint8", "survived": "category", "name": "object", "sex": "category", "age": "float64", - "sibsp": "float64", - "parch": "float64", + "sibsp": "uint8", + "parch": "uint8", "ticket": "object", "fare": "float64", "cabin": "object", @@ -118,21 +130,29 @@ def test_get_data_no_str_data_for_nparrays(self): with pytest.raises(PyOpenMLError, match=err_msg): self.titanic.get_data(dataset_format="array") + def _check_expected_type(self, dtype, is_cat, col): + if is_cat: + expected_type = "category" + elif not col.isna().any() and (col.astype("uint8") == col).all(): + expected_type = "uint8" + else: + expected_type = "float64" + + self.assertEqual(dtype.name, expected_type) + def test_get_data_with_rowid(self): self.dataset.row_id_attribute = "condition" rval, _, categorical, _ = self.dataset.get_data(include_row_id=True) self.assertIsInstance(rval, pd.DataFrame) - for (dtype, is_cat) in zip(rval.dtypes, categorical): - expected_type = "category" if is_cat else "float64" - self.assertEqual(dtype.name, expected_type) + for (dtype, is_cat, col) in zip(rval.dtypes, categorical, rval): + self._check_expected_type(dtype, is_cat, rval[col]) self.assertEqual(rval.shape, (898, 39)) self.assertEqual(len(categorical), 39) rval, _, categorical, _ = self.dataset.get_data() self.assertIsInstance(rval, pd.DataFrame) - for (dtype, is_cat) in zip(rval.dtypes, categorical): - expected_type = "category" if is_cat else "float64" - self.assertEqual(dtype.name, expected_type) + for (dtype, is_cat, col) in zip(rval.dtypes, categorical, rval): + self._check_expected_type(dtype, is_cat, rval[col]) self.assertEqual(rval.shape, (898, 38)) self.assertEqual(len(categorical), 38) @@ -149,9 +169,8 @@ def test_get_data_with_target_array(self): def test_get_data_with_target_pandas(self): X, y, categorical, attribute_names = self.dataset.get_data(target="class") self.assertIsInstance(X, pd.DataFrame) - for (dtype, is_cat) in zip(X.dtypes, categorical): - expected_type = "category" if is_cat else "float64" - self.assertEqual(dtype.name, expected_type) + for (dtype, is_cat, col) in zip(X.dtypes, categorical, X): + self._check_expected_type(dtype, is_cat, X[col]) self.assertIsInstance(y, pd.Series) self.assertEqual(y.dtype.name, "category") @@ -174,27 +193,17 @@ def test_get_data_rowid_and_ignore_and_target(self): def 
test_get_data_with_ignore_attributes(self): self.dataset.ignore_attribute = ["condition"] rval, _, categorical, _ = self.dataset.get_data(include_ignore_attribute=True) - for (dtype, is_cat) in zip(rval.dtypes, categorical): - expected_type = "category" if is_cat else "float64" - self.assertEqual(dtype.name, expected_type) + for (dtype, is_cat, col) in zip(rval.dtypes, categorical, rval): + self._check_expected_type(dtype, is_cat, rval[col]) self.assertEqual(rval.shape, (898, 39)) self.assertEqual(len(categorical), 39) rval, _, categorical, _ = self.dataset.get_data(include_ignore_attribute=False) - for (dtype, is_cat) in zip(rval.dtypes, categorical): - expected_type = "category" if is_cat else "float64" - self.assertEqual(dtype.name, expected_type) + for (dtype, is_cat, col) in zip(rval.dtypes, categorical, rval): + self._check_expected_type(dtype, is_cat, rval[col]) self.assertEqual(rval.shape, (898, 38)) self.assertEqual(len(categorical), 38) - def test_dataset_format_constructor(self): - - with catch_warnings(): - filterwarnings("error") - self.assertRaises( - DeprecationWarning, openml.OpenMLDataset, "Test", "Test", format="arff" - ) - def test_get_data_with_nonexisting_class(self): # This class is using the anneal dataset with labels [1, 2, 3, 4, 5, 'U']. However, # label 4 does not exist and we test that the features 5 and 'U' are correctly mapped to @@ -350,7 +359,48 @@ def test_get_sparse_categorical_data_id_395(self): self.assertEqual(len(feature.nominal_values), 25) -class OpenMLDatasetQualityTest(TestBase): +class OpenMLDatasetFunctionTest(TestBase): + @unittest.mock.patch("openml.datasets.dataset.pickle") + @unittest.mock.patch("openml.datasets.dataset._get_features_pickle_file") + def test__read_features(self, filename_mock, pickle_mock): + """Test we read the features from the xml if no cache pickle is available. + + This test also does some simple checks to verify that the features are read correctly""" + filename_mock.return_value = os.path.join(self.workdir, "features.xml.pkl") + pickle_mock.load.side_effect = FileNotFoundError + features = openml.datasets.dataset._read_features( + os.path.join( + self.static_cache_dir, "org", "openml", "test", "datasets", "2", "features.xml" + ) + ) + self.assertIsInstance(features, dict) + self.assertEqual(len(features), 39) + self.assertIsInstance(features[0], OpenMLDataFeature) + self.assertEqual(features[0].name, "family") + self.assertEqual(len(features[0].nominal_values), 9) + # pickle.load is never called because the features pickle file didn't exist + self.assertEqual(pickle_mock.load.call_count, 0) + self.assertEqual(pickle_mock.dump.call_count, 1) + + @unittest.mock.patch("openml.datasets.dataset.pickle") + @unittest.mock.patch("openml.datasets.dataset._get_qualities_pickle_file") + def test__read_qualities(self, filename_mock, pickle_mock): + """Test we read the qualities from the xml if no cache pickle is available. 
+ + This test also does some minor checks to ensure that the qualities are read correctly.""" + filename_mock.return_value = os.path.join(self.workdir, "qualities.xml.pkl") + pickle_mock.load.side_effect = FileNotFoundError + qualities = openml.datasets.dataset._read_qualities( + os.path.join( + self.static_cache_dir, "org", "openml", "test", "datasets", "2", "qualities.xml" + ) + ) + self.assertIsInstance(qualities, dict) + self.assertEqual(len(qualities), 106) + # pickle.load is never called because the qualities pickle file didn't exist + self.assertEqual(pickle_mock.load.call_count, 0) + self.assertEqual(pickle_mock.dump.call_count, 1) + def test__check_qualities(self): qualities = [{"oml:name": "a", "oml:value": "0.5"}] qualities = openml.datasets.dataset._check_qualities(qualities) diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index c6e6f78f8..ec9dd6c53 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -1,9 +1,11 @@ # License: BSD 3-Clause import os +import pathlib import random from itertools import product from unittest import mock +import shutil import arff import time @@ -16,8 +18,8 @@ import openml from openml import OpenMLDataset +from openml._api_calls import _download_minio_file from openml.exceptions import ( - OpenMLCacheException, OpenMLHashException, OpenMLPrivateDatasetError, OpenMLServerException, @@ -27,19 +29,19 @@ from openml.datasets.functions import ( create_dataset, attributes_arff_from_df, - _get_cached_dataset, - _get_cached_dataset_features, - _get_cached_dataset_qualities, - _get_cached_datasets, _get_dataset_arff, _get_dataset_description, - _get_dataset_features, - _get_dataset_qualities, + _get_dataset_features_file, + _get_dataset_qualities_file, _get_online_dataset_arff, _get_online_dataset_format, DATASETS_CACHE_DIR_NAME, + _get_dataset_parquet, + _topic_add_dataset, + _topic_delete_dataset, ) from openml.datasets import fork_dataset, edit_dataset +from openml.tasks import TaskType, create_task class TestOpenMLDataset(TestBase): @@ -85,60 +87,6 @@ def _get_empty_param_for_dataset(self): "data": None, } - def test__list_cached_datasets(self): - openml.config.cache_directory = self.static_cache_dir - cached_datasets = openml.datasets.functions._list_cached_datasets() - self.assertIsInstance(cached_datasets, list) - self.assertEqual(len(cached_datasets), 2) - self.assertIsInstance(cached_datasets[0], int) - - @mock.patch("openml.datasets.functions._list_cached_datasets") - def test__get_cached_datasets(self, _list_cached_datasets_mock): - openml.config.cache_directory = self.static_cache_dir - _list_cached_datasets_mock.return_value = [-1, 2] - datasets = _get_cached_datasets() - self.assertIsInstance(datasets, dict) - self.assertEqual(len(datasets), 2) - self.assertIsInstance(list(datasets.values())[0], OpenMLDataset) - - def test__get_cached_dataset(self,): - openml.config.cache_directory = self.static_cache_dir - dataset = _get_cached_dataset(2) - features = _get_cached_dataset_features(2) - qualities = _get_cached_dataset_qualities(2) - self.assertIsInstance(dataset, OpenMLDataset) - self.assertTrue(len(dataset.features) > 0) - self.assertTrue(len(dataset.features) == len(features["oml:feature"])) - self.assertTrue(len(dataset.qualities) == len(qualities)) - - def test_get_cached_dataset_description(self): - openml.config.cache_directory = self.static_cache_dir - description = 
openml.datasets.functions._get_cached_dataset_description(2) - self.assertIsInstance(description, dict) - - def test_get_cached_dataset_description_not_cached(self): - openml.config.cache_directory = self.static_cache_dir - self.assertRaisesRegex( - OpenMLCacheException, - "Dataset description for dataset id 3 not cached", - openml.datasets.functions._get_cached_dataset_description, - dataset_id=3, - ) - - def test_get_cached_dataset_arff(self): - openml.config.cache_directory = self.static_cache_dir - description = openml.datasets.functions._get_cached_dataset_arff(dataset_id=2) - self.assertIsInstance(description, str) - - def test_get_cached_dataset_arff_not_cached(self): - openml.config.cache_directory = self.static_cache_dir - self.assertRaisesRegex( - OpenMLCacheException, - "ARFF file for dataset id 3 not cached", - openml.datasets.functions._get_cached_dataset_arff, - dataset_id=3, - ) - def _check_dataset(self, dataset): self.assertEqual(type(dataset), dict) self.assertGreaterEqual(len(dataset), 2) @@ -227,9 +175,10 @@ def test_list_datasets_empty(self): def test_check_datasets_active(self): # Have to test on live because there is no deactivated dataset on the test server. openml.config.server = self.production_server - active = openml.datasets.check_datasets_active([2, 17]) + active = openml.datasets.check_datasets_active([2, 17, 79], raise_error_if_not_exist=False,) self.assertTrue(active[2]) self.assertFalse(active[17]) + self.assertIsNone(active.get(79)) self.assertRaisesRegex( ValueError, "Could not find dataset 79 in OpenML dataset list.", @@ -369,6 +318,13 @@ def test_get_dataset_by_name(self): openml.config.server = self.production_server self.assertRaises(OpenMLPrivateDatasetError, openml.datasets.get_dataset, 45) + def test_get_dataset_uint8_dtype(self): + dataset = openml.datasets.get_dataset(1) + self.assertEqual(type(dataset), OpenMLDataset) + self.assertEqual(dataset.name, "anneal") + df, _, _, _ = dataset.get_data() + self.assertEqual(df["carbon"].dtype, "uint8") + def test_get_dataset(self): # This is the only non-lazy load to ensure default behaviour works. 
dataset = openml.datasets.get_dataset(1) @@ -451,11 +407,99 @@ def test__get_dataset_description(self): def test__getarff_path_dataset_arff(self): openml.config.cache_directory = self.static_cache_dir - description = openml.datasets.functions._get_cached_dataset_description(2) + description = _get_dataset_description(self.workdir, 2) arff_path = _get_dataset_arff(description, cache_directory=self.workdir) self.assertIsInstance(arff_path, str) self.assertTrue(os.path.exists(arff_path)) + def test__download_minio_file_object_does_not_exist(self): + self.assertRaisesRegex( + FileNotFoundError, + r"Object at .* does not exist", + _download_minio_file, + source="http://openml1.win.tue.nl/dataset20/i_do_not_exist.pq", + destination=self.workdir, + exists_ok=True, + ) + + def test__download_minio_file_to_directory(self): + _download_minio_file( + source="http://openml1.win.tue.nl/dataset20/dataset_20.pq", + destination=self.workdir, + exists_ok=True, + ) + self.assertTrue( + os.path.isfile(os.path.join(self.workdir, "dataset_20.pq")), + "_download_minio_file can save to a folder by copying the object name", + ) + + def test__download_minio_file_to_path(self): + file_destination = os.path.join(self.workdir, "custom.pq") + _download_minio_file( + source="http://openml1.win.tue.nl/dataset20/dataset_20.pq", + destination=file_destination, + exists_ok=True, + ) + self.assertTrue( + os.path.isfile(file_destination), + "_download_minio_file can save to a folder by copying the object name", + ) + + def test__download_minio_file_raises_FileExists_if_destination_in_use(self): + file_destination = pathlib.Path(self.workdir, "custom.pq") + file_destination.touch() + + self.assertRaises( + FileExistsError, + _download_minio_file, + source="http://openml1.win.tue.nl/dataset20/dataset_20.pq", + destination=str(file_destination), + exists_ok=False, + ) + + def test__download_minio_file_works_with_bucket_subdirectory(self): + file_destination = pathlib.Path(self.workdir, "custom.csv") + _download_minio_file( + source="http://openml1.win.tue.nl/test/subdirectory/test.csv", + destination=file_destination, + exists_ok=True, + ) + self.assertTrue( + os.path.isfile(file_destination), + "_download_minio_file can download from subdirectories", + ) + + def test__get_dataset_parquet_not_cached(self): + description = { + "oml:minio_url": "http://openml1.win.tue.nl/dataset20/dataset_20.pq", + "oml:id": "20", + } + path = _get_dataset_parquet(description, cache_directory=self.workdir) + self.assertIsInstance(path, str, "_get_dataset_parquet returns a path") + self.assertTrue(os.path.isfile(path), "_get_dataset_parquet returns path to real file") + + @mock.patch("openml._api_calls._download_minio_file") + def test__get_dataset_parquet_is_cached(self, patch): + openml.config.cache_directory = self.static_cache_dir + patch.side_effect = RuntimeError( + "_download_minio_file should not be called when loading from cache" + ) + description = { + "oml:minio_url": "http://openml1.win.tue.nl/dataset30/dataset_30.pq", + "oml:id": "30", + } + path = _get_dataset_parquet(description, cache_directory=None) + self.assertIsInstance(path, str, "_get_dataset_parquet returns a path") + self.assertTrue(os.path.isfile(path), "_get_dataset_parquet returns path to real file") + + def test__get_dataset_parquet_file_does_not_exist(self): + description = { + "oml:minio_url": "http://openml1.win.tue.nl/dataset20/does_not_exist.pq", + "oml:id": "20", + } + path = _get_dataset_parquet(description, cache_directory=self.workdir) + self.assertIsNone(path, 
"_get_dataset_parquet returns None if no file is found") + def test__getarff_md5_issue(self): description = { "oml:id": 5, @@ -464,23 +508,27 @@ def test__getarff_md5_issue(self): } self.assertRaisesRegex( OpenMLHashException, - "Checksum ad484452702105cbf3d30f8deaba39a9 of downloaded file " - "is unequal to the expected checksum abc. " - "Raised when downloading dataset 5.", + "Checksum of downloaded file is unequal to the expected checksum abc when downloading " + "https://www.openml.org/data/download/61. Raised when downloading dataset 5.", _get_dataset_arff, description, ) def test__get_dataset_features(self): - features = _get_dataset_features(self.workdir, 2) - self.assertIsInstance(features, dict) + features_file = _get_dataset_features_file(self.workdir, 2) + self.assertIsInstance(features_file, str) features_xml_path = os.path.join(self.workdir, "features.xml") self.assertTrue(os.path.exists(features_xml_path)) def test__get_dataset_qualities(self): - # Only a smoke check - qualities = _get_dataset_qualities(self.workdir, 2) - self.assertIsInstance(qualities, list) + qualities = _get_dataset_qualities_file(self.workdir, 2) + self.assertIsInstance(qualities, str) + qualities_xml_path = os.path.join(self.workdir, "qualities.xml") + self.assertTrue(os.path.exists(qualities_xml_path)) + + def test__get_dataset_skip_download(self): + qualities = openml.datasets.get_dataset(2, download_qualities=False).qualities + self.assertIsNone(qualities) def test_deletion_of_cache_dir(self): # Simple removal @@ -547,6 +595,7 @@ def test_upload_dataset_with_url(self): ) self.assertIsInstance(dataset.dataset_id, int) + @pytest.mark.flaky() def test_data_status(self): dataset = OpenMLDataset( "%s-UploadTestWithURL" % self._get_sentinel(), @@ -864,6 +913,24 @@ def test_get_online_dataset_arff(self): "ARFF files are not equal", ) + def test_topic_api_error(self): + # Check server exception when non-admin accessses apis + self.assertRaisesRegex( + OpenMLServerException, + "Topic can only be added/removed by admin.", + _topic_add_dataset, + data_id=31, + topic="business", + ) + # Check server exception when non-admin accessses apis + self.assertRaisesRegex( + OpenMLServerException, + "Topic can only be added/removed by admin.", + _topic_delete_dataset, + data_id=31, + topic="business", + ) + def test_get_online_dataset_format(self): # Phoneme dataset @@ -897,7 +964,6 @@ def test_create_dataset_pandas(self): collection_date = "01-01-2018" language = "English" licence = "MIT" - default_target_attribute = "play" citation = "None" original_data_url = "http://openml.github.io/openml-python" paper_url = "http://openml.github.io/openml-python" @@ -909,7 +975,7 @@ def test_create_dataset_pandas(self): collection_date=collection_date, language=language, licence=licence, - default_target_attribute=default_target_attribute, + default_target_attribute="play", row_id_attribute=None, ignore_attribute=None, citation=citation, @@ -944,7 +1010,7 @@ def test_create_dataset_pandas(self): collection_date=collection_date, language=language, licence=licence, - default_target_attribute=default_target_attribute, + default_target_attribute="y", row_id_attribute=None, ignore_attribute=None, citation=citation, @@ -980,7 +1046,7 @@ def test_create_dataset_pandas(self): collection_date=collection_date, language=language, licence=licence, - default_target_attribute=default_target_attribute, + default_target_attribute="rnd_str", row_id_attribute=None, ignore_attribute=None, citation=citation, @@ -1147,27 +1213,31 @@ def 
test_publish_fetch_ignore_attribute(self): # test if publish was successful self.assertIsInstance(dataset.id, int) + downloaded_dataset = self._wait_for_dataset_being_processed(dataset.id) + self.assertEqual(downloaded_dataset.ignore_attribute, ignore_attribute) + + def _wait_for_dataset_being_processed(self, dataset_id): downloaded_dataset = None # fetching from server # loop till timeout or fetch not successful - max_waiting_time_seconds = 400 + max_waiting_time_seconds = 600 # time.time() works in seconds start_time = time.time() while time.time() - start_time < max_waiting_time_seconds: try: - downloaded_dataset = openml.datasets.get_dataset(dataset.id) + downloaded_dataset = openml.datasets.get_dataset(dataset_id) break except Exception as e: # returned code 273: Dataset not processed yet # returned code 362: No qualities found TestBase.logger.error( - "Failed to fetch dataset:{} with '{}'.".format(dataset.id, str(e)) + "Failed to fetch dataset:{} with '{}'.".format(dataset_id, str(e)) ) time.sleep(10) continue if downloaded_dataset is None: - raise ValueError("TIMEOUT: Failed to fetch uploaded dataset - {}".format(dataset.id)) - self.assertEqual(downloaded_dataset.ignore_attribute, ignore_attribute) + raise ValueError("TIMEOUT: Failed to fetch uploaded dataset - {}".format(dataset_id)) + return downloaded_dataset def test_create_dataset_row_id_attribute_error(self): # meta-information @@ -1303,6 +1373,8 @@ def test_list_qualities(self): def test_get_dataset_cache_format_pickle(self): dataset = openml.datasets.get_dataset(1) + dataset.get_data() + self.assertEqual(type(dataset), OpenMLDataset) self.assertEqual(dataset.name, "anneal") self.assertGreater(len(dataset.features), 1) @@ -1317,6 +1389,7 @@ def test_get_dataset_cache_format_pickle(self): def test_get_dataset_cache_format_feather(self): dataset = openml.datasets.get_dataset(128, cache_format="feather") + dataset.get_data() # Check if dataset is written to cache directory using feather cache_dir = openml.config.get_cache_directory() @@ -1340,7 +1413,7 @@ def test_get_dataset_cache_format_feather(self): self.assertEqual(len(categorical), X.shape[1]) self.assertEqual(len(attribute_names), X.shape[1]) - def test_data_edit(self): + def test_data_edit_non_critical_field(self): # Case 1 # All users can edit non-critical fields of datasets desc = ( @@ -1361,14 +1434,31 @@ def test_data_edit(self): edited_dataset = openml.datasets.get_dataset(did) self.assertEqual(edited_dataset.description, desc) + def test_data_edit_critical_field(self): # Case 2 # only owners (or admin) can edit all critical fields of datasets - # this is a dataset created by CI, so it is editable by this test - did = 315 - result = edit_dataset(did, default_target_attribute="col_1", ignore_attribute="col_2") + # for this, we need to first clone a dataset to do changes + did = fork_dataset(1) + self._wait_for_dataset_being_processed(did) + result = edit_dataset(did, default_target_attribute="shape", ignore_attribute="oil") self.assertEqual(did, result) - edited_dataset = openml.datasets.get_dataset(did) - self.assertEqual(edited_dataset.ignore_attribute, ["col_2"]) + + n_tries = 10 + # we need to wait for the edit to be reflected on the server + for i in range(n_tries): + edited_dataset = openml.datasets.get_dataset(did) + try: + self.assertEqual(edited_dataset.default_target_attribute, "shape", edited_dataset) + self.assertEqual(edited_dataset.ignore_attribute, ["oil"], edited_dataset) + break + except AssertionError as e: + if i == n_tries - 1: + raise e + 
time.sleep(10) + # Delete the cache dir to get the newer version of the dataset + shutil.rmtree( + os.path.join(self.workdir, "org", "openml", "test", "datasets", str(did)) + ) def test_data_edit_errors(self): # Check server exception when no field to edit is provided @@ -1379,7 +1469,7 @@ def test_data_edit_errors(self): "original_data_url, default_target_attribute, row_id_attribute, " "ignore_attribute or paper_url to edit.", edit_dataset, - data_id=564, + data_id=64, # blood-transfusion-service-center ) # Check server exception when unknown dataset is provided self.assertRaisesRegex( @@ -1389,15 +1479,32 @@ def test_data_edit_errors(self): data_id=999999, description="xor operation dataset", ) + + # Need to own a dataset to be able to edit meta-data + # Will be creating a forked version of an existing dataset to allow the unit test user + # to edit meta-data of a dataset + did = fork_dataset(1) + self._wait_for_dataset_being_processed(did) + TestBase._mark_entity_for_removal("data", did) + # Need to upload a task attached to this data to test edit failure + task = create_task( + task_type=TaskType.SUPERVISED_CLASSIFICATION, + dataset_id=did, + target_name="class", + estimation_procedure_id=1, + ) + task = task.publish() + TestBase._mark_entity_for_removal("task", task.task_id) # Check server exception when owner/admin edits critical fields of dataset with tasks self.assertRaisesRegex( OpenMLServerException, "Critical features default_target_attribute, row_id_attribute and ignore_attribute " "can only be edited for datasets without any tasks.", edit_dataset, - data_id=223, + data_id=did, default_target_attribute="y", ) + # Check server exception when a non-owner or non-admin tries to edit critical fields self.assertRaisesRegex( OpenMLServerException, @@ -1416,3 +1523,124 @@ def test_data_fork(self): self.assertRaisesRegex( OpenMLServerException, "Unknown dataset", fork_dataset, data_id=999999, ) + + def test_get_dataset_parquet(self): + dataset = openml.datasets.get_dataset(20) + self.assertIsNotNone(dataset._minio_url) + self.assertIsNotNone(dataset.parquet_file) + self.assertTrue(os.path.isfile(dataset.parquet_file)) + + +@pytest.mark.parametrize( + "default_target_attribute,row_id_attribute,ignore_attribute", + [ + ("wrong", None, None), + (None, "wrong", None), + (None, None, "wrong"), + ("wrong,sunny", None, None), + (None, None, "wrong,sunny"), + (["wrong", "sunny"], None, None), + (None, None, ["wrong", "sunny"]), + ], +) +def test_invalid_attribute_validations( + default_target_attribute, row_id_attribute, ignore_attribute +): + data = [ + ["a", "sunny", 85.0, 85.0, "FALSE", "no"], + ["b", "sunny", 80.0, 90.0, "TRUE", "no"], + ["c", "overcast", 83.0, 86.0, "FALSE", "yes"], + ["d", "rainy", 70.0, 96.0, "FALSE", "yes"], + ["e", "rainy", 68.0, 80.0, "FALSE", "yes"], + ] + column_names = ["rnd_str", "outlook", "temperature", "humidity", "windy", "play"] + df = pd.DataFrame(data, columns=column_names) + # enforce the type of each column + df["outlook"] = df["outlook"].astype("category") + df["windy"] = df["windy"].astype("bool") + df["play"] = df["play"].astype("category") + # meta-information + name = "pandas_testing_dataset" + description = "Synthetic dataset created from a Pandas DataFrame" + creator = "OpenML tester" + collection_date = "01-01-2018" + language = "English" + licence = "MIT" + citation = "None" + original_data_url = "http://openml.github.io/openml-python" + paper_url = "http://openml.github.io/openml-python" + with pytest.raises(ValueError, match="should be one of 
the data attribute"): + _ = openml.datasets.functions.create_dataset( + name=name, + description=description, + creator=creator, + contributor=None, + collection_date=collection_date, + language=language, + licence=licence, + default_target_attribute=default_target_attribute, + row_id_attribute=row_id_attribute, + ignore_attribute=ignore_attribute, + citation=citation, + attributes="auto", + data=df, + version_label="test", + original_data_url=original_data_url, + paper_url=paper_url, + ) + + +@pytest.mark.parametrize( + "default_target_attribute,row_id_attribute,ignore_attribute", + [ + ("outlook", None, None), + (None, "outlook", None), + (None, None, "outlook"), + ("outlook,windy", None, None), + (None, None, "outlook,windy"), + (["outlook", "windy"], None, None), + (None, None, ["outlook", "windy"]), + ], +) +def test_valid_attribute_validations(default_target_attribute, row_id_attribute, ignore_attribute): + data = [ + ["a", "sunny", 85.0, 85.0, "FALSE", "no"], + ["b", "sunny", 80.0, 90.0, "TRUE", "no"], + ["c", "overcast", 83.0, 86.0, "FALSE", "yes"], + ["d", "rainy", 70.0, 96.0, "FALSE", "yes"], + ["e", "rainy", 68.0, 80.0, "FALSE", "yes"], + ] + column_names = ["rnd_str", "outlook", "temperature", "humidity", "windy", "play"] + df = pd.DataFrame(data, columns=column_names) + # enforce the type of each column + df["outlook"] = df["outlook"].astype("category") + df["windy"] = df["windy"].astype("bool") + df["play"] = df["play"].astype("category") + # meta-information + name = "pandas_testing_dataset" + description = "Synthetic dataset created from a Pandas DataFrame" + creator = "OpenML tester" + collection_date = "01-01-2018" + language = "English" + licence = "MIT" + citation = "None" + original_data_url = "http://openml.github.io/openml-python" + paper_url = "http://openml.github.io/openml-python" + _ = openml.datasets.functions.create_dataset( + name=name, + description=description, + creator=creator, + contributor=None, + collection_date=collection_date, + language=language, + licence=licence, + default_target_attribute=default_target_attribute, + row_id_attribute=row_id_attribute, + ignore_attribute=ignore_attribute, + citation=citation, + attributes="auto", + data=df, + version_label="test", + original_data_url=original_data_url, + paper_url=paper_url, + ) diff --git a/tests/test_evaluations/__init__.py b/tests/test_evaluations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/test_evaluations/test_evaluation_functions.py b/tests/test_evaluations/test_evaluation_functions.py index e4de9b03c..70f36ce19 100644 --- a/tests/test_evaluations/test_evaluation_functions.py +++ b/tests/test_evaluations/test_evaluation_functions.py @@ -1,10 +1,12 @@ # License: BSD 3-Clause +import pytest import openml import openml.evaluations from openml.testing import TestBase +@pytest.mark.usefixtures("long_version") class TestEvaluationFunctions(TestBase): _multiprocess_can_split_ = True @@ -27,6 +29,10 @@ def _check_list_evaluation_setups(self, **kwargs): # Check if output and order of list_evaluations is preserved self.assertSequenceEqual(evals_setups["run_id"].tolist(), evals["run_id"].tolist()) + + if not self.long_version: + evals_setups = evals_setups.head(1) + # Check if the hyper-parameter column is as accurate and flow_id for index, row in evals_setups.iterrows(): params = openml.runs.get_run(row["run_id"]).parameter_settings diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py 
b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py index d34dc2ad3..c1f88bcda 100644 --- a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py +++ b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py @@ -40,7 +40,8 @@ from openml.flows import OpenMLFlow from openml.flows.functions import assert_flows_equal from openml.runs.trace import OpenMLRunTrace -from openml.testing import TestBase, SimpleImputer, CustomImputer, cat, cont +from openml.testing import TestBase, SimpleImputer, CustomImputer +from openml.extensions.sklearn import cat, cont this_directory = os.path.dirname(os.path.abspath(__file__)) @@ -145,7 +146,7 @@ def test_serialize_model(self): fixture_short_name = "sklearn.DecisionTreeClassifier" # str obtained from self.extension._get_sklearn_description(model) fixture_description = "A decision tree classifier." - version_fixture = "sklearn==%s\nnumpy>=1.6.1\nscipy>=0.9" % sklearn.__version__ + version_fixture = self.extension._min_dependency_str(sklearn.__version__) presort_val = "false" if LooseVersion(sklearn.__version__) < "0.22" else '"deprecated"' # min_impurity_decrease has been introduced in 0.20 @@ -188,6 +189,8 @@ def test_serialize_model(self): if LooseVersion(sklearn.__version__) >= "0.22": fixture_parameters.update({"ccp_alpha": "0.0"}) fixture_parameters.move_to_end("ccp_alpha", last=False) + if LooseVersion(sklearn.__version__) >= "0.24": + del fixture_parameters["presort"] structure_fixture = {"sklearn.tree.{}.DecisionTreeClassifier".format(tree_name): []} @@ -224,7 +227,7 @@ def test_serialize_model_clustering(self): fixture_description = "K-Means clustering{}".format( "" if LooseVersion(sklearn.__version__) < "0.22" else "." ) - version_fixture = "sklearn==%s\nnumpy>=1.6.1\nscipy>=0.9" % sklearn.__version__ + version_fixture = self.extension._min_dependency_str(sklearn.__version__) n_jobs_val = "null" if LooseVersion(sklearn.__version__) < "0.23" else '"deprecated"' precomp_val = '"auto"' if LooseVersion(sklearn.__version__) < "0.23" else '"deprecated"' @@ -1251,7 +1254,7 @@ def test_paralizable_check(self): # using this param distribution should raise an exception illegal_param_dist = {"base__n_jobs": [-1, 0, 1]} # using this param distribution should not raise an exception - legal_param_dist = {"base__max_depth": [2, 3, 4]} + legal_param_dist = {"n_estimators": [2, 3, 4]} legal_models = [ sklearn.ensemble.RandomForestClassifier(), @@ -1279,12 +1282,19 @@ def test_paralizable_check(self): can_measure_cputime_answers = [True, False, False, True, False, False, True, False, False] can_measure_walltime_answers = [True, True, False, True, True, False, True, True, False] + if LooseVersion(sklearn.__version__) < "0.20": + has_refit_time = [False, False, False, False, False, False, False, False, False] + else: + has_refit_time = [False, False, False, False, False, False, True, True, False] - for model, allowed_cputime, allowed_walltime in zip( - legal_models, can_measure_cputime_answers, can_measure_walltime_answers + X, y = sklearn.datasets.load_iris(return_X_y=True) + for model, allowed_cputime, allowed_walltime, refit_time in zip( + legal_models, can_measure_cputime_answers, can_measure_walltime_answers, has_refit_time ): self.assertEqual(self.extension._can_measure_cputime(model), allowed_cputime) self.assertEqual(self.extension._can_measure_wallclocktime(model), allowed_walltime) + model.fit(X, y) + self.assertEqual(refit_time, hasattr(model, "refit_time_")) for model in illegal_models: with 
self.assertRaises(PyOpenMLError): @@ -1316,12 +1326,18 @@ def test__get_fn_arguments_with_defaults(self): (sklearn.tree.DecisionTreeClassifier.__init__, 14), (sklearn.pipeline.Pipeline.__init__, 2), ] - else: + elif sklearn_version < "0.24": fns = [ (sklearn.ensemble.RandomForestRegressor.__init__, 18), (sklearn.tree.DecisionTreeClassifier.__init__, 14), (sklearn.pipeline.Pipeline.__init__, 2), ] + else: + fns = [ + (sklearn.ensemble.RandomForestRegressor.__init__, 18), + (sklearn.tree.DecisionTreeClassifier.__init__, 13), + (sklearn.pipeline.Pipeline.__init__, 2), + ] for fn, num_params_with_defaults in fns: defaults, defaultless = self.extension._get_fn_arguments_with_defaults(fn) @@ -1464,7 +1480,7 @@ def test_openml_param_name_to_sklearn(self): ) model = sklearn.pipeline.Pipeline(steps=[("scaler", scaler), ("boosting", boosting)]) flow = self.extension.model_to_flow(model) - task = openml.tasks.get_task(115) + task = openml.tasks.get_task(115) # diabetes; crossvalidation run = openml.runs.run_flow_on_task(flow, task) run = run.publish() TestBase._mark_entity_for_removal("run", run.run_id) @@ -1522,7 +1538,7 @@ def test_obtain_parameter_values(self): "bootstrap": [True, False], "criterion": ["gini", "entropy"], }, - cv=sklearn.model_selection.StratifiedKFold(n_splits=2, random_state=1), + cv=sklearn.model_selection.StratifiedKFold(n_splits=2, random_state=1, shuffle=True), n_iter=5, ) flow = self.extension.model_to_flow(model) @@ -1560,7 +1576,7 @@ def setUp(self): # Test methods for performing runs with this extension module def test_run_model_on_task(self): - task = openml.tasks.get_task(1) + task = openml.tasks.get_task(1) # anneal; crossvalidation # using most_frequent imputer since dataset has mixed types and to keep things simple pipe = sklearn.pipeline.Pipeline( [ @@ -1625,7 +1641,7 @@ def test_seed_model_raises(self): self.extension.seed_model(model=clf, seed=42) def test_run_model_on_fold_classification_1_array(self): - task = openml.tasks.get_task(1) + task = openml.tasks.get_task(1) # anneal; crossvalidation X, y = task.get_X_and_y() train_indices, test_indices = task.get_train_test_split_indices(repeat=0, fold=0, sample=0) @@ -1688,7 +1704,7 @@ def test_run_model_on_fold_classification_1_array(self): def test_run_model_on_fold_classification_1_dataframe(self): from sklearn.compose import ColumnTransformer - task = openml.tasks.get_task(1) + task = openml.tasks.get_task(1) # anneal; crossvalidation # diff test_run_model_on_fold_classification_1_array() X, y = task.get_X_and_y(dataset_format="dataframe") @@ -1752,7 +1768,7 @@ def test_run_model_on_fold_classification_1_dataframe(self): ) def test_run_model_on_fold_classification_2(self): - task = openml.tasks.get_task(7) + task = openml.tasks.get_task(7) # kr-vs-kp; crossvalidation X, y = task.get_X_and_y() train_indices, test_indices = task.get_train_test_split_indices(repeat=0, fold=0, sample=0) @@ -1814,7 +1830,11 @@ def predict_proba(*args, **kwargs): raise AttributeError("predict_proba is not available when " "probability=False") # task 1 (test server) is important: it is a task with an unused class - tasks = [1, 3, 115] + tasks = [ + 1, # anneal; crossvalidation + 3, # anneal; crossvalidation + 115, # diabetes; crossvalidation + ] flow = unittest.mock.Mock() flow.name = "dummy" @@ -1968,7 +1988,7 @@ def test__extract_trace_data(self): "max_iter": [10, 20, 40, 80], } num_iters = 10 - task = openml.tasks.get_task(20) + task = openml.tasks.get_task(20) # balance-scale; crossvalidation clf = 
sklearn.model_selection.RandomizedSearchCV( sklearn.neural_network.MLPClassifier(), param_grid, num_iters, ) @@ -2079,8 +2099,8 @@ def test_run_on_model_with_empty_steps(self): from sklearn.compose import ColumnTransformer # testing 'drop', 'passthrough', None as non-actionable sklearn estimators - dataset = openml.datasets.get_dataset(128) - task = openml.tasks.get_task(59) + dataset = openml.datasets.get_dataset(128) # iris + task = openml.tasks.get_task(59) # mfeat-pixel; crossvalidation X, y, categorical_ind, feature_names = dataset.get_data( target=dataset.default_target_attribute, dataset_format="array" @@ -2183,16 +2203,6 @@ def test_failed_serialization_of_custom_class(self): # for lower versions from sklearn.preprocessing import Imputer as SimpleImputer - class CustomImputer(SimpleImputer): - pass - - def cont(X): - return X.dtypes != "category" - - def cat(X): - return X.dtypes == "category" - - import sklearn.metrics import sklearn.tree from sklearn.pipeline import Pipeline, make_pipeline from sklearn.compose import ColumnTransformer @@ -2207,7 +2217,7 @@ def cat(X): steps=[("preprocess", ct), ("estimator", sklearn.tree.DecisionTreeClassifier())] ) # build a sklearn classifier - task = openml.tasks.get_task(253) # data with mixed types from test server + task = openml.tasks.get_task(253) # profb; crossvalidation try: _ = openml.runs.run_model_on_task(clf, task) except AttributeError as e: @@ -2215,3 +2225,38 @@ def cat(X): raise AttributeError(e) else: raise Exception(e) + + @unittest.skipIf( + LooseVersion(sklearn.__version__) < "0.20", + reason="columntransformer introduction in 0.20.0", + ) + def test_setupid_with_column_transformer(self): + """Test to check whether inclusion of a ColumnTransformer in a pipeline is treated as a new + flow each time. + """ + import sklearn.compose + from sklearn.svm import SVC + + def column_transformer_pipe(task_id): + task = openml.tasks.get_task(task_id) + # make columntransformer + preprocessor = sklearn.compose.ColumnTransformer( + transformers=[ + ("num", StandardScaler(), cont), + ("cat", OneHotEncoder(handle_unknown="ignore"), cat), + ] + ) + # make pipeline + clf = SVC(gamma="scale", random_state=1) + pipe = make_pipeline(preprocessor, clf) + # run task + run = openml.runs.run_model_on_task(pipe, task, avoid_duplicate_runs=False) + run.publish() + new_run = openml.runs.get_run(run.run_id) + return new_run + + run1 = column_transformer_pipe(11) # only categorical + TestBase._mark_entity_for_removal("run", run1.run_id) + run2 = column_transformer_pipe(23) # only numeric + TestBase._mark_entity_for_removal("run", run2.run_id) + self.assertEqual(run1.setup_id, run2.setup_id) diff --git a/tests/test_flows/test_flow_functions.py b/tests/test_flows/test_flow_functions.py index 12af05ffe..a65dcbf70 100644 --- a/tests/test_flows/test_flow_functions.py +++ b/tests/test_flows/test_flow_functions.py @@ -2,18 +2,22 @@ from collections import OrderedDict import copy +import functools import unittest +from unittest.mock import patch from distutils.version import LooseVersion import sklearn from sklearn import ensemble import pandas as pd +import pytest import openml from openml.testing import TestBase import openml.extensions.sklearn +@pytest.mark.usefixtures("long_version") class TestFlowFunctions(TestBase): _multiprocess_can_split_ = True @@ -321,8 +325,16 @@ def test_get_flow_reinstantiate_model_wrong_version(self): # Note that CI does not test against 0.19.1.
openml.config.server = self.production_server _, sklearn_major, _ = LooseVersion(sklearn.__version__).version[:3] - flow = 8175 - expected = "Trying to deserialize a model with dependency" " sklearn==0.19.1 not satisfied." + if sklearn_major > 23: + flow = 18587 # 18687, 18725 --- flows building random forest on >= 0.23 + flow_sklearn_version = "0.23.1" + else: + flow = 8175 + flow_sklearn_version = "0.19.1" + expected = ( + "Trying to deserialize a model with dependency " + "sklearn=={} not satisfied.".format(flow_sklearn_version) + ) self.assertRaisesRegex( ValueError, expected, openml.flows.get_flow, flow_id=flow, reinstantiate=True ) @@ -331,23 +343,34 @@ def test_get_flow_reinstantiate_model_wrong_version(self): flow = openml.flows.get_flow(flow_id=flow, reinstantiate=True, strict_version=False) # ensure that a new flow was created assert flow.flow_id is None - assert "0.19.1" not in flow.dependencies + assert "sklearn==0.19.1" not in flow.dependencies + assert "sklearn>=0.19.1" not in flow.dependencies def test_get_flow_id(self): - clf = sklearn.tree.DecisionTreeClassifier() - flow = openml.extensions.get_extension_by_model(clf).model_to_flow(clf).publish() - - self.assertEqual(openml.flows.get_flow_id(model=clf, exact_version=True), flow.flow_id) - flow_ids = openml.flows.get_flow_id(model=clf, exact_version=False) - self.assertIn(flow.flow_id, flow_ids) - self.assertGreater(len(flow_ids), 2) - - # Check that the output of get_flow_id is identical if only the name is given, no matter - # whether exact_version is set to True or False. - flow_ids_exact_version_True = openml.flows.get_flow_id(name=flow.name, exact_version=True) - flow_ids_exact_version_False = openml.flows.get_flow_id( - name=flow.name, exact_version=False, - ) - self.assertEqual(flow_ids_exact_version_True, flow_ids_exact_version_False) - self.assertIn(flow.flow_id, flow_ids_exact_version_True) - self.assertGreater(len(flow_ids_exact_version_True), 2) + if self.long_version: + list_all = openml.utils._list_all + else: + list_all = functools.lru_cache()(openml.utils._list_all) + with patch("openml.utils._list_all", list_all): + clf = sklearn.tree.DecisionTreeClassifier() + flow = openml.extensions.get_extension_by_model(clf).model_to_flow(clf).publish() + TestBase._mark_entity_for_removal("flow", (flow.flow_id, flow.name)) + TestBase.logger.info( + "collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id) + ) + + self.assertEqual(openml.flows.get_flow_id(model=clf, exact_version=True), flow.flow_id) + flow_ids = openml.flows.get_flow_id(model=clf, exact_version=False) + self.assertIn(flow.flow_id, flow_ids) + self.assertGreater(len(flow_ids), 0) + + # Check that the output of get_flow_id is identical if only the name is given, no matter + # whether exact_version is set to True or False. 
+ flow_ids_exact_version_True = openml.flows.get_flow_id( + name=flow.name, exact_version=True + ) + flow_ids_exact_version_False = openml.flows.get_flow_id( + name=flow.name, exact_version=False, + ) + self.assertEqual(flow_ids_exact_version_True, flow_ids_exact_version_False) + self.assertIn(flow.flow_id, flow_ids_exact_version_True) diff --git a/tests/test_openml/test_api_calls.py b/tests/test_openml/test_api_calls.py index 8b470a45b..459a0cdf5 100644 --- a/tests/test_openml/test_api_calls.py +++ b/tests/test_openml/test_api_calls.py @@ -1,3 +1,5 @@ +import unittest.mock + import openml import openml.testing @@ -8,3 +10,23 @@ def test_too_long_uri(self): openml.exceptions.OpenMLServerError, "URI too long!", ): openml.datasets.list_datasets(data_id=list(range(10000))) + + @unittest.mock.patch("time.sleep") + @unittest.mock.patch("requests.Session") + def test_retry_on_database_error(self, Session_class_mock, _): + response_mock = unittest.mock.Mock() + response_mock.text = ( + "\n" + "107" + "Database connection error. " + "Usually due to high server load. " + "Please wait for N seconds and try again.\n" + "" + ) + Session_class_mock.return_value.__enter__.return_value.get.return_value = response_mock + with self.assertRaisesRegex( + openml.exceptions.OpenMLServerException, "/abc returned code 107" + ): + openml._api_calls._send_request("get", "/abc", {}) + + self.assertEqual(Session_class_mock.return_value.__enter__.return_value.get.call_count, 10) diff --git a/tests/test_openml/test_config.py b/tests/test_openml/test_config.py index 88136dbd9..5b15f781e 100644 --- a/tests/test_openml/test_config.py +++ b/tests/test_openml/test_config.py @@ -1,15 +1,59 @@ # License: BSD 3-Clause +import tempfile import os +import unittest.mock import openml.config import openml.testing class TestConfig(openml.testing.TestBase): - def test_config_loading(self): - self.assertTrue(os.path.exists(openml.config.config_file)) - self.assertTrue(os.path.isdir(os.path.expanduser("~/.openml"))) + @unittest.mock.patch("os.path.expanduser") + @unittest.mock.patch("openml.config.openml_logger.warning") + @unittest.mock.patch("openml.config._create_log_handlers") + @unittest.skipIf(os.name == "nt", "https://github.com/openml/openml-python/issues/1033") + def test_non_writable_home(self, log_handler_mock, warnings_mock, expanduser_mock): + with tempfile.TemporaryDirectory(dir=self.workdir) as td: + expanduser_mock.side_effect = ( + os.path.join(td, "openmldir"), + os.path.join(td, "cachedir"), + ) + os.chmod(td, 0o444) + openml.config._setup() + + self.assertEqual(warnings_mock.call_count, 2) + self.assertEqual(log_handler_mock.call_count, 1) + self.assertFalse(log_handler_mock.call_args_list[0][1]["create_file_handler"]) + + def test_get_config_as_dict(self): + """ Checks if the current configuration is returned accurately as a dict. """ + config = openml.config.get_config_as_dict() + _config = dict() + _config["apikey"] = "610344db6388d9ba34f6db45a3cf71de" + _config["server"] = "https://test.openml.org/api/v1/xml" + _config["cachedir"] = self.workdir + _config["avoid_duplicate_runs"] = False + _config["connection_n_retries"] = 10 + _config["max_retries"] = 20 + self.assertIsInstance(config, dict) + self.assertEqual(len(config), 6) + self.assertDictEqual(config, _config) + + def test_setup_with_config(self): + """ Checks if the OpenML configuration can be updated using _setup(). 
""" + _config = dict() + _config["apikey"] = "610344db6388d9ba34f6db45a3cf71de" + _config["server"] = "https://www.openml.org/api/v1/xml" + _config["cachedir"] = self.workdir + _config["avoid_duplicate_runs"] = True + _config["connection_n_retries"] = 100 + _config["max_retries"] = 1000 + orig_config = openml.config.get_config_as_dict() + openml.config._setup(_config) + updated_config = openml.config.get_config_as_dict() + openml.config._setup(orig_config) # important to not affect other unit tests + self.assertDictEqual(_config, updated_config) class TestConfigurationForExamples(openml.testing.TestBase): diff --git a/tests/test_runs/test_run.py b/tests/test_runs/test_run.py index 864863f4a..dd0da5c00 100644 --- a/tests/test_runs/test_run.py +++ b/tests/test_runs/test_run.py @@ -5,11 +5,13 @@ import os from time import time +import xmltodict from sklearn.dummy import DummyClassifier from sklearn.tree import DecisionTreeClassifier from sklearn.model_selection import GridSearchCV from sklearn.pipeline import Pipeline +from openml import OpenMLRun from openml.testing import TestBase, SimpleImputer import openml import openml.extensions.sklearn @@ -102,7 +104,7 @@ def test_to_from_filesystem_vanilla(self): ("classifier", DecisionTreeClassifier(max_depth=1)), ] ) - task = openml.tasks.get_task(119) + task = openml.tasks.get_task(119) # diabetes; crossvalidation run = openml.runs.run_model_on_task( model=model, task=task, @@ -142,7 +144,7 @@ def test_to_from_filesystem_search(self): }, ) - task = openml.tasks.get_task(119) + task = openml.tasks.get_task(119) # diabetes; crossvalidation run = openml.runs.run_model_on_task( model=model, task=task, add_local_measures=False, avoid_duplicate_runs=False, ) @@ -163,7 +165,7 @@ def test_to_from_filesystem_no_model(self): model = Pipeline( [("imputer", SimpleImputer(strategy="mean")), ("classifier", DummyClassifier())] ) - task = openml.tasks.get_task(119) + task = openml.tasks.get_task(119) # diabetes; crossvalidation run = openml.runs.run_model_on_task(model=model, task=task, add_local_measures=False) cache_path = os.path.join(self.workdir, "runs", str(random.getrandbits(128))) @@ -184,7 +186,7 @@ def test_publish_with_local_loaded_flow(self): model = Pipeline( [("imputer", SimpleImputer(strategy="mean")), ("classifier", DummyClassifier())] ) - task = openml.tasks.get_task(119) + task = openml.tasks.get_task(119) # diabetes; crossvalidation # Make sure the flow does not exist on the server yet. flow = extension.model_to_flow(model) @@ -215,3 +217,19 @@ def test_publish_with_local_loaded_flow(self): # make sure the flow is published as part of publishing the run. self.assertTrue(openml.flows.flow_exists(flow.name, flow.external_version)) openml.runs.get_run(loaded_run.run_id) + + def test_run_setup_string_included_in_xml(self): + SETUP_STRING = "setup-string" + run = OpenMLRun( + task_id=0, + flow_id=None, # if not none, flow parameters are required. 
+ dataset_id=0, + setup_string=SETUP_STRING, + ) + xml = run._to_xml() + run_dict = xmltodict.parse(xml)["oml:run"] + assert "oml:setup_string" in run_dict + assert run_dict["oml:setup_string"] == SETUP_STRING + + recreated_run = openml.runs.functions._create_run_from_xml(xml, from_server=False) + assert recreated_run.setup_string == SETUP_STRING diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index 89f01c72e..4534f26a4 100644 --- a/tests/test_runs/test_run_functions.py +++ b/tests/test_runs/test_run_functions.py @@ -1,5 +1,4 @@ # License: BSD 3-Clause -from typing import Tuple, List, Union import arff from distutils.version import LooseVersion @@ -7,10 +6,12 @@ import random import time import sys +import ast import unittest.mock import numpy as np -import pytest +import joblib +from joblib import parallel_backend import openml import openml.exceptions @@ -21,10 +22,13 @@ import pandas as pd import openml.extensions.sklearn -from openml.testing import TestBase, SimpleImputer, CustomImputer, cat, cont +from openml.testing import TestBase, SimpleImputer, CustomImputer +from openml.extensions.sklearn import cat, cont from openml.runs.functions import _run_task_get_arffcontent, run_exists, format_prediction from openml.runs.trace import OpenMLRunTrace from openml.tasks import TaskType +from openml.testing import check_task_existence +from openml.exceptions import OpenMLServerException from sklearn.naive_bayes import GaussianNB from sklearn.model_selection._search import BaseSearchCV @@ -42,19 +46,45 @@ class TestRun(TestBase): _multiprocess_can_split_ = True - # diabetis dataset, 768 observations, 0 missing vals, 33% holdout set - # (253 test obs), no nominal attributes, all numeric attributes - TEST_SERVER_TASK_SIMPLE: Tuple[Union[int, List], ...] = (119, 0, 253, [], [*range(8)]) - TEST_SERVER_TASK_REGRESSION: Tuple[Union[int, List], ...] 
= (738, 0, 718, [], [*range(8)]) - # credit-a dataset, 690 observations, 67 missing vals, 33% holdout set - # (227 test obs) - TEST_SERVER_TASK_MISSING_VALS = ( - 96, - 67, - 227, - [0, 3, 4, 5, 6, 8, 9, 11, 12], - [1, 2, 7, 10, 13, 14], - ) + TEST_SERVER_TASK_MISSING_VALS = { + "task_id": 96, + "n_missing_vals": 67, + "n_test_obs": 227, + "nominal_indices": [0, 3, 4, 5, 6, 8, 9, 11, 12], + "numeric_indices": [1, 2, 7, 10, 13, 14], + "task_meta_data": { + "task_type": TaskType.SUPERVISED_CLASSIFICATION, + "dataset_id": 16, # credit-a + "estimation_procedure_id": 1, + "target_name": "class", + }, + } + TEST_SERVER_TASK_SIMPLE = { + "task_id": 119, + "n_missing_vals": 0, + "n_test_obs": 253, + "nominal_indices": [], + "numeric_indices": [*range(8)], + "task_meta_data": { + "task_type": TaskType.SUPERVISED_CLASSIFICATION, + "dataset_id": 20, # diabetes + "estimation_procedure_id": 1, + "target_name": "class", + }, + } + TEST_SERVER_TASK_REGRESSION = { + "task_id": 1605, + "n_missing_vals": 0, + "n_test_obs": 2178, + "nominal_indices": [], + "numeric_indices": [*range(8)], + "task_meta_data": { + "task_type": TaskType.SUPERVISED_REGRESSION, + "dataset_id": 123, # quake + "estimation_procedure_id": 7, + "target_name": "richter", + }, + } # Suppress warnings to facilitate testing hide_warnings = True @@ -335,7 +365,7 @@ def _check_sample_evaluations( for sample in range(num_sample_entrees): evaluation = sample_evaluations[measure][rep][fold][sample] self.assertIsInstance(evaluation, float) - if not os.environ.get("CI_WINDOWS"): + if not (os.environ.get("CI_WINDOWS") or os.name == "nt"): # Either Appveyor is much faster than Travis # and/or measurements are not as accurate. # Either way, windows seems to get an eval-time @@ -344,7 +374,7 @@ def _check_sample_evaluations( self.assertLess(evaluation, max_time_allowed) def test_run_regression_on_classif_task(self): - task_id = 115 + task_id = 115 # diabetes; crossvalidation clf = LinearRegression() task = openml.tasks.get_task(task_id) @@ -358,7 +388,7 @@ def test_run_regression_on_classif_task(self): ) def test_check_erronous_sklearn_flow_fails(self): - task_id = 115 + task_id = 115 # diabetes; crossvalidation task = openml.tasks.get_task(task_id) # Invalid parameter values @@ -443,7 +473,7 @@ def determine_grid_size(param_grid): # suboptimal (slow), and not guaranteed to work if evaluation # engine is behind. # TODO: mock this? 
We have the arff already on the server - self._wait_for_processed_run(run.run_id, 400) + self._wait_for_processed_run(run.run_id, 600) try: model_prime = openml.runs.initialize_model_from_trace( run_id=run.run_id, repeat=0, fold=0, @@ -499,7 +529,7 @@ def _run_and_upload_classification( def _run_and_upload_regression( self, clf, task_id, n_missing_vals, n_test_obs, flow_expected_rsv, sentinel=None ): - num_folds = 1 # because of holdout + num_folds = 10 # because of cross-validation num_iterations = 5 # for base search algorithms metric = sklearn.metrics.mean_absolute_error # metric class metric_name = "mean_absolute_error" # openml metric name @@ -520,17 +550,39 @@ def _run_and_upload_regression( ) def test_run_and_upload_logistic_regression(self): - lr = LogisticRegression(solver="lbfgs") - task_id = self.TEST_SERVER_TASK_SIMPLE[0] - n_missing_vals = self.TEST_SERVER_TASK_SIMPLE[1] - n_test_obs = self.TEST_SERVER_TASK_SIMPLE[2] + lr = LogisticRegression(solver="lbfgs", max_iter=1000) + task_id = self.TEST_SERVER_TASK_SIMPLE["task_id"] + n_missing_vals = self.TEST_SERVER_TASK_SIMPLE["n_missing_vals"] + n_test_obs = self.TEST_SERVER_TASK_SIMPLE["n_test_obs"] self._run_and_upload_classification(lr, task_id, n_missing_vals, n_test_obs, "62501") def test_run_and_upload_linear_regression(self): lr = LinearRegression() - task_id = self.TEST_SERVER_TASK_REGRESSION[0] - n_missing_vals = self.TEST_SERVER_TASK_REGRESSION[1] - n_test_obs = self.TEST_SERVER_TASK_REGRESSION[2] + task_id = self.TEST_SERVER_TASK_REGRESSION["task_id"] + + task_meta_data = self.TEST_SERVER_TASK_REGRESSION["task_meta_data"] + _task_id = check_task_existence(**task_meta_data) + if _task_id is not None: + task_id = _task_id + else: + new_task = openml.tasks.create_task(**task_meta_data) + # publishes the new task + try: + new_task = new_task.publish() + task_id = new_task.task_id + except OpenMLServerException as e: + if e.code == 614: # Task already exists + # the exception message contains the task_id that was matched in the format + # 'Task already exists. 
- matched id(s): [xxxx]' + task_id = ast.literal_eval(e.message.split("matched id(s):")[-1].strip())[0] + else: + raise Exception(repr(e)) + # mark to remove the uploaded task + TestBase._mark_entity_for_removal("task", task_id) + TestBase.logger.info("collected from test_run_functions: {}".format(task_id)) + + n_missing_vals = self.TEST_SERVER_TASK_REGRESSION["n_missing_vals"] + n_test_obs = self.TEST_SERVER_TASK_REGRESSION["n_test_obs"] self._run_and_upload_regression(lr, task_id, n_missing_vals, n_test_obs, "62501") def test_run_and_upload_pipeline_dummy_pipeline(self): @@ -541,9 +593,9 @@ def test_run_and_upload_pipeline_dummy_pipeline(self): ("dummy", DummyClassifier(strategy="prior")), ] ) - task_id = self.TEST_SERVER_TASK_SIMPLE[0] - n_missing_vals = self.TEST_SERVER_TASK_SIMPLE[1] - n_test_obs = self.TEST_SERVER_TASK_SIMPLE[2] + task_id = self.TEST_SERVER_TASK_SIMPLE["task_id"] + n_missing_vals = self.TEST_SERVER_TASK_SIMPLE["n_missing_vals"] + n_test_obs = self.TEST_SERVER_TASK_SIMPLE["n_test_obs"] self._run_and_upload_classification(pipeline1, task_id, n_missing_vals, n_test_obs, "62501") @unittest.skipIf( @@ -584,20 +636,26 @@ def get_ct_cf(nominal_indices, numeric_indices): sentinel = self._get_sentinel() self._run_and_upload_classification( - get_ct_cf(self.TEST_SERVER_TASK_SIMPLE[3], self.TEST_SERVER_TASK_SIMPLE[4]), - self.TEST_SERVER_TASK_SIMPLE[0], - self.TEST_SERVER_TASK_SIMPLE[1], - self.TEST_SERVER_TASK_SIMPLE[2], + get_ct_cf( + self.TEST_SERVER_TASK_SIMPLE["nominal_indices"], + self.TEST_SERVER_TASK_SIMPLE["numeric_indices"], + ), + self.TEST_SERVER_TASK_SIMPLE["task_id"], + self.TEST_SERVER_TASK_SIMPLE["n_missing_vals"], + self.TEST_SERVER_TASK_SIMPLE["n_test_obs"], "62501", sentinel=sentinel, ) # Due to #602, it is important to test this model on two tasks # with different column specifications self._run_and_upload_classification( - get_ct_cf(self.TEST_SERVER_TASK_MISSING_VALS[3], self.TEST_SERVER_TASK_MISSING_VALS[4]), - self.TEST_SERVER_TASK_MISSING_VALS[0], - self.TEST_SERVER_TASK_MISSING_VALS[1], - self.TEST_SERVER_TASK_MISSING_VALS[2], + get_ct_cf( + self.TEST_SERVER_TASK_MISSING_VALS["nominal_indices"], + self.TEST_SERVER_TASK_MISSING_VALS["numeric_indices"], + ), + self.TEST_SERVER_TASK_MISSING_VALS["task_id"], + self.TEST_SERVER_TASK_MISSING_VALS["n_missing_vals"], + self.TEST_SERVER_TASK_MISSING_VALS["n_test_obs"], "62501", sentinel=sentinel, ) @@ -606,7 +664,8 @@ def get_ct_cf(nominal_indices, numeric_indices): LooseVersion(sklearn.__version__) < "0.20", reason="columntransformer introduction in 0.20.0", ) - def test_run_and_upload_knn_pipeline(self): + @unittest.mock.patch("warnings.warn") + def test_run_and_upload_knn_pipeline(self, warnings_mock): cat_imp = make_pipeline( SimpleImputer(strategy="most_frequent"), OneHotEncoder(handle_unknown="ignore") @@ -632,19 +691,34 @@ def test_run_and_upload_knn_pipeline(self): ] ) - task_id = self.TEST_SERVER_TASK_MISSING_VALS[0] - n_missing_vals = self.TEST_SERVER_TASK_MISSING_VALS[1] - n_test_obs = self.TEST_SERVER_TASK_MISSING_VALS[2] + task_id = self.TEST_SERVER_TASK_MISSING_VALS["task_id"] + n_missing_vals = self.TEST_SERVER_TASK_MISSING_VALS["n_missing_vals"] + n_test_obs = self.TEST_SERVER_TASK_MISSING_VALS["n_test_obs"] self._run_and_upload_classification(pipeline2, task_id, n_missing_vals, n_test_obs, "62501") + # The warning raised is: + # "The total space of parameters 8 is smaller than n_iter=10. + # Running 8 iterations. For exhaustive searches, use GridSearchCV." 
+ # It is raised three times because we once run the model to upload something and then run + # it again twice to compare that the predictions are reproducible. + warning_msg = ( + "The total space of parameters 8 is smaller than n_iter=10. " + "Running 8 iterations. For exhaustive searches, use GridSearchCV." + ) + call_count = 0 + for _warnings in warnings_mock.call_args_list: + if _warnings[0][0] == warning_msg: + call_count += 1 + self.assertEqual(call_count, 3) def test_run_and_upload_gridsearch(self): gridsearch = GridSearchCV( BaggingClassifier(base_estimator=SVC()), {"base_estimator__C": [0.01, 0.1, 10], "base_estimator__gamma": [0.01, 0.1, 10]}, + cv=3, ) - task_id = self.TEST_SERVER_TASK_SIMPLE[0] - n_missing_vals = self.TEST_SERVER_TASK_SIMPLE[1] - n_test_obs = self.TEST_SERVER_TASK_SIMPLE[2] + task_id = self.TEST_SERVER_TASK_SIMPLE["task_id"] + n_missing_vals = self.TEST_SERVER_TASK_SIMPLE["n_missing_vals"] + n_test_obs = self.TEST_SERVER_TASK_SIMPLE["n_test_obs"] run = self._run_and_upload_classification( clf=gridsearch, task_id=task_id, @@ -671,9 +745,9 @@ def test_run_and_upload_randomsearch(self): # The random states for the RandomizedSearchCV is set after the # random state of the RandomForestClassifier is set, therefore, # it has a different value than the other examples before - task_id = self.TEST_SERVER_TASK_SIMPLE[0] - n_missing_vals = self.TEST_SERVER_TASK_SIMPLE[1] - n_test_obs = self.TEST_SERVER_TASK_SIMPLE[2] + task_id = self.TEST_SERVER_TASK_SIMPLE["task_id"] + n_missing_vals = self.TEST_SERVER_TASK_SIMPLE["n_missing_vals"] + n_test_obs = self.TEST_SERVER_TASK_SIMPLE["n_test_obs"] run = self._run_and_upload_classification( clf=randomsearch, task_id=task_id, @@ -682,6 +756,8 @@ def test_run_and_upload_randomsearch(self): flow_expected_rsv="12172", ) self.assertEqual(len(run.trace.trace_iterations), 5) + trace = openml.runs.get_run_trace(run.run_id) + self.assertEqual(len(trace.trace_iterations), 5) def test_run_and_upload_maskedarrays(self): # This testcase is important for 2 reasons: @@ -696,9 +772,9 @@ def test_run_and_upload_maskedarrays(self): # The random states for the GridSearchCV is set after the # random state of the RandomForestClassifier is set, therefore, # it has a different value than the other examples before - task_id = self.TEST_SERVER_TASK_SIMPLE[0] - n_missing_vals = self.TEST_SERVER_TASK_SIMPLE[1] - n_test_obs = self.TEST_SERVER_TASK_SIMPLE[2] + task_id = self.TEST_SERVER_TASK_SIMPLE["task_id"] + n_missing_vals = self.TEST_SERVER_TASK_SIMPLE["n_missing_vals"] + n_test_obs = self.TEST_SERVER_TASK_SIMPLE["n_test_obs"] self._run_and_upload_classification( gridsearch, task_id, n_missing_vals, n_test_obs, "12172" ) @@ -782,7 +858,7 @@ def test_initialize_cv_from_run(self): ] ) - task = openml.tasks.get_task(11) + task = openml.tasks.get_task(11) # kr-vs-kp; holdout run = openml.runs.run_model_on_task( model=randomsearch, task=task, avoid_duplicate_runs=False, seed=1, ) @@ -828,31 +904,12 @@ def _test_local_evaluations(self, run): self.assertGreaterEqual(alt_scores[idx], 0) self.assertLessEqual(alt_scores[idx], 1) - @unittest.skipIf( - LooseVersion(sklearn.__version__) < "0.20", - reason="SimpleImputer doesn't handle mixed type DataFrame as input", - ) def test_local_run_swapped_parameter_order_model(self): + clf = DecisionTreeClassifier() + australian_task = 595 # Australian; crossvalidation + task = openml.tasks.get_task(australian_task) - # construct sci-kit learn classifier - clf = Pipeline( - steps=[ - ( - "imputer", - make_pipeline( - 
SimpleImputer(strategy="most_frequent"), - OneHotEncoder(handle_unknown="ignore"), - ), - ), - # random forest doesn't take categoricals - ("estimator", RandomForestClassifier()), - ] - ) - - # download task - task = openml.tasks.get_task(7) - - # invoke OpenML run + # task and clf are purposely in the old order run = openml.runs.run_model_on_task( task, clf, avoid_duplicate_runs=False, upload_flow=False, ) @@ -876,7 +933,7 @@ def test_local_run_swapped_parameter_order_flow(self): flow = self.extension.model_to_flow(clf) # download task - task = openml.tasks.get_task(7) + task = openml.tasks.get_task(7) # kr-vs-kp; crossvalidation # invoke OpenML run run = openml.runs.run_flow_on_task( @@ -901,7 +958,7 @@ def test_local_run_metric_score(self): ) # download task - task = openml.tasks.get_task(7) + task = openml.tasks.get_task(7) # kr-vs-kp; crossvalidation # invoke OpenML run run = openml.runs.run_model_on_task( @@ -931,7 +988,33 @@ def test_initialize_model_from_run(self): ("Estimator", GaussianNB()), ] ) - task = openml.tasks.get_task(1198) + task_meta_data = { + "task_type": TaskType.SUPERVISED_CLASSIFICATION, + "dataset_id": 128, # iris + "estimation_procedure_id": 1, + "target_name": "class", + } + _task_id = check_task_existence(**task_meta_data) + if _task_id is not None: + task_id = _task_id + else: + new_task = openml.tasks.create_task(**task_meta_data) + # publishes the new task + try: + new_task = new_task.publish() + task_id = new_task.task_id + except OpenMLServerException as e: + if e.code == 614: # Task already exists + # the exception message contains the task_id that was matched in the format + # 'Task already exists. - matched id(s): [xxxx]' + task_id = ast.literal_eval(e.message.split("matched id(s):")[-1].strip())[0] + else: + raise Exception(repr(e)) + # mark to remove the uploaded task + TestBase._mark_entity_for_removal("task", task_id) + TestBase.logger.info("collected from test_run_functions: {}".format(task_id)) + + task = openml.tasks.get_task(task_id) run = openml.runs.run_model_on_task(model=clf, task=task, avoid_duplicate_runs=False,) run_ = run.publish() TestBase._mark_entity_for_removal("run", run_.run_id) @@ -950,55 +1033,6 @@ def test_initialize_model_from_run(self): self.assertEqual(flowS.components["Imputer"].parameters["strategy"], '"most_frequent"') self.assertEqual(flowS.components["VarianceThreshold"].parameters["threshold"], "0.05") - @pytest.mark.flaky() - def test_get_run_trace(self): - # get_run_trace is already tested implicitly in test_run_and_publish - # this test is a bit additional. - num_iterations = 10 - num_folds = 1 - task_id = 119 - - task = openml.tasks.get_task(task_id) - - # IMPORTANT! Do not sentinel this flow. 
is faster if we don't wait - # on openml server - clf = RandomizedSearchCV( - RandomForestClassifier(random_state=42, n_estimators=5), - { - "max_depth": [3, None], - "max_features": [1, 2, 3, 4], - "bootstrap": [True, False], - "criterion": ["gini", "entropy"], - }, - num_iterations, - random_state=42, - cv=3, - ) - - # [SPEED] make unit test faster by exploiting run information - # from the past - try: - # in case the run did not exists yet - run = openml.runs.run_model_on_task(model=clf, task=task, avoid_duplicate_runs=True,) - - self.assertEqual( - len(run.trace.trace_iterations), num_iterations * num_folds, - ) - run = run.publish() - TestBase._mark_entity_for_removal("run", run.run_id) - TestBase.logger.info("collected from test_run_functions: {}".format(run.run_id)) - self._wait_for_processed_run(run.run_id, 400) - run_id = run.run_id - except openml.exceptions.OpenMLRunsExistError as e: - # The only error we expect, should fail otherwise. - run_ids = [int(run_id) for run_id in e.run_ids] - self.assertGreater(len(run_ids), 0) - run_id = random.choice(list(run_ids)) - - # now the actual unit test ... - run_trace = openml.runs.get_run_trace(run_id) - self.assertEqual(len(run_trace.trace_iterations), num_iterations * num_folds) - @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.20", reason="SimpleImputer doesn't handle mixed type DataFrame as input", ) @@ -1025,7 +1059,7 @@ def test__run_exists(self): ), ] - task = openml.tasks.get_task(115) + task = openml.tasks.get_task(115) # diabetes; crossvalidation for clf in clfs: try: @@ -1055,8 +1089,8 @@ def test__run_exists(self): def test_run_with_illegal_flow_id(self): # check the case where the user adds an illegal flow id to a # non-existing flow - task = openml.tasks.get_task(115) + task = openml.tasks.get_task(115) # diabetes; crossvalidation clf = DecisionTreeClassifier() flow = self.extension.model_to_flow(clf) flow, _ = self._add_sentinel_to_flow_name(flow, None) @@ -1072,7 +1106,7 @@ def test_run_with_illegal_flow_id_after_load(self): # Same as `test_run_with_illegal_flow_id`, but test this error is also # caught if the run is stored to and loaded from disk first. - task = openml.tasks.get_task(115) + task = openml.tasks.get_task(115) # diabetes; crossvalidation clf = DecisionTreeClassifier() flow = self.extension.model_to_flow(clf) flow, _ = self._add_sentinel_to_flow_name(flow, None) @@ -1096,7 +1130,7 @@ def test_run_with_illegal_flow_id_1(self): # Check the case where the user adds an illegal flow id to an existing # flow. Comes to a different value error than the previous test - task = openml.tasks.get_task(115) + task = openml.tasks.get_task(115) # diabetes; crossvalidation clf = DecisionTreeClassifier() flow_orig = self.extension.model_to_flow(clf) try: @@ -1118,7 +1152,7 @@ def test_run_with_illegal_flow_id_1_after_load(self): # Same as `test_run_with_illegal_flow_id_1`, but test this error is # also caught if the run is stored to and loaded from disk first.
- task = openml.tasks.get_task(115) + task = openml.tasks.get_task(115) # diabetes; crossvalidation clf = DecisionTreeClassifier() flow_orig = self.extension.model_to_flow(clf) try: @@ -1149,18 +1183,15 @@ def test_run_with_illegal_flow_id_1_after_load(self): reason="OneHotEncoder cannot handle mixed type DataFrame as input", ) def test__run_task_get_arffcontent(self): - task = openml.tasks.get_task(7) + task = openml.tasks.get_task(7) # kr-vs-kp; crossvalidation num_instances = 3196 num_folds = 10 num_repeats = 1 - flow = unittest.mock.Mock() - flow.name = "dummy" clf = make_pipeline( OneHotEncoder(handle_unknown="ignore"), SGDClassifier(loss="log", random_state=1) ) res = openml.runs.functions._run_task_get_arffcontent( - flow=flow, extension=self.extension, model=clf, task=task, @@ -1371,9 +1402,7 @@ def test_run_on_dataset_with_missing_labels_dataframe(self): # Check that _run_task_get_arffcontent works when one of the class # labels only declared in the arff file, but is not present in the # actual data - flow = unittest.mock.Mock() - flow.name = "dummy" - task = openml.tasks.get_task(2) + task = openml.tasks.get_task(2) # anneal; crossvalidation from sklearn.compose import ColumnTransformer @@ -1387,7 +1416,6 @@ def test_run_on_dataset_with_missing_labels_dataframe(self): ) # build a sklearn classifier data_content, _, _, _ = _run_task_get_arffcontent( - flow=flow, model=model, task=task, extension=self.extension, @@ -1409,9 +1437,7 @@ def test_run_on_dataset_with_missing_labels_array(self): # Check that _run_task_get_arffcontent works when one of the class # labels only declared in the arff file, but is not present in the # actual data - flow = unittest.mock.Mock() - flow.name = "dummy" - task = openml.tasks.get_task(2) + task = openml.tasks.get_task(2) # anneal; crossvalidation # task_id=2 on test server has 38 columns with 6 numeric columns cont_idx = [3, 4, 8, 32, 33, 34] cat_idx = list(set(np.arange(38)) - set(cont_idx)) @@ -1432,7 +1458,6 @@ def test_run_on_dataset_with_missing_labels_array(self): ) # build a sklearn classifier data_content, _, _, _ = _run_task_get_arffcontent( - flow=flow, model=model, task=task, extension=self.extension, @@ -1463,7 +1488,7 @@ def test_run_flow_on_task_downloaded_flow(self): TestBase.logger.info("collected from test_run_functions: {}".format(flow.flow_id)) downloaded_flow = openml.flows.get_flow(flow.flow_id) - task = openml.tasks.get_task(119) # diabetes + task = openml.tasks.get_task(self.TEST_SERVER_TASK_SIMPLE["task_id"]) run = openml.runs.run_flow_on_task( flow=downloaded_flow, task=task, avoid_duplicate_runs=False, upload_flow=False, ) @@ -1483,20 +1508,26 @@ def test_format_prediction_non_supervised(self): format_prediction(clustering, *ignored_input) def test_format_prediction_classification_no_probabilities(self): - classification = openml.tasks.get_task(self.TEST_SERVER_TASK_SIMPLE[0], download_data=False) + classification = openml.tasks.get_task( + self.TEST_SERVER_TASK_SIMPLE["task_id"], download_data=False + ) ignored_input = [0] * 5 with self.assertRaisesRegex(ValueError, "`proba` is required for classification task"): format_prediction(classification, *ignored_input, proba=None) def test_format_prediction_classification_incomplete_probabilities(self): - classification = openml.tasks.get_task(self.TEST_SERVER_TASK_SIMPLE[0], download_data=False) + classification = openml.tasks.get_task( + self.TEST_SERVER_TASK_SIMPLE["task_id"], download_data=False + ) ignored_input = [0] * 5 incomplete_probabilities = {c: 0.2 for c in 
classification.class_labels[1:]} with self.assertRaisesRegex(ValueError, "Each class should have a predicted probability"): format_prediction(classification, *ignored_input, proba=incomplete_probabilities) def test_format_prediction_task_without_classlabels_set(self): - classification = openml.tasks.get_task(self.TEST_SERVER_TASK_SIMPLE[0], download_data=False) + classification = openml.tasks.get_task( + self.TEST_SERVER_TASK_SIMPLE["task_id"], download_data=False + ) classification.class_labels = None ignored_input = [0] * 5 with self.assertRaisesRegex( @@ -1505,14 +1536,146 @@ def test_format_prediction_task_without_classlabels_set(self): format_prediction(classification, *ignored_input, proba={}) def test_format_prediction_task_learning_curve_sample_not_set(self): - learning_curve = openml.tasks.get_task(801, download_data=False) + learning_curve = openml.tasks.get_task(801, download_data=False) # diabetes;crossvalidation probabilities = {c: 0.2 for c in learning_curve.class_labels} ignored_input = [0] * 5 with self.assertRaisesRegex(ValueError, "`sample` can not be none for LearningCurveTask"): format_prediction(learning_curve, *ignored_input, sample=None, proba=probabilities) def test_format_prediction_task_regression(self): - regression = openml.tasks.get_task(self.TEST_SERVER_TASK_REGRESSION[0], download_data=False) + task_meta_data = self.TEST_SERVER_TASK_REGRESSION["task_meta_data"] + _task_id = check_task_existence(**task_meta_data) + if _task_id is not None: + task_id = _task_id + else: + new_task = openml.tasks.create_task(**task_meta_data) + # publishes the new task + try: + new_task = new_task.publish() + task_id = new_task.task_id + except OpenMLServerException as e: + if e.code == 614: # Task already exists + # the exception message contains the task_id that was matched in the format + # 'Task already exists. - matched id(s): [xxxx]' + task_id = ast.literal_eval(e.message.split("matched id(s):")[-1].strip())[0] + else: + raise Exception(repr(e)) + # mark to remove the uploaded task + TestBase._mark_entity_for_removal("task", task_id) + TestBase.logger.info("collected from test_run_functions: {}".format(task_id)) + + regression = openml.tasks.get_task(task_id, download_data=False) ignored_input = [0] * 5 res = format_prediction(regression, *ignored_input) self.assertListEqual(res, [0] * 5) + + @unittest.skipIf( + LooseVersion(sklearn.__version__) < "0.21", + reason="couldn't perform local tests successfully w/o bloating RAM", + ) + @unittest.mock.patch("openml.extensions.sklearn.SklearnExtension._prevent_optimize_n_jobs") + def test__run_task_get_arffcontent_2(self, parallel_mock): + """ Tests if a run executed in parallel is collated correctly. """ + task = openml.tasks.get_task(7) # Supervised Classification on kr-vs-kp + x, y = task.get_X_and_y(dataset_format="dataframe") + num_instances = x.shape[0] + line_length = 6 + len(task.class_labels) + clf = SGDClassifier(loss="log", random_state=1) + n_jobs = 2 + backend = "loky" if LooseVersion(joblib.__version__) > "0.11" else "multiprocessing" + with parallel_backend(backend, n_jobs=n_jobs): + res = openml.runs.functions._run_task_get_arffcontent( + extension=self.extension, + model=clf, + task=task, + add_local_measures=True, + dataset_format="array", # "dataframe" would require handling of categoricals + n_jobs=n_jobs, + ) + # This unit test will fail if joblib is unable to distribute successfully since the + # function _run_model_on_fold is being mocked out. 
However, for a new spawned worker, it + # is not and the mock call_count should remain 0 while the subsequent check of actual + # results should also hold, only on successful distribution of tasks to workers. + # The _prevent_optimize_n_jobs() is a function executed within the _run_model_on_fold() + # block and mocking this function doesn't affect rest of the pipeline, but is adequately + # indicative if _run_model_on_fold() is being called or not. + self.assertEqual(parallel_mock.call_count, 0) + self.assertIsInstance(res[0], list) + self.assertEqual(len(res[0]), num_instances) + self.assertEqual(len(res[0][0]), line_length) + self.assertEqual(len(res[2]), 7) + self.assertEqual(len(res[3]), 7) + expected_scores = [ + 0.965625, + 0.94375, + 0.946875, + 0.953125, + 0.96875, + 0.965625, + 0.9435736677115988, + 0.9467084639498433, + 0.9749216300940439, + 0.9655172413793104, + ] + scores = [v for k, v in res[2]["predictive_accuracy"][0].items()] + np.testing.assert_array_almost_equal( + scores, expected_scores, decimal=2 if os.name == "nt" else 7 + ) + + @unittest.skipIf( + LooseVersion(sklearn.__version__) < "0.21", + reason="couldn't perform local tests successfully w/o bloating RAM", + ) + @unittest.mock.patch("openml.extensions.sklearn.SklearnExtension._prevent_optimize_n_jobs") + def test_joblib_backends(self, parallel_mock): + """ Tests evaluation of a run using various joblib backends and n_jobs. """ + task = openml.tasks.get_task(7) # Supervised Classification on kr-vs-kp + x, y = task.get_X_and_y(dataset_format="dataframe") + num_instances = x.shape[0] + line_length = 6 + len(task.class_labels) + + backend_choice = "loky" if LooseVersion(joblib.__version__) > "0.11" else "multiprocessing" + for n_jobs, backend, len_time_stats, call_count in [ + (1, backend_choice, 7, 10), + (2, backend_choice, 4, 10), + (-1, backend_choice, 1, 10), + (1, "threading", 7, 20), + (-1, "threading", 1, 30), + (1, "sequential", 7, 40), + ]: + clf = sklearn.model_selection.RandomizedSearchCV( + estimator=sklearn.ensemble.RandomForestClassifier(n_estimators=5), + param_distributions={ + "max_depth": [3, None], + "max_features": [1, 2, 3, 4], + "min_samples_split": [2, 3, 4, 5, 6, 7, 8, 9, 10], + "min_samples_leaf": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + "bootstrap": [True, False], + "criterion": ["gini", "entropy"], + }, + random_state=1, + cv=sklearn.model_selection.StratifiedKFold( + n_splits=2, shuffle=True, random_state=1 + ), + n_iter=5, + n_jobs=n_jobs, + ) + with parallel_backend(backend, n_jobs=n_jobs): + res = openml.runs.functions._run_task_get_arffcontent( + extension=self.extension, + model=clf, + task=task, + add_local_measures=True, + dataset_format="array", # "dataframe" would require handling of categoricals + n_jobs=n_jobs, + ) + self.assertEqual(type(res[0]), list) + self.assertEqual(len(res[0]), num_instances) + self.assertEqual(len(res[0][0]), line_length) + # usercpu_time_millis_* not recorded when n_jobs > 1 + # *_time_millis_* not recorded when n_jobs = -1 + self.assertEqual(len(res[2]), len_time_stats) + self.assertEqual(len(res[3]), len_time_stats) + self.assertEqual(len(res[2]["predictive_accuracy"][0]), 10) + self.assertEqual(len(res[3]["predictive_accuracy"][0]), 10) + self.assertEqual(parallel_mock.call_count, call_count) diff --git a/tests/test_setups/test_setup_functions.py b/tests/test_setups/test_setup_functions.py index e89318728..538b08821 100644 --- a/tests/test_setups/test_setup_functions.py +++ b/tests/test_setups/test_setup_functions.py @@ -67,7 +67,7 @@ def 
_existing_setup_exists(self, classif): self.assertFalse(setup_id) # now run the flow on an easy task: - task = openml.tasks.get_task(115) # diabetes + task = openml.tasks.get_task(115) # diabetes; crossvalidation run = openml.runs.run_flow_on_task(flow, task) # spoof flow id, otherwise the sentinel is ignored run.flow_id = flow.flow_id diff --git a/tests/test_study/__init__.py b/tests/test_study/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/test_study/test_study_examples.py b/tests/test_study/test_study_examples.py index fdb2747ec..682359a61 100644 --- a/tests/test_study/test_study_examples.py +++ b/tests/test_study/test_study_examples.py @@ -1,6 +1,7 @@ # License: BSD 3-Clause -from openml.testing import TestBase, SimpleImputer, CustomImputer, cat, cont +from openml.testing import TestBase +from openml.extensions.sklearn import cat, cont import sklearn import unittest @@ -12,8 +13,8 @@ class TestStudyFunctions(TestBase): """Test the example code of Bischl et al. (2018)""" @unittest.skipIf( - LooseVersion(sklearn.__version__) < "0.20", - reason="columntransformer introduction in 0.20.0", + LooseVersion(sklearn.__version__) < "0.24", + reason="columntransformer introduction in 0.24.0", ) def test_Figure1a(self): """Test listing in Figure 1a on a single task and the old OpenML100 study. @@ -38,15 +39,14 @@ def test_Figure1a(self): import openml import sklearn.metrics import sklearn.tree + from sklearn.impute import SimpleImputer from sklearn.pipeline import Pipeline, make_pipeline from sklearn.compose import ColumnTransformer from sklearn.preprocessing import OneHotEncoder, StandardScaler benchmark_suite = openml.study.get_study("OpenML100", "tasks") # obtain the benchmark suite - cat_imp = make_pipeline( - SimpleImputer(strategy="most_frequent"), OneHotEncoder(handle_unknown="ignore") - ) - cont_imp = make_pipeline(CustomImputer(), StandardScaler()) + cat_imp = OneHotEncoder(handle_unknown="ignore") + cont_imp = make_pipeline(SimpleImputer(strategy="median"), StandardScaler()) ct = ColumnTransformer([("cat", cat_imp, cat), ("cont", cont_imp, cont)]) clf = Pipeline( steps=[("preprocess", ct), ("estimator", sklearn.tree.DecisionTreeClassifier())] diff --git a/tests/test_study/test_study_functions.py b/tests/test_study/test_study_functions.py index 993771c90..e028ba2bd 100644 --- a/tests/test_study/test_study_functions.py +++ b/tests/test_study/test_study_functions.py @@ -4,6 +4,7 @@ import openml.study from openml.testing import TestBase import pandas as pd +import pytest class TestStudyFunctions(TestBase): @@ -113,6 +114,7 @@ def test_publish_benchmark_suite(self): self.assertEqual(study_downloaded.status, "deactivated") # can't delete study, now it's not longer in preparation + @pytest.mark.flaky() def test_publish_study(self): # get some random runs to attach run_list = openml.evaluations.list_evaluations("predictive_accuracy", size=10) @@ -133,8 +135,8 @@ def test_publish_study(self): run_ids=list(run_list.keys()), ) study.publish() - # not tracking upload for delete since _delete_entity called end of function - # asserting return status from openml.study.delete_study() + TestBase._mark_entity_for_removal("study", study.id) + TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], study.id)) self.assertGreater(study.id, 0) study_downloaded = openml.study.get_study(study.id) self.assertEqual(study_downloaded.alias, fixt_alias) @@ -213,9 +215,8 @@ def test_study_attach_illegal(self): def test_study_list(self): study_list = 
openml.study.list_studies(status="in_preparation") # might fail if server is recently resetted - self.assertGreater(len(study_list), 2) + self.assertGreaterEqual(len(study_list), 2) def test_study_list_output_format(self): study_list = openml.study.list_studies(status="in_preparation", output_format="dataframe") self.assertIsInstance(study_list, pd.DataFrame) - self.assertGreater(len(study_list), 2) diff --git a/tests/test_tasks/test_classification_task.py b/tests/test_tasks/test_classification_task.py index 4f03f8bff..c4f74c5ce 100644 --- a/tests/test_tasks/test_classification_task.py +++ b/tests/test_tasks/test_classification_task.py @@ -13,7 +13,7 @@ class OpenMLClassificationTaskTest(OpenMLSupervisedTaskTest): def setUp(self, n_levels: int = 1): super(OpenMLClassificationTaskTest, self).setUp() - self.task_id = 119 + self.task_id = 119 # diabetes self.task_type = TaskType.SUPERVISED_CLASSIFICATION self.estimation_procedure = 1 diff --git a/tests/test_tasks/test_learning_curve_task.py b/tests/test_tasks/test_learning_curve_task.py index 9f0157187..b1422d308 100644 --- a/tests/test_tasks/test_learning_curve_task.py +++ b/tests/test_tasks/test_learning_curve_task.py @@ -13,7 +13,7 @@ class OpenMLLearningCurveTaskTest(OpenMLSupervisedTaskTest): def setUp(self, n_levels: int = 1): super(OpenMLLearningCurveTaskTest, self).setUp() - self.task_id = 801 + self.task_id = 801 # diabetes self.task_type = TaskType.LEARNING_CURVE self.estimation_procedure = 13 diff --git a/tests/test_tasks/test_regression_task.py b/tests/test_tasks/test_regression_task.py index e751e63b5..c38d8fa91 100644 --- a/tests/test_tasks/test_regression_task.py +++ b/tests/test_tasks/test_regression_task.py @@ -1,8 +1,13 @@ # License: BSD 3-Clause +import ast import numpy as np +import openml from openml.tasks import TaskType +from openml.testing import TestBase +from openml.testing import check_task_existence +from openml.exceptions import OpenMLServerException from .test_supervised_task import OpenMLSupervisedTaskTest @@ -11,9 +16,34 @@ class OpenMLRegressionTaskTest(OpenMLSupervisedTaskTest): __test__ = True def setUp(self, n_levels: int = 1): - super(OpenMLRegressionTaskTest, self).setUp() - self.task_id = 625 + + task_meta_data = { + "task_type": TaskType.SUPERVISED_REGRESSION, + "dataset_id": 105, # wisconsin + "estimation_procedure_id": 7, + "target_name": "time", + } + _task_id = check_task_existence(**task_meta_data) + if _task_id is not None: + task_id = _task_id + else: + new_task = openml.tasks.create_task(**task_meta_data) + # publishes the new task + try: + new_task = new_task.publish() + task_id = new_task.task_id + # mark to remove the uploaded task + TestBase._mark_entity_for_removal("task", task_id) + TestBase.logger.info("collected from test_run_functions: {}".format(task_id)) + except OpenMLServerException as e: + if e.code == 614: # Task already exists + # the exception message contains the task_id that was matched in the format + # 'Task already exists. 
- matched id(s): [xxxx]' + task_id = ast.literal_eval(e.message.split("matched id(s):")[-1].strip())[0] + else: + raise Exception(repr(e)) + self.task_id = task_id self.task_type = TaskType.SUPERVISED_REGRESSION self.estimation_procedure = 7 diff --git a/tests/test_tasks/test_task_functions.py b/tests/test_tasks/test_task_functions.py index 5f9b65495..418b21b65 100644 --- a/tests/test_tasks/test_task_functions.py +++ b/tests/test_tasks/test_task_functions.py @@ -66,7 +66,7 @@ def _check_task(self, task): self.assertIn(task["status"], ["in_preparation", "active", "deactivated"]) def test_list_tasks_by_type(self): - num_curves_tasks = 200 # number is flexible, check server if fails + num_curves_tasks = 198 # number is flexible, check server if fails ttid = TaskType.LEARNING_CURVE tasks = openml.tasks.list_tasks(task_type=ttid) self.assertGreaterEqual(len(tasks), num_curves_tasks) @@ -110,7 +110,7 @@ def test_list_tasks_paginate(self): self._check_task(tasks[tid]) def test_list_tasks_per_type_paginate(self): - size = 10 + size = 40 max = 100 task_types = [ TaskType.SUPERVISED_CLASSIFICATION, @@ -139,7 +139,7 @@ def test__get_task_live(self): openml.tasks.get_task(34536) def test_get_task(self): - task = openml.tasks.get_task(1) + task = openml.tasks.get_task(1) # anneal; crossvalidation self.assertIsInstance(task, OpenMLTask) self.assertTrue( os.path.exists( @@ -158,7 +158,7 @@ def test_get_task(self): ) def test_get_task_lazy(self): - task = openml.tasks.get_task(2, download_data=False) + task = openml.tasks.get_task(2, download_data=False) # anneal; crossvalidation self.assertIsInstance(task, OpenMLTask) self.assertTrue( os.path.exists( @@ -198,7 +198,7 @@ def assert_and_raise(*args, **kwargs): get_dataset.side_effect = assert_and_raise try: - openml.tasks.get_task(1) + openml.tasks.get_task(1) # anneal; crossvalidation except WeirdException: pass # Now the file should no longer exist @@ -219,7 +219,7 @@ def test_get_task_different_types(self): openml.tasks.functions.get_task(126033) def test_download_split(self): - task = openml.tasks.get_task(1) + task = openml.tasks.get_task(1) # anneal; crossvalidation split = task.download_split() self.assertEqual(type(split), OpenMLSplit) self.assertTrue( diff --git a/tests/test_tasks/test_task_methods.py b/tests/test_tasks/test_task_methods.py index 137e29fe4..9878feb96 100644 --- a/tests/test_tasks/test_task_methods.py +++ b/tests/test_tasks/test_task_methods.py @@ -15,7 +15,7 @@ def tearDown(self): super(OpenMLTaskMethodsTest, self).tearDown() def test_tagging(self): - task = openml.tasks.get_task(1) + task = openml.tasks.get_task(1) # anneal; crossvalidation tag = "testing_tag_{}_{}".format(self.id(), time()) task_list = openml.tasks.list_tasks(tag=tag) self.assertEqual(len(task_list), 0) @@ -40,9 +40,9 @@ def test_get_train_and_test_split_indices(self): self.assertEqual(681, train_indices[-1]) self.assertEqual(583, test_indices[0]) self.assertEqual(24, test_indices[-1]) - self.assertRaisesRegexp( + self.assertRaisesRegex( ValueError, "Fold 10 not known", task.get_train_test_split_indices, 10, 0 ) - self.assertRaisesRegexp( + self.assertRaisesRegex( ValueError, "Repeat 10 not known", task.get_train_test_split_indices, 0, 10 ) diff --git a/tests/test_utils/test_utils.py b/tests/test_utils/test_utils.py index 9729100bb..4fa08e1ab 100644 --- a/tests/test_utils/test_utils.py +++ b/tests/test_utils/test_utils.py @@ -1,17 +1,15 @@ -from openml.testing import TestBase +import os +import tempfile +import unittest.mock + import numpy as np -import openml 
-import sys -if sys.version_info[0] >= 3: - from unittest import mock -else: - import mock +import openml +from openml.testing import TestBase class OpenMLTaskTest(TestBase): _multiprocess_can_split_ = True - _batch_size = 25 def mocked_perform_api_call(call, request_method): # TODO: JvR: Why is this not a staticmethod? @@ -21,7 +19,7 @@ def mocked_perform_api_call(call, request_method): def test_list_all(self): openml.utils._list_all(listing_call=openml.tasks.functions._list_tasks) - @mock.patch("openml._api_calls._perform_api_call", side_effect=mocked_perform_api_call) + @unittest.mock.patch("openml._api_calls._perform_api_call", side_effect=mocked_perform_api_call) def test_list_all_few_results_available(self, _perform_api_call): # we want to make sure that the number of api calls is only 1. # Although we have multiple versions of the iris dataset, there is only @@ -33,7 +31,7 @@ def test_list_all_few_results_available(self, _perform_api_call): def test_list_all_for_datasets(self): required_size = 127 # default test server reset value - datasets = openml.datasets.list_datasets(batch_size=self._batch_size, size=required_size) + datasets = openml.datasets.list_datasets(batch_size=100, size=required_size) self.assertEqual(len(datasets), required_size) for did in datasets: @@ -53,13 +51,13 @@ def test_list_datasets_with_high_size_parameter(self): def test_list_all_for_tasks(self): required_size = 1068 # default test server reset value - tasks = openml.tasks.list_tasks(batch_size=self._batch_size, size=required_size) + tasks = openml.tasks.list_tasks(batch_size=1000, size=required_size) self.assertEqual(len(tasks), required_size) def test_list_all_for_flows(self): required_size = 15 # default test server reset value - flows = openml.flows.list_flows(batch_size=self._batch_size, size=required_size) + flows = openml.flows.list_flows(batch_size=25, size=required_size) self.assertEqual(len(flows), required_size) @@ -73,7 +71,7 @@ def test_list_all_for_setups(self): def test_list_all_for_runs(self): required_size = 21 - runs = openml.runs.list_runs(batch_size=self._batch_size, size=required_size) + runs = openml.runs.list_runs(batch_size=25, size=required_size) # might not be on test server after reset, please rerun test at least once if fails self.assertEqual(len(runs), required_size) @@ -87,3 +85,19 @@ def test_list_all_for_evaluations(self): # might not be on test server after reset, please rerun test at least once if fails self.assertEqual(len(evaluations), required_size) + + @unittest.mock.patch("openml.config.get_cache_directory") + @unittest.skipIf(os.name == "nt", "https://github.com/openml/openml-python/issues/1033") + def test__create_cache_directory(self, config_mock): + with tempfile.TemporaryDirectory(dir=self.workdir) as td: + config_mock.return_value = td + openml.utils._create_cache_directory("abc") + self.assertTrue(os.path.exists(os.path.join(td, "abc"))) + subdir = os.path.join(td, "def") + os.mkdir(subdir) + os.chmod(subdir, 0o444) + config_mock.return_value = subdir + with self.assertRaisesRegex( + openml.exceptions.OpenMLCacheException, r"Cannot create cache directory", + ): + openml.utils._create_cache_directory("ghi")