openml · PGijsbers · Feb 10, 2021 · Oct 30, 2020 · Dec 15, 2020 · Jan 28, 2021
diff --git a/openml/_api_calls.py b/openml/_api_calls.py
@@ -155,7 +155,7 @@ def _read_url_files(url, data=None, file_elements=None):
 
 def __read_url(url, request_method, data=None, md5_checksum=None):
     data = {} if data is None else data
-    if config.apikey is not None:
+    if config.apikey:
         data["api_key"] = config.apikey
     return _send_request(
         request_method=request_method, url=url, data=data, md5_checksum=md5_checksum

diff --git a/openml/config.py b/openml/config.py
@@ -7,6 +7,8 @@
 import logging
 import logging.handlers
 import os
+from pathlib import Path
+import platform
 from typing import Tuple, cast
 
 from io import StringIO
@@ -19,7 +21,7 @@
 file_handler = None
 
 
-def _create_log_handlers():
+def _create_log_handlers(create_file_handler=True):
     """ Creates but does not attach the log handlers. """
     global console_handler, file_handler
     if console_handler is not None or file_handler is not None:
@@ -32,12 +34,13 @@ def _create_log_handlers():
     console_handler = logging.StreamHandler()
     console_handler.setFormatter(output_formatter)
 
-    one_mb = 2 ** 20
-    log_path = os.path.join(cache_directory, "openml_python.log")
-    file_handler = logging.handlers.RotatingFileHandler(
-        log_path, maxBytes=one_mb, backupCount=1, delay=True
-    )
-    file_handler.setFormatter(output_formatter)
+    if create_file_handler:
+        one_mb = 2 ** 20
+        log_path = os.path.join(cache_directory, "openml_python.log")
+        file_handler = logging.handlers.RotatingFileHandler(
+            log_path, maxBytes=one_mb, backupCount=1, delay=True
+        )
+        file_handler.setFormatter(output_formatter)
 
 
 def _convert_log_levels(log_level: int) -> Tuple[int, int]:
@@ -83,15 +86,18 @@ def set_file_log_level(file_output_level: int):
 
 # Default values (see also https://github.com/openml/OpenML/wiki/Client-API-Standards)
 _defaults = {
-    "apikey": None,
+    "apikey": "",
     "server": "https://www.openml.org/api/v1/xml",
-    "cachedir": os.path.expanduser(os.path.join("~", ".openml", "cache")),
+    "cachedir": (
+        os.environ.get("XDG_CACHE_HOME", os.path.join("~", ".cache", "openml",))
+        if platform.system() == "Linux"
+        else os.path.join("~", ".openml")
+    ),
     "avoid_duplicate_runs": "True",
-    "connection_n_retries": 10,
-    "max_retries": 20,
+    "connection_n_retries": "10",
+    "max_retries": "20",
 }
 
-config_file = os.path.expanduser(os.path.join("~", ".openml", "config"))
 
 # Default values are actually added here in the _setup() function which is
 # called at the end of this module
@@ -116,8 +122,8 @@ def get_server_base_url() -> str:
 avoid_duplicate_runs = True if _defaults["avoid_duplicate_runs"] == "True" else False
 
 # Number of retries if the connection breaks
-connection_n_retries = _defaults["connection_n_retries"]
-max_retries = _defaults["max_retries"]
+connection_n_retries = int(_defaults["connection_n_retries"])
+max_retries = int(_defaults["max_retries"])
 
 
 class ConfigurationForExamples:
@@ -187,62 +193,78 @@ def _setup():
     global connection_n_retries
     global max_retries
 
-    # read config file, create cache directory
-    try:
-        os.mkdir(os.path.expanduser(os.path.join("~", ".openml")))
-    except FileExistsError:
-        # For other errors, we want to propagate the error as openml does not work without cache
-        pass
+    if platform.system() == "Linux":
+        config_dir = Path(os.environ.get("XDG_CONFIG_HOME", Path("~") / ".config" / "openml"))
+    else:
+        config_dir = Path("~") / ".openml"
+    # Still use os.path.expanduser to trigger the mock in the unit test
+    config_dir = Path(os.path.expanduser(config_dir))
+    config_file = config_dir / "config"
+
+    # read config file, create directory for config file
+    if not os.path.exists(config_dir):
+        try:
+            os.mkdir(config_dir)
+            cache_exists = True
+        except PermissionError:
+            cache_exists = False
+    else:
+        cache_exists = True
+
+    if cache_exists:
+        _create_log_handlers()
+    else:
+        _create_log_handlers(create_file_handler=False)
+        openml_logger.warning(
+            "No permission to create OpenML directory at %s! This can result in OpenML-Python "
+            "not working properly." % config_dir
+        )
 
-    config = _parse_config()
+    config = _parse_config(config_file)
     apikey = config.get("FAKE_SECTION", "apikey")
     server = config.get("FAKE_SECTION", "server")
 
-    short_cache_dir = config.get("FAKE_SECTION", "cachedir")
-    cache_directory = os.path.expanduser(short_cache_dir)
+    cache_dir = config.get("FAKE_SECTION", "cachedir")
+    cache_directory = os.path.expanduser(cache_dir)
 
     # create the cache subdirectory
-    try:
-        os.mkdir(cache_directory)
-    except FileExistsError:
-        # For other errors, we want to propagate the error as openml does not work without cache
-        pass
+    if not os.path.exists(cache_directory):
+        try:
+            os.mkdir(cache_directory)
+        except PermissionError:
+            openml_logger.warning(
+                "No permission to create openml cache directory at %s! This can result in "
+                "OpenML-Python not working properly." % cache_directory
+            )
 
     avoid_duplicate_runs = config.getboolean("FAKE_SECTION", "avoid_duplicate_runs")
-    connection_n_retries = config.get("FAKE_SECTION", "connection_n_retries")
-    max_retries = config.get("FAKE_SECTION", "max_retries")
+    connection_n_retries = int(config.get("FAKE_SECTION", "connection_n_retries"))
+    max_retries = int(config.get("FAKE_SECTION", "max_retries"))
     if connection_n_retries > max_retries:
         raise ValueError(
             "A higher number of retries than {} is not allowed to keep the "
             "server load reasonable".format(max_retries)
         )
 
 
-def _parse_config():
+def _parse_config(config_file: str):
     """ Parse the config file, set up defaults. """
     config = configparser.RawConfigParser(defaults=_defaults)
 
-    if not os.path.exists(config_file):
-        # Create an empty config file if there was none so far
-        fh = open(config_file, "w")
-        fh.close()
-        logger.info(
-            "Could not find a configuration file at %s. Going to "
-            "create an empty file there." % config_file
-        )
-
+    # The ConfigParser requires a [SECTION_HEADER], which we do not expect in our config file.
+    # Cheat the ConfigParser module by adding a fake section header
+    config_file_ = StringIO()
+    config_file_.write("[FAKE_SECTION]\n")
     try:
-        # The ConfigParser requires a [SECTION_HEADER], which we do not expect in our config file.
-        # Cheat the ConfigParser module by adding a fake section header
-        config_file_ = StringIO()
-        config_file_.write("[FAKE_SECTION]\n")
         with open(config_file) as fh:
             for line in fh:
                 config_file_.write(line)
-        config_file_.seek(0)
-        config.read_file(config_file_)
+    except FileNotFoundError:
+        logger.info("No config file found at %s, using default configuration.", config_file)
     except OSError as e:
-        logger.info("Error opening file %s: %s", config_file, e.message)
+        logger.info("Error opening file %s: %s", config_file, e.args[0])
+    config_file_.seek(0)
+    config.read_file(config_file_)
     return config
 
 
@@ -257,11 +279,7 @@ def get_cache_directory():
     """
     url_suffix = urlparse(server).netloc
     reversed_url_suffix = os.sep.join(url_suffix.split(".")[::-1])
-    if not cache_directory:
-        _cachedir = _defaults(cache_directory)
-    else:
-        _cachedir = cache_directory
-    _cachedir = os.path.join(_cachedir, reversed_url_suffix)
+    _cachedir = os.path.join(cache_directory, reversed_url_suffix)
     return _cachedir
 
 
@@ -297,4 +315,3 @@ def set_cache_directory(cachedir):
 ]
 
 _setup()
-_create_log_handlers()
diff --git a/openml/testing.py b/openml/testing.py
@@ -8,15 +8,8 @@
 import time
 from typing import Dict, Union, cast
 import unittest
-import warnings
 import pandas as pd
 
-# Currently, importing oslo raises a lot of warning that it will stop working
-# under python3.8; remove this once they disappear
-with warnings.catch_warnings():
-    warnings.simplefilter("ignore")
-    from oslo_concurrency import lockutils
-
 import openml
 from openml.tasks import TaskType
 from openml.exceptions import OpenMLServerException
@@ -100,13 +93,6 @@ def setUp(self, n_levels: int = 1):
         openml.config.avoid_duplicate_runs = False
         openml.config.cache_directory = self.workdir
 
-        # If we're on travis, we save the api key in the config file to allow
-        # the notebook tests to read them.
-        if os.environ.get("TRAVIS") or os.environ.get("APPVEYOR"):
-            with lockutils.external_lock("config", lock_path=self.workdir):
-                with open(openml.config.config_file, "w") as fh:
-                    fh.write("apikey = %s" % openml.config.apikey)
-
         # Increase the number of retries to avoid spurious server failures
         self.connection_n_retries = openml.config.connection_n_retries
         openml.config.connection_n_retries = 10

diff --git a/openml/utils.py b/openml/utils.py
@@ -244,7 +244,7 @@ def _list_all(listing_call, output_format="dict", *args, **filters):
                 limit=batch_size,
                 offset=current_offset,
                 output_format=output_format,
-                **active_filters
+                **active_filters,
             )
         except openml.exceptions.OpenMLServerNoResult:
             # we want to return an empty dict in this case
@@ -277,9 +277,11 @@ def _create_cache_directory(key):
     cache = config.get_cache_directory()
     cache_dir = os.path.join(cache, key)
     try:
-        os.makedirs(cache_dir)
-    except OSError:
-        pass
+        os.makedirs(cache_dir, exist_ok=True)
+    except Exception as e:
+        raise openml.exceptions.OpenMLCacheException(
+            f"Cannot create cache directory {cache_dir}."
+        ) from e
     return cache_dir
 
 

diff --git a/tests/test_openml/test_config.py b/tests/test_openml/test_config.py
@@ -1,15 +1,29 @@
 # License: BSD 3-Clause
 
+import tempfile
 import os
+import unittest.mock
 
 import openml.config
 import openml.testing
 
 
 class TestConfig(openml.testing.TestBase):
-    def test_config_loading(self):
-        self.assertTrue(os.path.exists(openml.config.config_file))
-        self.assertTrue(os.path.isdir(os.path.expanduser("~/.openml")))
+    @unittest.mock.patch("os.path.expanduser")
+    @unittest.mock.patch("openml.config.openml_logger.warning")
+    @unittest.mock.patch("openml.config._create_log_handlers")
+    def test_non_writable_home(self, log_handler_mock, warnings_mock, expanduser_mock):
+        with tempfile.TemporaryDirectory(dir=self.workdir) as td:
+            expanduser_mock.side_effect = (
+                os.path.join(td, "openmldir"),
+                os.path.join(td, "cachedir"),
+            )
+            os.chmod(td, 0o444)
+            openml.config._setup()
+
+        self.assertEqual(warnings_mock.call_count, 2)
+        self.assertEqual(log_handler_mock.call_count, 1)
+        self.assertFalse(log_handler_mock.call_args_list[0][1]["create_file_handler"])
 
 
 class TestConfigurationForExamples(openml.testing.TestBase):

diff --git a/tests/test_utils/test_utils.py b/tests/test_utils/test_utils.py
@@ -1,12 +1,11 @@
-from openml.testing import TestBase
+import os
+import tempfile
+import unittest.mock
+
 import numpy as np
-import openml
-import sys
 
-if sys.version_info[0] >= 3:
-    from unittest import mock
-else:
-    import mock
+import openml
+from openml.testing import TestBase
 
 
 class OpenMLTaskTest(TestBase):
@@ -20,7 +19,7 @@ def mocked_perform_api_call(call, request_method):
     def test_list_all(self):
         openml.utils._list_all(listing_call=openml.tasks.functions._list_tasks)
 
-    @mock.patch("openml._api_calls._perform_api_call", side_effect=mocked_perform_api_call)
+    @unittest.mock.patch("openml._api_calls._perform_api_call", side_effect=mocked_perform_api_call)
     def test_list_all_few_results_available(self, _perform_api_call):
         # we want to make sure that the number of api calls is only 1.
         # Although we have multiple versions of the iris dataset, there is only
@@ -86,3 +85,18 @@ def test_list_all_for_evaluations(self):
 
         # might not be on test server after reset, please rerun test at least once if fails
         self.assertEqual(len(evaluations), required_size)
+
+    @unittest.mock.patch("openml.config.get_cache_directory")
+    def test__create_cache_directory(self, config_mock):
+        with tempfile.TemporaryDirectory(dir=self.workdir) as td:
+            config_mock.return_value = td
+            openml.utils._create_cache_directory("abc")
+            self.assertTrue(os.path.exists(os.path.join(td, "abc")))
+            subdir = os.path.join(td, "def")
+            os.mkdir(subdir)
+            os.chmod(subdir, 0o444)
+            config_mock.return_value = subdir
+            with self.assertRaisesRegex(
+                openml.exceptions.OpenMLCacheException, r"Cannot create cache directory",
+            ):
+                openml.utils._create_cache_directory("ghi")