Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion openml/_api_calls.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ def _read_url_files(url, data=None, file_elements=None):

def __read_url(url, request_method, data=None, md5_checksum=None):
data = {} if data is None else data
if config.apikey is not None:
if config.apikey:
data["api_key"] = config.apikey
return _send_request(
request_method=request_method, url=url, data=data, md5_checksum=md5_checksum
Expand Down
123 changes: 70 additions & 53 deletions openml/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import logging
import logging.handlers
import os
from pathlib import Path
import platform
from typing import Tuple, cast

from io import StringIO
Expand All @@ -19,7 +21,7 @@
file_handler = None


def _create_log_handlers():
def _create_log_handlers(create_file_handler=True):
""" Creates but does not attach the log handlers. """
global console_handler, file_handler
if console_handler is not None or file_handler is not None:
Expand All @@ -32,12 +34,13 @@ def _create_log_handlers():
console_handler = logging.StreamHandler()
console_handler.setFormatter(output_formatter)

one_mb = 2 ** 20
log_path = os.path.join(cache_directory, "openml_python.log")
file_handler = logging.handlers.RotatingFileHandler(
log_path, maxBytes=one_mb, backupCount=1, delay=True
)
file_handler.setFormatter(output_formatter)
if create_file_handler:
one_mb = 2 ** 20
log_path = os.path.join(cache_directory, "openml_python.log")
file_handler = logging.handlers.RotatingFileHandler(
log_path, maxBytes=one_mb, backupCount=1, delay=True
)
file_handler.setFormatter(output_formatter)


def _convert_log_levels(log_level: int) -> Tuple[int, int]:
Expand Down Expand Up @@ -83,15 +86,18 @@ def set_file_log_level(file_output_level: int):

# Default values (see also https://github.com/openml/OpenML/wiki/Client-API-Standards)
_defaults = {
"apikey": None,
"apikey": "",
"server": "https://www.openml.org/api/v1/xml",
"cachedir": os.path.expanduser(os.path.join("~", ".openml", "cache")),
"cachedir": (
os.environ.get("XDG_CACHE_HOME", os.path.join("~", ".cache", "openml",))
if platform.system() == "Linux"
else os.path.join("~", ".openml")
),
"avoid_duplicate_runs": "True",
"connection_n_retries": 10,
"max_retries": 20,
"connection_n_retries": "10",
"max_retries": "20",
}

config_file = os.path.expanduser(os.path.join("~", ".openml", "config"))

# Default values are actually added here in the _setup() function which is
# called at the end of this module
Expand All @@ -116,8 +122,8 @@ def get_server_base_url() -> str:
avoid_duplicate_runs = True if _defaults["avoid_duplicate_runs"] == "True" else False

# Number of retries if the connection breaks
connection_n_retries = _defaults["connection_n_retries"]
max_retries = _defaults["max_retries"]
connection_n_retries = int(_defaults["connection_n_retries"])
max_retries = int(_defaults["max_retries"])


class ConfigurationForExamples:
Expand Down Expand Up @@ -187,62 +193,78 @@ def _setup():
global connection_n_retries
global max_retries

# read config file, create cache directory
try:
os.mkdir(os.path.expanduser(os.path.join("~", ".openml")))
except FileExistsError:
# For other errors, we want to propagate the error as openml does not work without cache
pass
if platform.system() == "Linux":
config_dir = Path(os.environ.get("XDG_CONFIG_HOME", Path("~") / ".config" / "openml"))
else:
config_dir = Path("~") / ".openml"
# Still use os.path.expanduser to trigger the mock in the unit test
config_dir = Path(os.path.expanduser(config_dir))
config_file = config_dir / "config"

# read config file, create directory for config file
if not os.path.exists(config_dir):
try:
os.mkdir(config_dir)
cache_exists = True
except PermissionError:
cache_exists = False
else:
cache_exists = True

if cache_exists:
_create_log_handlers()
else:
_create_log_handlers(create_file_handler=False)
openml_logger.warning(
"No permission to create OpenML directory at %s! This can result in OpenML-Python "
"not working properly." % config_dir
)

config = _parse_config()
config = _parse_config(config_file)
apikey = config.get("FAKE_SECTION", "apikey")
server = config.get("FAKE_SECTION", "server")

short_cache_dir = config.get("FAKE_SECTION", "cachedir")
cache_directory = os.path.expanduser(short_cache_dir)
cache_dir = config.get("FAKE_SECTION", "cachedir")
cache_directory = os.path.expanduser(cache_dir)

# create the cache subdirectory
try:
os.mkdir(cache_directory)
except FileExistsError:
# For other errors, we want to propagate the error as openml does not work without cache
pass
if not os.path.exists(cache_directory):
try:
os.mkdir(cache_directory)
except PermissionError:
openml_logger.warning(
"No permission to create openml cache directory at %s! This can result in "
"OpenML-Python not working properly." % cache_directory
)

avoid_duplicate_runs = config.getboolean("FAKE_SECTION", "avoid_duplicate_runs")
connection_n_retries = config.get("FAKE_SECTION", "connection_n_retries")
max_retries = config.get("FAKE_SECTION", "max_retries")
connection_n_retries = int(config.get("FAKE_SECTION", "connection_n_retries"))
max_retries = int(config.get("FAKE_SECTION", "max_retries"))
if connection_n_retries > max_retries:
raise ValueError(
"A higher number of retries than {} is not allowed to keep the "
"server load reasonable".format(max_retries)
)


def _parse_config():
def _parse_config(config_file: str):
""" Parse the config file, set up defaults. """
config = configparser.RawConfigParser(defaults=_defaults)

if not os.path.exists(config_file):
# Create an empty config file if there was none so far
fh = open(config_file, "w")
fh.close()
logger.info(
"Could not find a configuration file at %s. Going to "
"create an empty file there." % config_file
)

# The ConfigParser requires a [SECTION_HEADER], which we do not expect in our config file.
# Cheat the ConfigParser module by adding a fake section header
config_file_ = StringIO()
config_file_.write("[FAKE_SECTION]\n")
try:
# The ConfigParser requires a [SECTION_HEADER], which we do not expect in our config file.
# Cheat the ConfigParser module by adding a fake section header
config_file_ = StringIO()
config_file_.write("[FAKE_SECTION]\n")
with open(config_file) as fh:
for line in fh:
config_file_.write(line)
config_file_.seek(0)
config.read_file(config_file_)
except FileNotFoundError:
logger.info("No config file found at %s, using default configuration.", config_file)
except OSError as e:
logger.info("Error opening file %s: %s", config_file, e.message)
logger.info("Error opening file %s: %s", config_file, e.args[0])
config_file_.seek(0)
config.read_file(config_file_)
return config


Expand All @@ -257,11 +279,7 @@ def get_cache_directory():
"""
url_suffix = urlparse(server).netloc
reversed_url_suffix = os.sep.join(url_suffix.split(".")[::-1])
if not cache_directory:
_cachedir = _defaults(cache_directory)
else:
_cachedir = cache_directory
_cachedir = os.path.join(_cachedir, reversed_url_suffix)
_cachedir = os.path.join(cache_directory, reversed_url_suffix)
return _cachedir


Expand Down Expand Up @@ -297,4 +315,3 @@ def set_cache_directory(cachedir):
]

_setup()
_create_log_handlers()
14 changes: 0 additions & 14 deletions openml/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,8 @@
import time
from typing import Dict, Union, cast
import unittest
import warnings
import pandas as pd

# Currently, importing oslo raises a lot of warning that it will stop working
# under python3.8; remove this once they disappear
with warnings.catch_warnings():
warnings.simplefilter("ignore")
from oslo_concurrency import lockutils

import openml
from openml.tasks import TaskType
from openml.exceptions import OpenMLServerException
Expand Down Expand Up @@ -100,13 +93,6 @@ def setUp(self, n_levels: int = 1):
openml.config.avoid_duplicate_runs = False
openml.config.cache_directory = self.workdir

# If we're on travis, we save the api key in the config file to allow
# the notebook tests to read them.
if os.environ.get("TRAVIS") or os.environ.get("APPVEYOR"):
with lockutils.external_lock("config", lock_path=self.workdir):
with open(openml.config.config_file, "w") as fh:
fh.write("apikey = %s" % openml.config.apikey)

# Increase the number of retries to avoid spurious server failures
self.connection_n_retries = openml.config.connection_n_retries
openml.config.connection_n_retries = 10
Expand Down
10 changes: 6 additions & 4 deletions openml/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ def _list_all(listing_call, output_format="dict", *args, **filters):
limit=batch_size,
offset=current_offset,
output_format=output_format,
**active_filters
**active_filters,
)
except openml.exceptions.OpenMLServerNoResult:
# we want to return an empty dict in this case
Expand Down Expand Up @@ -277,9 +277,11 @@ def _create_cache_directory(key):
cache = config.get_cache_directory()
cache_dir = os.path.join(cache, key)
try:
os.makedirs(cache_dir)
except OSError:
pass
os.makedirs(cache_dir, exist_ok=True)
except Exception as e:
raise openml.exceptions.OpenMLCacheException(
f"Cannot create cache directory {cache_dir}."
) from e
return cache_dir


Expand Down
20 changes: 17 additions & 3 deletions tests/test_openml/test_config.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,29 @@
# License: BSD 3-Clause

import tempfile
import os
import unittest.mock

import openml.config
import openml.testing


class TestConfig(openml.testing.TestBase):
def test_config_loading(self):
self.assertTrue(os.path.exists(openml.config.config_file))
self.assertTrue(os.path.isdir(os.path.expanduser("~/.openml")))
@unittest.mock.patch("os.path.expanduser")
@unittest.mock.patch("openml.config.openml_logger.warning")
@unittest.mock.patch("openml.config._create_log_handlers")
def test_non_writable_home(self, log_handler_mock, warnings_mock, expanduser_mock):
with tempfile.TemporaryDirectory(dir=self.workdir) as td:
expanduser_mock.side_effect = (
os.path.join(td, "openmldir"),
os.path.join(td, "cachedir"),
)
os.chmod(td, 0o444)
openml.config._setup()

self.assertEqual(warnings_mock.call_count, 2)
self.assertEqual(log_handler_mock.call_count, 1)
self.assertFalse(log_handler_mock.call_args_list[0][1]["create_file_handler"])


class TestConfigurationForExamples(openml.testing.TestBase):
Expand Down
30 changes: 22 additions & 8 deletions tests/test_utils/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
from openml.testing import TestBase
import os
import tempfile
import unittest.mock

import numpy as np
import openml
import sys

if sys.version_info[0] >= 3:
from unittest import mock
else:
import mock
import openml
from openml.testing import TestBase


class OpenMLTaskTest(TestBase):
Expand All @@ -20,7 +19,7 @@ def mocked_perform_api_call(call, request_method):
def test_list_all(self):
openml.utils._list_all(listing_call=openml.tasks.functions._list_tasks)

@mock.patch("openml._api_calls._perform_api_call", side_effect=mocked_perform_api_call)
@unittest.mock.patch("openml._api_calls._perform_api_call", side_effect=mocked_perform_api_call)
def test_list_all_few_results_available(self, _perform_api_call):
# we want to make sure that the number of api calls is only 1.
# Although we have multiple versions of the iris dataset, there is only
Expand Down Expand Up @@ -86,3 +85,18 @@ def test_list_all_for_evaluations(self):

# might not be on test server after reset, please rerun test at least once if fails
self.assertEqual(len(evaluations), required_size)

@unittest.mock.patch("openml.config.get_cache_directory")
def test__create_cache_directory(self, config_mock):
with tempfile.TemporaryDirectory(dir=self.workdir) as td:
config_mock.return_value = td
openml.utils._create_cache_directory("abc")
self.assertTrue(os.path.exists(os.path.join(td, "abc")))
subdir = os.path.join(td, "def")
os.mkdir(subdir)
os.chmod(subdir, 0o444)
config_mock.return_value = subdir
with self.assertRaisesRegex(
openml.exceptions.OpenMLCacheException, r"Cannot create cache directory",
):
openml.utils._create_cache_directory("ghi")