diff --git a/openml/_api_calls.py b/openml/_api_calls.py index 959cad51a..87511693c 100644 --- a/openml/_api_calls.py +++ b/openml/_api_calls.py @@ -23,6 +23,14 @@ ) +def _create_url_from_endpoint(endpoint: str) -> str: + url = config.server + if not url.endswith("/"): + url += "/" + url += endpoint + return url.replace("=", "%3d") + + def _perform_api_call(call, request_method, data=None, file_elements=None): """ Perform an API call at the OpenML server. @@ -50,12 +58,7 @@ def _perform_api_call(call, request_method, data=None, file_elements=None): return_value : str Return value of the OpenML server """ - url = config.server - if not url.endswith("/"): - url += "/" - url += call - - url = url.replace("=", "%3d") + url = _create_url_from_endpoint(call) logging.info("Starting [%s] request for the URL %s", request_method, url) start = time.time() diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py index fb2e201f6..1e6fb5c78 100644 --- a/openml/datasets/functions.py +++ b/openml/datasets/functions.py @@ -3,6 +3,7 @@ import io import logging import os +from pyexpat import ExpatError from typing import List, Dict, Union, Optional, cast import numpy as np @@ -19,6 +20,7 @@ from .dataset import OpenMLDataset from ..exceptions import ( OpenMLHashException, + OpenMLServerError, OpenMLServerException, OpenMLPrivateDatasetError, ) @@ -437,7 +439,7 @@ def get_dataset( parquet_file = None remove_dataset_cache = False except OpenMLServerException as e: - # if there was an exception, + # if there was an exception # check if the user had access to the dataset if e.code == 112: raise OpenMLPrivateDatasetError(e.message) from None @@ -949,14 +951,18 @@ def _get_dataset_description(did_cache_dir, dataset_id): try: with io.open(description_file, encoding="utf8") as fh: dataset_xml = fh.read() + description = xmltodict.parse(dataset_xml)["oml:data_set_description"] except Exception: url_extension = "data/{}".format(dataset_id) dataset_xml = openml._api_calls._perform_api_call(url_extension, "get") + try: + description = xmltodict.parse(dataset_xml)["oml:data_set_description"] + except ExpatError as e: + url = openml._api_calls._create_url_from_endpoint(url_extension) + raise OpenMLServerError(f"Dataset description XML at '{url}' is malformed.") from e with io.open(description_file, "w", encoding="utf8") as fh: fh.write(dataset_xml) - description = xmltodict.parse(dataset_xml)["oml:data_set_description"] - return description diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index 995474142..50f449ebb 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -1240,7 +1240,7 @@ def _wait_for_dataset_being_processed(self, dataset_id): try: downloaded_dataset = openml.datasets.get_dataset(dataset_id) break - except Exception as e: + except OpenMLServerException as e: # returned code 273: Dataset not processed yet # returned code 362: No qualities found TestBase.logger.error(