Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions examples/30_extended/create_upload_tutorial.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,8 @@

############################################################################

upload_did = diabetes_dataset.publish()
print(f"URL for dataset: {openml.config.server}/data/{upload_did}")
diabetes_dataset.publish()
print(f"URL for dataset: {diabetes_dataset.openml_url}")

############################################################################
# Dataset is a list
Expand Down Expand Up @@ -192,8 +192,8 @@

############################################################################

upload_did = weather_dataset.publish()
print(f"URL for dataset: {openml.config.server}/data/{upload_did}")
weather_dataset.publish()
print(f"URL for dataset: {weather_dataset.openml_url}")

############################################################################
# Dataset is a pandas DataFrame
Expand Down Expand Up @@ -238,8 +238,8 @@

############################################################################

upload_did = weather_dataset.publish()
print(f"URL for dataset: {openml.config.server}/data/{upload_did}")
weather_dataset.publish()
print(f"URL for dataset: {weather_dataset.openml_url}")

############################################################################
# Dataset is a sparse matrix
Expand Down Expand Up @@ -275,8 +275,8 @@

############################################################################

upload_did = xor_dataset.publish()
print(f"URL for dataset: {openml.config.server}/data/{upload_did}")
xor_dataset.publish()
print(f"URL for dataset: {xor_dataset.openml_url}")


############################################################################
Expand Down Expand Up @@ -310,8 +310,8 @@

############################################################################

upload_did = xor_dataset.publish()
print(f"URL for dataset: {openml.config.server}/data/{upload_did}")
xor_dataset.publish()
print(f"URL for dataset: {xor_dataset.openml_url}")


############################################################################
Expand Down
32 changes: 30 additions & 2 deletions openml/base.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
from abc import ABC, abstractmethod
from collections import OrderedDict
import re
from typing import Optional, List, Tuple, Union
from typing import Optional, List, Tuple, Union, Dict
import webbrowser

import xmltodict

import openml.config
from .utils import _tag_openml_base
from .utils import _tag_openml_base, _get_rest_api_type_alias


class OpenMLBase(ABC):
Expand Down Expand Up @@ -104,6 +104,34 @@ def _to_xml(self) -> str:
encoding_specification, xml_body = xml_representation.split('\n', 1)
return xml_body

def _get_file_elements(self) -> Dict:
    """ Collect the file elements that `publish` sends to the server.

    The base implementation contributes nothing. Subclasses override this
    hook when they have extra payloads to upload; `publish` adds the
    'description' entry itself whenever the returned dict lacks one.
    """
    # A fresh dict on every call: `publish` writes into the returned object.
    no_extra_files = dict()
    return no_extra_files

@abstractmethod
def _parse_publish_response(self, xml_response: Dict):
    """ Extract the id from `xml_response` and store it on self.

    Abstract hook without a base implementation. `publish` calls it with the
    server response after parsing it with xmltodict.
    """

def publish(self) -> 'OpenMLBase':
    """ Upload this entity to the OpenML server.

    The upload consists of the file elements returned by
    `_get_file_elements`. If they contain no 'description' entry, the XML
    produced by `_to_xml` is used for it. The request is a POST to
    '<alias>/', where the alias comes from `_get_rest_api_type_alias`.
    The response text is parsed with xmltodict and handed to
    `_parse_publish_response`.

    Returns
    -------
    self : OpenMLBase
        This same object, after `_parse_publish_response` has processed
        the server response.
    """
    # Imported explicitly: `import openml.config` at module level does not by
    # itself guarantee that the `openml._api_calls` submodule has been loaded.
    import openml._api_calls

    file_elements = self._get_file_elements()

    # Only generate the XML description when the subclass did not supply one.
    if 'description' not in file_elements:
        file_elements['description'] = self._to_xml()

    call = '{}/'.format(_get_rest_api_type_alias(self))
    response_text = openml._api_calls._perform_api_call(
        call, 'post', file_elements=file_elements
    )
    xml_response = xmltodict.parse(response_text)

    self._parse_publish_response(xml_response)
    return self

def open_in_browser(self):
    """ Open the page at `self.openml_url` in your default web browser. """
    page_url = self.openml_url
    webbrowser.open(page_url)
Expand Down
61 changes: 19 additions & 42 deletions openml/datasets/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,8 @@
import numpy as np
import pandas as pd
import scipy.sparse
import xmltodict
from warnings import warn

import openml._api_calls
from openml.base import OpenMLBase
from .data_feature import OpenMLDataFeature
from ..exceptions import PyOpenMLError
Expand Down Expand Up @@ -728,49 +726,28 @@ def get_features_by_type(self, data_type, exclude=None,
result.append(idx - offset)
return result

def publish(self):
"""Publish the dataset on the OpenML server.
def _get_file_elements(self) -> Dict:
""" Adds the 'dataset' to file elements. """
file_elements = {}
path = None if self.data_file is None else os.path.abspath(self.data_file)

Upload the dataset description and dataset content to openml.

Returns
-------
dataset_id: int
Id of the dataset uploaded to the server.
"""
file_elements = {'description': self._to_xml()}

# the arff dataset string is available
if self._dataset is not None:
file_elements['dataset'] = self._dataset
else:
# the path to the arff dataset is given
if self.data_file is not None:
path = os.path.abspath(self.data_file)
if os.path.exists(path):
try:

with io.open(path, encoding='utf8') as fh:
# check if arff is valid
decoder = arff.ArffDecoder()
decoder.decode(fh, encode_nominal=True)
except arff.ArffException:
raise ValueError("The file you have provided is not "
"a valid arff file.")

with open(path, 'rb') as fp:
file_elements['dataset'] = fp.read()
else:
if self.url is None:
raise ValueError("No url/path to the data file was given")

return_value = openml._api_calls._perform_api_call(
"data/", 'post',
file_elements=file_elements,
)
response = xmltodict.parse(return_value)
self.dataset_id = int(response['oml:upload_data_set']['oml:id'])
return self.dataset_id
elif path is not None and os.path.exists(path):
with open(path, 'rb') as fp:
file_elements['dataset'] = fp.read()
try:
dataset_utf8 = str(file_elements['dataset'], 'utf8')
arff.ArffDecoder().decode(dataset_utf8, encode_nominal=True)
except arff.ArffException:
raise ValueError("The file you have provided is not a valid arff file.")
elif self.url is None:
raise ValueError("No valid url/path to the data file was given.")
return file_elements

def _parse_publish_response(self, xml_response: Dict):
    """ Read the id from the upload response and store it as `self.dataset_id`. """
    upload_info = xml_response['oml:upload_data_set']
    self.dataset_id = int(upload_info['oml:id'])

def _to_dict(self) -> 'OrderedDict[str, OrderedDict]':
""" Creates a dictionary representation of self. """
Expand Down
15 changes: 6 additions & 9 deletions openml/flows/flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,10 @@ def from_filesystem(cls, input_directory) -> 'OpenMLFlow':
xml_string = f.read()
return OpenMLFlow._from_dict(xmltodict.parse(xml_string))

def _parse_publish_response(self, xml_response: Dict):
    """ Read the id from the upload response and store it as `self.flow_id`. """
    upload_info = xml_response['oml:upload_flow']
    self.flow_id = int(upload_info['oml:id'])

def publish(self, raise_error_if_exists: bool = False) -> 'OpenMLFlow':
""" Publish this flow to OpenML server.

Expand Down Expand Up @@ -379,15 +383,8 @@ def publish(self, raise_error_if_exists: bool = False) -> 'OpenMLFlow':
if self.flow_id:
raise openml.exceptions.PyOpenMLError("Flow does not exist on the server, "
"but 'flow.flow_id' is not None.")
xml_description = self._to_xml()
file_elements = {'description': xml_description}
return_value = openml._api_calls._perform_api_call(
"flow/",
'post',
file_elements=file_elements,
)
server_response = xmltodict.parse(return_value)
flow_id = int(server_response['oml:upload_flow']['oml:id'])
super().publish()
flow_id = self.flow_id
elif raise_error_if_exists:
error_message = "This OpenMLFlow already exists with id: {}.".format(flow_id)
raise openml.exceptions.PyOpenMLError(error_message)
Expand Down
29 changes: 10 additions & 19 deletions openml/runs/run.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
from collections import OrderedDict
import pickle
import time
from typing import Any, IO, TextIO, List, Union, Tuple, Optional # noqa F401
from typing import Any, IO, TextIO, List, Union, Tuple, Optional, Dict # noqa F401
import os

import arff
import numpy as np
import xmltodict

import openml
import openml._api_calls
Expand Down Expand Up @@ -428,16 +427,15 @@ def _attribute_list_to_dict(attribute_list):
scores.append(sklearn_fn(y_true, y_pred, **kwargs))
return np.array(scores)

def publish(self) -> 'OpenMLRun':
""" Publish a run (and if necessary, its flow) to the OpenML server.
def _parse_publish_response(self, xml_response: Dict):
    """ Read the run id from the upload response and store it as `self.run_id`. """
    upload_info = xml_response['oml:upload_run']
    self.run_id = int(upload_info['oml:run_id'])

Uploads the results of a run to OpenML.
If the run is of an unpublished OpenMLFlow, the flow will be uploaded too.
Sets the run_id on self.
def _get_file_elements(self) -> Dict:
""" Get file_elements to upload to the server.

Returns
-------
self : OpenMLRun
Derived child classes should overwrite this method as necessary.
The description field will be populated automatically if not provided.
"""
if self.model is None:
raise PyOpenMLError(
Expand All @@ -463,8 +461,7 @@ def publish(self) -> 'OpenMLRun':
self.model,
)

description_xml = self._to_xml()
file_elements = {'description': ("description.xml", description_xml)}
file_elements = {'description': ("description.xml", self._to_xml())}

if self.error_message is None:
predictions = arff.dumps(self._generate_arff_dict())
Expand All @@ -473,13 +470,7 @@ def publish(self) -> 'OpenMLRun':
if self.trace is not None:
trace_arff = arff.dumps(self.trace.trace_to_arff())
file_elements['trace'] = ("trace.arff", trace_arff)

return_value = openml._api_calls._perform_api_call(
"/run/", 'post', file_elements=file_elements
)
result = xmltodict.parse(return_value)
self.run_id = int(result['oml:upload_run']['oml:run_id'])
return self
return file_elements

def _to_dict(self) -> 'OrderedDict[str, OrderedDict]':
""" Creates a dictionary representation of self. """
Expand Down
25 changes: 3 additions & 22 deletions openml/study/study.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
from collections import OrderedDict
from typing import Dict, List, Optional, Tuple, Union, Any

import xmltodict

import openml
from openml.base import OpenMLBase

Expand Down Expand Up @@ -124,26 +122,9 @@ def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]:
"Creator", "Upload Time"]
return [(key, fields[key]) for key in order if key in fields]

def publish(self) -> int:
"""
Publish the study on the OpenML server.

Returns
-------
study_id: int
Id of the study uploaded to the server.
"""
file_elements = {
'description': self._to_xml()
}
return_value = openml._api_calls._perform_api_call(
"study/",
'post',
file_elements=file_elements,
)
study_res = xmltodict.parse(return_value)
self.study_id = int(study_res['oml:study_upload']['oml:id'])
return self.study_id
def _parse_publish_response(self, xml_response: Dict):
    """ Read the id from the upload response and store it as `self.study_id`. """
    upload_info = xml_response['oml:study_upload']
    self.study_id = int(upload_info['oml:id'])

def _to_dict(self) -> 'OrderedDict[str, OrderedDict]':
""" Creates a dictionary representation of self. """
Expand Down
28 changes: 3 additions & 25 deletions openml/tasks/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import numpy as np
import pandas as pd
import scipy.sparse
import xmltodict

import openml._api_calls
from openml.base import OpenMLBase
Expand Down Expand Up @@ -181,30 +180,9 @@ def _to_dict(self) -> 'OrderedDict[str, OrderedDict]':

return task_container

def publish(self) -> int:
"""Publish task to OpenML server.

Returns
-------
task_id: int
Returns the id of the uploaded task
if successful.

"""

xml_description = self._to_xml()

file_elements = {'description': xml_description}

return_value = openml._api_calls._perform_api_call(
"task/",
'post',
file_elements=file_elements,
)

task_id = int(xmltodict.parse(return_value)['oml:upload_task']['oml:id'])

return task_id
def _parse_publish_response(self, xml_response: Dict):
    """ Read the id from the upload response and store it as `self.task_id`. """
    upload_info = xml_response['oml:upload_task']
    self.task_id = int(upload_info['oml:id'])


class OpenMLSupervisedTask(OpenMLTask, ABC):
Expand Down
15 changes: 11 additions & 4 deletions openml/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import hashlib
import xmltodict
import shutil
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, List, Tuple, Union, Type
import warnings
import pandas as pd
from functools import wraps
Expand Down Expand Up @@ -68,16 +68,23 @@ def extract_xml_tags(xml_tag_name, node, allow_none=True):
(xml_tag_name, str(node)))


def _tag_openml_base(oml_object: 'OpenMLBase', tag: str, untag: bool = False):
def _get_rest_api_type_alias(oml_object: 'OpenMLBase') -> str:
    """ Return the alias of the openml entity as it is defined for the REST API.

    Parameters
    ----------
    oml_object : OpenMLBase
        The entity whose REST alias is requested.

    Returns
    -------
    str
        One of 'data', 'flow', 'task', 'run' or 'study'; the first mapping
        entry that `oml_object` is an instance of wins.

    Raises
    ------
    TypeError
        If `oml_object` is not an instance of any mapped type.
    """
    rest_api_mapping = [
        (openml.datasets.OpenMLDataset, 'data'),
        (openml.flows.OpenMLFlow, 'flow'),
        (openml.tasks.OpenMLTask, 'task'),
        (openml.runs.OpenMLRun, 'run'),
        ((openml.study.OpenMLStudy, openml.study.OpenMLBenchmarkSuite), 'study'),
    ]  # type: List[Tuple[Union[Type, Tuple], str]]

    # Return on the first match instead of collecting every match and
    # indexing [0], which failed with an opaque IndexError on no match.
    for python_type, api_alias in rest_api_mapping:
        if isinstance(oml_object, python_type):
            return api_alias

    raise TypeError(
        "No REST API alias is defined for objects of type {}.".format(
            type(oml_object).__name__
        )
    )


def _tag_openml_base(oml_object: 'OpenMLBase', tag: str, untag: bool = False):
    """ Forward a tag request for `oml_object` to `_tag_entity`.

    The entity's REST API alias and its `id` are looked up here; `tag` and
    `untag` are passed through unchanged.
    """
    _tag_entity(_get_rest_api_type_alias(oml_object), oml_object.id, tag, untag)


Expand Down
Loading