Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions openml/datasets/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,36 @@ def __init__(self, name, description, format=None,
else:
self.data_pickle_file = None

def __str__(self):
    """Return a human-readable, multi-line summary of the dataset.

    The result is an underlined ``OpenML Dataset`` header followed by one
    ``label: value`` line per field. Labels are right-padded with dots to
    the width of the longest label so that the values line up.

    Name, version, format, licence, download URL, data file and pickle file
    are always listed (a missing value prints as ``None``). The number of
    features, upload date, OpenML URL and number of instances are listed
    only when the underlying information is available.

    Returns
    -------
    str
    """
    header = "OpenML Dataset"
    header = '{}\n{}\n'.format(header, '=' * len(header))

    # Drop the trailing len('api/v1/xml') characters of the configured server
    # address (assumes the address ends with that API suffix).
    base_url = "{}".format(openml.config.server[:-len('api/v1/xml')])
    fields = {"Name": self.name,
              "Version": self.version,
              "Format": self.format,
              "Licence": self.licence,
              "Download URL": self.url,
              "Data file": self.data_file,
              "Pickle file": self.data_pickle_file}
    # Feature metadata may be absent; skip the count instead of raising.
    if self.features is not None:
        fields["# of features"] = len(self.features)
    if self.upload_date is not None:
        fields["Upload Date"] = self.upload_date.replace('T', ' ')
    if self.dataset_id is not None:
        fields["OpenML URL"] = "{}d/{}".format(base_url, self.dataset_id)
    # Qualities may be None or may lack the key; both simply omit the row.
    n_instances = (self.qualities or {}).get('NumberOfInstances')
    if n_instances is not None:
        fields["# of instances"] = int(n_instances)

    # determines the order in which the information will be printed
    # (labels must match the keys of `fields` exactly, including case)
    order = ["Name", "Version", "Format", "Upload Date", "Licence", "Download URL",
             "OpenML URL", "Data file", "Pickle file", "# of features", "# of instances"]
    fields = [(key, fields[key]) for key in order if key in fields]

    longest_field_name_length = max(len(name) for name, value in fields)
    field_line_format = "{{:.<{}}}: {{}}".format(longest_field_name_length)
    body = '\n'.join(field_line_format.format(name, value) for name, value in fields)
    return header + body

def _data_arff_to_pickle(self, data_file):
data_pickle_file = data_file.replace('.arff', '.pkl.py3')
if os.path.exists(data_pickle_file):
Expand Down
31 changes: 31 additions & 0 deletions openml/evaluations/evaluation.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import openml.config


class OpenMLEvaluation(object):
"""
Expand Down Expand Up @@ -47,3 +49,32 @@ def __init__(self, run_id, task_id, setup_id, flow_id, flow_name,
self.value = value
self.values = values
self.array_data = array_data

def __str__(self):
    """Return a human-readable, multi-line summary of the evaluation.

    The result is an underlined ``OpenML Evaluation`` header followed by one
    ``label: value`` line per field. Labels are right-padded with dots to
    the width of the longest label so that the values line up.

    Returns
    -------
    str
    """
    header = "OpenML Evaluation"
    header = '{}\n{}\n'.format(header, '=' * len(header))

    # Drop the trailing len('api/v1/xml') characters of the configured server
    # address (assumes the address ends with that API suffix).
    base_url = "{}".format(openml.config.server[:-len('api/v1/xml')])
    fields = {"Upload Date": self.upload_time,
              "Run ID": self.run_id,
              "OpenML Run URL": "{}r/{}".format(base_url, self.run_id),
              "Task ID": self.task_id,
              "OpenML Task URL": "{}t/{}".format(base_url, self.task_id),
              "Flow ID": self.flow_id,
              "OpenML Flow URL": "{}f/{}".format(base_url, self.flow_id),
              "Setup ID": self.setup_id,
              "Data ID": self.data_id,
              "Data Name": self.data_name,
              "OpenML Data URL": "{}d/{}".format(base_url, self.data_id),
              "Metric Used": self.function,
              "Result": self.value}

    # determines the order in which the information will be printed
    # (labels must match the keys of `fields` exactly; every entry needs its
    # own trailing comma, otherwise adjacent literals are concatenated)
    order = ["Upload Date", "Run ID", "OpenML Run URL", "Task ID", "OpenML Task URL",
             "Flow ID", "OpenML Flow URL", "Setup ID", "Data ID", "Data Name",
             "OpenML Data URL", "Metric Used", "Result"]
    fields = [(key, fields[key]) for key in order if key in fields]

    longest_field_name_length = max(len(name) for name, value in fields)
    field_line_format = "{{:.<{}}}: {{}}".format(longest_field_name_length)
    body = '\n'.join(field_line_format.format(name, value) for name, value in fields)
    return header + body
31 changes: 31 additions & 0 deletions openml/flows/flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
from ..extensions import get_extension_by_flow
from ..utils import extract_xml_tags, _tag_entity

import openml.config


class OpenMLFlow(object):
"""OpenML Flow. Stores machine learning models.
Expand Down Expand Up @@ -132,6 +134,35 @@ def __init__(self, name, description, model, components, parameters,

self.extension = get_extension_by_flow(self)

def __str__(self):
    """Render the flow as an underlined title plus dot-padded ``label: value`` lines.

    Name, description and dependencies are always listed. The flow id (with
    its version when one is set), the flow URL, the upload date and the binary
    URL are listed only when the corresponding attribute is not ``None``.
    """
    title = "OpenML Flow"
    heading = '{}\n{}\n'.format(title, '=' * len(title))

    # Cut the API suffix off the configured server address.
    api_suffix_length = len('api/v1/xml')
    site_root = "{}".format(openml.config.server[:-api_suffix_length])

    available = {"Flow Name": self.name,
                 "Flow Description": self.description,
                 "Dependencies": self.dependencies}
    if self.flow_id is not None:
        if self.version is None:
            available["Flow ID"] = self.flow_id
        else:
            available["Flow ID"] = "{} (version {})".format(self.flow_id, self.version)
        available["Flow URL"] = "{}f/{}".format(site_root, self.flow_id)
    if self.upload_date is not None:
        available["Upload Date"] = self.upload_date.replace('T', ' ')
    if self.binary_url is not None:
        available["Binary URL"] = self.binary_url

    # Labels in the sequence they should be printed; absent ones are skipped.
    display_order = ("Flow ID", "Flow URL", "Flow Name", "Flow Description",
                     "Binary URL", "Upload Date", "Dependencies")
    rows = [(label, available[label]) for label in display_order if label in available]

    # Pad every label with dots up to the longest label.
    width = max(len(label) for label, _ in rows)
    lines = ["{0:.<{1}}: {2}".format(label, width, value) for label, value in rows]
    return heading + '\n'.join(lines)

def _to_xml(self) -> str:
"""Generate xml representation of self for upload to server.

Expand Down
40 changes: 34 additions & 6 deletions openml/runs/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,40 @@ def __init__(self, task_id, flow_id, dataset_id, setup_string=None,
self.predictions_url = predictions_url

def __str__(self):
    """Return a human-readable, multi-line summary of the run.

    The result is an underlined ``OpenML Run`` header followed by one
    ``label: value`` line per field. Labels are right-padded with dots to
    the width of the longest label so that the values line up.

    The uploader profile and run URL are listed only when ``uploader`` /
    ``run_id`` are not ``None``. The result row is listed only when
    ``evaluations`` is set and contains ``task_evaluation_measure``.

    Returns
    -------
    str
    """
    header = "OpenML Run"
    header = '{}\n{}\n'.format(header, '=' * len(header))

    # Drop the trailing len('api/v1/xml') characters of the configured server
    # address (assumes the address ends with that API suffix).
    base_url = "{}".format(openml.config.server[:-len('api/v1/xml')])
    fields = {"Uploader Name": self.uploader_name,
              "Metric": self.task_evaluation_measure,
              "Run ID": self.run_id,
              "Task ID": self.task_id,
              "Task Type": self.task_type,
              "Task URL": "{}t/{}".format(base_url, self.task_id),
              "Flow ID": self.flow_id,
              "Flow Name": self.flow_name,
              "Flow URL": "{}f/{}".format(base_url, self.flow_id),
              "Setup ID": self.setup_id,
              "Setup String": self.setup_string,
              "Dataset ID": self.dataset_id,
              "Dataset URL": "{}d/{}".format(base_url, self.dataset_id)}
    if self.uploader is not None:
        fields["Uploader Profile"] = "{}u/{}".format(base_url, self.uploader)
    if self.run_id is not None:
        fields["Run URL"] = "{}r/{}".format(base_url, self.run_id)
    # Guard against evaluations being None: a membership test on None raises.
    if self.evaluations is not None and self.task_evaluation_measure in self.evaluations:
        fields["Result"] = self.evaluations[self.task_evaluation_measure]

    # determines the order in which the information will be printed
    order = ["Uploader Name", "Uploader Profile", "Metric", "Result", "Run ID", "Run URL",
             "Task ID", "Task Type", "Task URL", "Flow ID", "Flow Name", "Flow URL",
             "Setup ID", "Setup String", "Dataset ID", "Dataset URL"]
    fields = [(key, fields[key]) for key in order if key in fields]

    longest_field_name_length = max(len(name) for name, value in fields)
    field_line_format = "{{:.<{}}}: {{}}".format(longest_field_name_length)
    body = '\n'.join(field_line_format.format(name, value) for name, value in fields)
    return header + body

def _repr_pretty_(self, pp, cycle):
    """Write ``str(self)`` to the printer object ``pp`` via ``pp.text``.

    ``cycle`` is accepted but not used.
    NOTE(review): name and signature look like IPython's pretty-printer
    hook -- confirm against the caller.
    """
    pp.text(str(self))
Expand Down
52 changes: 52 additions & 0 deletions openml/setups/setup.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import openml.config


class OpenMLSetup(object):
"""Setup object (a.k.a. Configuration).
Expand Down Expand Up @@ -25,6 +27,25 @@ def __init__(self, setup_id, flow_id, parameters):
self.flow_id = flow_id
self.parameters = parameters

def __str__(self):
    """Render the setup as an underlined title plus dot-padded ``label: value`` lines.

    Lists the setup id, the flow id, the flow URL and the number of
    parameters, in that order.
    """
    title = "OpenML Setup"
    heading = '{}\n{}\n'.format(title, '=' * len(title))

    # Cut the API suffix off the configured server address.
    api_suffix_length = len('api/v1/xml')
    site_root = "{}".format(openml.config.server[:-api_suffix_length])

    # Every row is always present, so the rows are written in print order.
    rows = [
        ("Setup ID", self.setup_id),
        ("Flow ID", self.flow_id),
        ("Flow URL", "{}f/{}".format(site_root, self.flow_id)),
        ("# of Parameters", len(self.parameters)),
    ]

    # Pad every label with dots up to the longest label.
    width = max(len(label) for label, _ in rows)
    lines = ["{0:.<{1}}: {2}".format(label, width, value) for label, value in rows]
    return heading + '\n'.join(lines)


class OpenMLParameter(object):
"""Parameter object (used in setup).
Expand Down Expand Up @@ -60,3 +81,34 @@ def __init__(self, input_id, flow_id, flow_name, full_name, parameter_name,
self.data_type = data_type
self.default_value = default_value
self.value = value

def __str__(self):
    """Render the parameter as an underlined title plus dot-padded ``label: value`` lines.

    Lists the id, flow id, flow name (taken from ``full_name``), flow URL and
    parameter name, followed by three nested rows (data type, default, value)
    whose labels carry a ``  |__`` prefix.
    """
    title = "OpenML Parameter"
    heading = '{}\n{}\n'.format(title, '=' * len(title))

    # Cut the API suffix off the configured server address.
    api_suffix_length = len('api/v1/xml')
    site_root = "{}".format(openml.config.server[:-api_suffix_length])

    # Prefix for the nested attribute rows: 2 spaces, a pipe, 2 underscores.
    nested_prefix = " " * 2 + "|" + "_" * 2

    # Every row is always present, so the rows are written in print order.
    rows = [
        ("ID", self.id),
        ("Flow ID", self.flow_id),
        ("Flow Name", self.full_name),
        ("Flow URL", "{}f/{}".format(site_root, self.flow_id)),
        ("Parameter Name", self.parameter_name),
        (nested_prefix + "Data Type", self.data_type),
        (nested_prefix + "Default", self.default_value),
        (nested_prefix + "Value", self.value),
    ]

    # Pad every label with dots up to the longest label.
    width = max(len(label) for label, _ in rows)
    lines = ["{0:.<{1}}: {2}".format(label, width, value) for label, value in rows]
    return heading + '\n'.join(lines)
45 changes: 45 additions & 0 deletions openml/study/study.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,39 @@ def __init__(
self.runs = runs
pass

def __str__(self):
    """Render the study body as dot-padded ``label: value`` lines (no header).

    Sub classes prepend their own header. Name, status and main entity type
    are always listed; the id, study URL, creator URL, upload time and the
    entity counts are listed only when the backing attribute is not ``None``.
    """
    # Cut the API suffix off the configured server address.
    api_suffix_length = len('api/v1/xml')
    site_root = "{}".format(openml.config.server[:-api_suffix_length])

    info = {"Name": self.name,
            "Status": self.status,
            "Main Entity Type": self.main_entity_type}
    if self.id is not None:
        info["ID"] = self.id
        info["Study URL"] = "{}s/{}".format(site_root, self.id)
    if self.creator is not None:
        info["Creator"] = "{}u/{}".format(site_root, self.creator)
    if self.creation_date is not None:
        info["Upload Time"] = self.creation_date.replace('T', ' ')

    # One count row per entity collection that is set.
    collections = (("# of Data", self.data),
                   ("# of Tasks", self.tasks),
                   ("# of Flows", self.flows),
                   ("# of Runs", self.runs))
    for label, members in collections:
        if members is not None:
            info[label] = len(members)

    # Labels in the sequence they should be printed; absent ones are skipped.
    display_order = ("ID", "Name", "Status", "Main Entity Type", "Study URL",
                     "# of Data", "# of Tasks", "# of Flows", "# of Runs",
                     "Creator", "Upload Time")
    rows = [(label, info[label]) for label in display_order if label in info]

    # Pad every label with dots up to the longest label.
    width = max(len(label) for label, _ in rows)
    return '\n'.join("{0:.<{1}}: {2}".format(label, width, value) for label, value in rows)

def publish(self) -> int:
"""
Publish the study on the OpenML server.
Expand Down Expand Up @@ -233,6 +266,12 @@ def __init__(
setups=setups,
)

def __str__(self):
    """Return the underlined ``OpenML Study`` title followed by the parent class's body."""
    title = "OpenML Study"
    underline = '=' * len(title)
    heading = '{}\n{}\n'.format(title, underline)
    return heading + super(OpenMLStudy, self).__str__()


class OpenMLBenchmarkSuite(BaseStudy):

Expand Down Expand Up @@ -302,3 +341,9 @@ def __init__(
runs=None,
setups=None,
)

def __str__(self):
    """Return the underlined ``OpenML Benchmark Suite`` title followed by the parent class's body."""
    title = "OpenML Benchmark Suite"
    underline = '=' * len(title)
    heading = '{}\n{}\n'.format(title, underline)
    return heading + super(OpenMLBenchmarkSuite, self).__str__()
30 changes: 30 additions & 0 deletions openml/tasks/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,36 @@ def __init__(
self.estimation_procedure_id = estimation_procedure_id
self.split = None # type: Optional[OpenMLSplit]

def __str__(self):
    """Return a human-readable, multi-line summary of the task.

    The result is an underlined ``OpenML Task`` header followed by one
    ``label: value`` line per field. Labels are right-padded with dots to
    the width of the longest label so that the values line up.

    The task type is always listed. Every other row is listed only when the
    attribute backing it exists and, except for the cost matrix, is not
    ``None``. The cost matrix row appears whenever the object has a
    ``cost_matrix`` attribute.

    Returns
    -------
    str
    """
    header = "OpenML Task"
    header = '{}\n{}\n'.format(header, '=' * len(header))

    # Drop the trailing len('api/v1/xml') characters of the configured server
    # address (assumes the address ends with that API suffix).
    base_url = "{}".format(openml.config.server[:-len('api/v1/xml')])
    fields = {"Task Type": self.task_type}
    if self.task_id is not None:
        fields["Task ID"] = self.task_id
        fields["Task URL"] = "{}t/{}".format(base_url, self.task_id)
    if self.evaluation_measure is not None:
        fields["Evaluation Measure"] = self.evaluation_measure
    if self.estimation_procedure is not None:
        fields["Estimation Procedure"] = self.estimation_procedure['type']
    # target_name and class_labels are looked up defensively, like
    # cost_matrix below: an object lacking them must not break printing.
    target_name = getattr(self, 'target_name', None)
    if target_name is not None:
        fields["Target Feature"] = target_name
    class_labels = getattr(self, 'class_labels', None)
    if class_labels is not None:
        fields["# of Classes"] = len(class_labels)
    if hasattr(self, 'cost_matrix'):
        fields["Cost Matrix"] = "Available"

    # determines the order in which the information will be printed
    order = ["Task Type", "Task ID", "Task URL", "Estimation Procedure", "Evaluation Measure",
             "Target Feature", "# of Classes", "Cost Matrix"]
    fields = [(key, fields[key]) for key in order if key in fields]

    longest_field_name_length = max(len(name) for name, value in fields)
    field_line_format = "{{:.<{}}}: {{}}".format(longest_field_name_length)
    body = '\n'.join(field_line_format.format(name, value) for name, value in fields)
    return header + body

def get_dataset(self) -> datasets.OpenMLDataset:
    """Download dataset associated with task

    Returns whatever ``datasets.get_dataset`` yields for ``self.dataset_id``.
    """
    return datasets.get_dataset(self.dataset_id)
Expand Down