def __str__(self):
    """Return a human-readable, multi-line summary of the dataset.

    The summary is a title underlined with ``=`` followed by one
    ``<field name, dot-padded>: <value>`` line per field, in a fixed
    order. Optional fields (upload date, OpenML URL, number of features,
    number of instances) are left out when the underlying value is not
    available.

    Returns
    -------
    str
        The formatted summary.
    """
    header = "OpenML Dataset"
    header = '{}\n{}\n'.format(header, '=' * len(header))

    # NOTE(review): relies on ``openml.config`` being imported in this
    # module and on the server URL ending in 'api/v1/xml' -- confirm.
    base_url = openml.config.server[:-len('api/v1/xml')]
    fields = {"Name": self.name,
              "Version": self.version,
              "Format": self.format,
              "Licence": self.licence,
              "Download URL": self.url,
              "Data file": self.data_file,
              "Pickle file": self.data_pickle_file}
    if self.features is not None:
        fields["# of features"] = len(self.features)
    if self.upload_date is not None:
        fields["Upload Date"] = self.upload_date.replace('T', ' ')
    if self.dataset_id is not None:
        fields["OpenML URL"] = "{}d/{}".format(base_url, self.dataset_id)
    # Printing must not fail just because qualities are absent or do not
    # contain the instance count.
    if self.qualities is not None:
        n_instances = self.qualities.get('NumberOfInstances')
        if n_instances is not None:
            fields["# of instances"] = int(n_instances)

    # Determines the order in which the information will be printed.
    # The names must match the keys of ``fields`` exactly (case included),
    # otherwise the entry is silently skipped.
    order = ["Name", "Version", "Format", "Upload Date", "Licence", "Download URL",
             "OpenML URL", "Data file", "Pickle file", "# of features", "# of instances"]
    fields = [(key, fields[key]) for key in order if key in fields]

    # Pad every field name with dots to the width of the longest one so
    # that the values line up in a column.
    longest_field_name_length = max(len(name) for name, value in fields)
    field_line_format = "{{:.<{}}}: {{}}".format(longest_field_name_length)
    body = '\n'.join(field_line_format.format(name, value) for name, value in fields)
    return header + body
def __str__(self):
    """Return a human-readable, multi-line summary of the evaluation.

    The summary is a title underlined with ``=`` followed by one
    ``<field name, dot-padded>: <value>`` line per field, in a fixed
    order. URLs are built from the configured server address and the
    run, task, flow and data ids.

    Returns
    -------
    str
        The formatted summary.
    """
    header = "OpenML Evaluation"
    header = '{}\n{}\n'.format(header, '=' * len(header))

    # NOTE(review): assumes the server URL ends in 'api/v1/xml' -- confirm.
    base_url = openml.config.server[:-len('api/v1/xml')]
    fields = {"Upload Date": self.upload_time,
              "Run ID": self.run_id,
              "OpenML Run URL": "{}r/{}".format(base_url, self.run_id),
              "Task ID": self.task_id,
              "OpenML Task URL": "{}t/{}".format(base_url, self.task_id),
              "Flow ID": self.flow_id,
              "OpenML Flow URL": "{}f/{}".format(base_url, self.flow_id),
              "Setup ID": self.setup_id,
              "Data ID": self.data_id,
              "Data Name": self.data_name,
              "OpenML Data URL": "{}d/{}".format(base_url, self.data_id),
              "Metric Used": self.function,
              "Result": self.value}

    # Determines the order in which the information will be printed.
    # Every name must match a key of ``fields`` exactly and every element
    # needs its own comma: adjacent string literals are concatenated by
    # Python, which would silently drop both entries.
    order = ["Upload Date", "Run ID", "OpenML Run URL", "Task ID", "OpenML Task URL",
             "Flow ID", "OpenML Flow URL", "Setup ID", "Data ID", "Data Name",
             "OpenML Data URL", "Metric Used", "Result"]
    fields = [(key, fields[key]) for key in order if key in fields]

    # Pad every field name with dots to the width of the longest one so
    # that the values line up in a column.
    longest_field_name_length = max(len(name) for name, value in fields)
    field_line_format = "{{:.<{}}}: {{}}".format(longest_field_name_length)
    body = '\n'.join(field_line_format.format(name, value) for name, value in fields)
    return header + body
def __str__(self):
    """Build the multi-line text shown when the flow is printed.

    Output is an underlined "OpenML Flow" title followed by one
    dot-padded ``label: value`` line per available field.
    """
    title = "OpenML Flow"
    banner = '{}\n{}\n'.format(title, '=' * len(title))

    server_root = "{}".format(openml.config.server[:-len('api/v1/xml')])

    # Entries that are always present.
    info = {
        "Flow Name": self.name,
        "Flow Description": self.description,
        "Dependencies": self.dependencies,
    }
    # Entries that are only added when the corresponding attribute is set.
    if self.flow_id is not None:
        if self.version is None:
            info["Flow ID"] = self.flow_id
        else:
            info["Flow ID"] = "{} (version {})".format(self.flow_id, self.version)
        info["Flow URL"] = "{}f/{}".format(server_root, self.flow_id)
    if self.upload_date is not None:
        info["Upload Date"] = self.upload_date.replace('T', ' ')
    if self.binary_url is not None:
        info["Binary URL"] = self.binary_url

    # Display order; labels without an entry in ``info`` are skipped.
    display_order = ("Flow ID", "Flow URL", "Flow Name", "Flow Description",
                     "Binary URL", "Upload Date", "Dependencies")
    rows = [(label, info[label]) for label in display_order if label in info]

    # Left-align labels, filling with dots up to the widest label.
    width = max(len(label) for label, _ in rows)
    line_template = "{{:.<{}}}: {{}}".format(width)
    lines = [line_template.format(label, value) for label, value in rows]
    return banner + '\n'.join(lines)
def __str__(self):
    """Return a human-readable, multi-line summary of the run.

    The summary is a title underlined with ``=`` followed by one
    ``<field name, dot-padded>: <value>`` line per field, in a fixed
    order. The uploader profile, the run URL and the result are only
    included when the uploader id, the run id and a matching evaluation
    are available, respectively.

    Returns
    -------
    str
        The formatted summary.
    """
    header = "OpenML Run"
    header = '{}\n{}\n'.format(header, '=' * len(header))

    # NOTE(review): relies on ``openml.config`` being imported in this
    # module and on the server URL ending in 'api/v1/xml' -- confirm.
    base_url = openml.config.server[:-len('api/v1/xml')]
    fields = {"Uploader Name": self.uploader_name,
              "Metric": self.task_evaluation_measure,
              "Run ID": self.run_id,
              "Task ID": self.task_id,
              "Task Type": self.task_type,
              "Task URL": "{}t/{}".format(base_url, self.task_id),
              "Flow ID": self.flow_id,
              "Flow Name": self.flow_name,
              "Flow URL": "{}f/{}".format(base_url, self.flow_id),
              "Setup ID": self.setup_id,
              "Setup String": self.setup_string,
              "Dataset ID": self.dataset_id,
              "Dataset URL": "{}d/{}".format(base_url, self.dataset_id)}
    if self.uploader is not None:
        fields["Uploader Profile"] = "{}u/{}".format(base_url, self.uploader)
    if self.run_id is not None:
        fields["Run URL"] = "{}r/{}".format(base_url, self.run_id)
    # A membership test on None raises TypeError, so check that evaluations
    # exist before looking up the task's evaluation measure.
    if self.evaluations is not None and self.task_evaluation_measure in self.evaluations:
        fields["Result"] = self.evaluations[self.task_evaluation_measure]

    # Determines the order in which the information will be printed.
    order = ["Uploader Name", "Uploader Profile", "Metric", "Result", "Run ID", "Run URL",
             "Task ID", "Task Type", "Task URL", "Flow ID", "Flow Name", "Flow URL",
             "Setup ID", "Setup String", "Dataset ID", "Dataset URL"]
    fields = [(key, fields[key]) for key in order if key in fields]

    # Pad every field name with dots to the width of the longest one so
    # that the values line up in a column.
    longest_field_name_length = max(len(name) for name, value in fields)
    field_line_format = "{{:.<{}}}: {{}}".format(longest_field_name_length)
    body = '\n'.join(field_line_format.format(name, value) for name, value in fields)
    return header + body
def __str__(self):
    """Build the multi-line text shown when the setup is printed.

    Output is an underlined "OpenML Setup" title followed by the setup
    id, the flow id, the flow URL and the number of parameters, each on
    its own dot-padded ``label: value`` line.
    """
    title = "OpenML Setup"
    banner = '{}\n{}\n'.format(title, '=' * len(title))

    server_root = "{}".format(openml.config.server[:-len('api/v1/xml')])

    # All entries are always present, so list them directly in display order.
    rows = [
        ("Setup ID", self.setup_id),
        ("Flow ID", self.flow_id),
        ("Flow URL", "{}f/{}".format(server_root, self.flow_id)),
        ("# of Parameters", len(self.parameters)),
    ]

    # Left-align labels, filling with dots up to the widest label.
    width = max(len(label) for label, _ in rows)
    line_template = "{{:.<{}}}: {{}}".format(width)
    lines = [line_template.format(label, value) for label, value in rows]
    return banner + '\n'.join(lines)
def __str__(self):
    """Build the multi-line text shown when the parameter is printed.

    Output is an underlined "OpenML Parameter" title followed by one
    dot-padded ``label: value`` line per field. The data type, default
    and value lines are prefixed with ``  |__`` so they appear nested
    under the parameter name.
    """
    title = "OpenML Parameter"
    banner = '{}\n{}\n'.format(title, '=' * len(title))

    server_root = "{}".format(openml.config.server[:-len('api/v1/xml')])

    # Prefix for the nested attribute lines: 2 spaces, a pipe, 2 underscores.
    nested_prefix = " " * 2 + "|" + "_" * 2

    # All entries are always present, so list them directly in display order.
    # NOTE(review): "Flow Name" shows ``self.full_name`` rather than
    # ``self.flow_name`` -- confirm this is intended.
    rows = [
        ("ID", self.id),
        ("Flow ID", self.flow_id),
        ("Flow Name", self.full_name),
        ("Flow URL", "{}f/{}".format(server_root, self.flow_id)),
        ("Parameter Name", self.parameter_name),
        ("{}Data Type".format(nested_prefix), self.data_type),
        ("{}Default".format(nested_prefix), self.default_value),
        ("{}Value".format(nested_prefix), self.value),
    ]

    # Left-align labels, filling with dots up to the widest label.
    width = max(len(label) for label, _ in rows)
    line_template = "{{:.<{}}}: {{}}".format(width)
    lines = [line_template.format(label, value) for label, value in rows]
    return banner + '\n'.join(lines)
def __str__(self):
    """Build the field listing shared by all study-like objects.

    Only the body is produced here (one dot-padded ``label: value`` line
    per available field); the title is prepended by the subclasses.
    """
    server_root = "{}".format(openml.config.server[:-len('api/v1/xml')])

    # Entries that are always present.
    info = {
        "Name": self.name,
        "Status": self.status,
        "Main Entity Type": self.main_entity_type,
    }
    # Entries that are only added when the corresponding attribute is set.
    if self.id is not None:
        info["ID"] = self.id
        info["Study URL"] = "{}s/{}".format(server_root, self.id)
    if self.creator is not None:
        info["Creator"] = "{}u/{}".format(server_root, self.creator)
    if self.creation_date is not None:
        info["Upload Time"] = self.creation_date.replace('T', ' ')

    # Entity counts, one per collection that is not None.
    counted_collections = (
        ("# of Data", "data"),
        ("# of Tasks", "tasks"),
        ("# of Flows", "flows"),
        ("# of Runs", "runs"),
    )
    for label, attribute in counted_collections:
        members = getattr(self, attribute)
        if members is not None:
            info[label] = len(members)

    # Display order; labels without an entry in ``info`` are skipped.
    display_order = ("ID", "Name", "Status", "Main Entity Type", "Study URL",
                     "# of Data", "# of Tasks", "# of Flows", "# of Runs",
                     "Creator", "Upload Time")
    rows = [(label, info[label]) for label in display_order if label in info]

    # Left-align labels, filling with dots up to the widest label.
    width = max(len(label) for label, _ in rows)
    line_template = "{{:.<{}}}: {{}}".format(width)
    return '\n'.join(line_template.format(label, value) for label, value in rows)
def __str__(self):
    """Return the underlined "OpenML Study" title followed by the
    field listing produced by the parent class."""
    title = "OpenML Study"
    underline = '=' * len(title)
    banner = '{}\n{}\n'.format(title, underline)
    return banner + super(OpenMLStudy, self).__str__()
def __str__(self):
    """Return a human-readable, multi-line summary of the task.

    The summary is a title underlined with ``=`` followed by one
    ``<field name, dot-padded>: <value>`` line per field, in a fixed
    order. Apart from the task type, every field is optional and only
    printed when the corresponding attribute exists and is not None.

    Returns
    -------
    str
        The formatted summary.
    """
    header = "OpenML Task"
    header = '{}\n{}\n'.format(header, '=' * len(header))

    # NOTE(review): relies on ``openml.config`` being imported in this
    # module and on the server URL ending in 'api/v1/xml' -- confirm.
    base_url = openml.config.server[:-len('api/v1/xml')]
    fields = {"Task Type": self.task_type}
    if self.task_id is not None:
        fields["Task ID"] = self.task_id
        fields["Task URL"] = "{}t/{}".format(base_url, self.task_id)
    if self.evaluation_measure is not None:
        fields["Evaluation Measure"] = self.evaluation_measure
    if self.estimation_procedure is not None:
        fields["Estimation Procedure"] = self.estimation_procedure['type']

    # These attributes are looked up defensively: they may be absent on
    # some task objects, and a present-but-None value must not be
    # reported (``len(None)`` would raise, and a None cost matrix is not
    # "Available").
    target_name = getattr(self, 'target_name', None)
    if target_name is not None:
        fields["Target Feature"] = target_name
    class_labels = getattr(self, 'class_labels', None)
    if class_labels is not None:
        fields["# of Classes"] = len(class_labels)
    if getattr(self, 'cost_matrix', None) is not None:
        fields["Cost Matrix"] = "Available"

    # Determines the order in which the information will be printed.
    order = ["Task Type", "Task ID", "Task URL", "Estimation Procedure", "Evaluation Measure",
             "Target Feature", "# of Classes", "Cost Matrix"]
    fields = [(key, fields[key]) for key in order if key in fields]

    # Pad every field name with dots to the width of the longest one so
    # that the values line up in a column.
    longest_field_name_length = max(len(name) for name, value in fields)
    field_line_format = "{{:.<{}}}: {{}}".format(longest_field_name_length)
    body = '\n'.join(field_line_format.format(name, value) for name, value in fields)
    return header + body