Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 78 additions & 0 deletions examples/30_extended/plot_svm_hyperparameters_tutorial.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
"""
================================
Plotting hyperparameter surfaces
================================
"""
import openml
import numpy as np

####################################################################################################
# First step - obtaining the data
# ===============================
# First, we need to choose an SVM flow, for example 8353, and a task. Finding their IDs is not
# part of this tutorial; this could, for example, be done via the website.
#
# For this we use the function ``list_evaluations_setups`` which can automatically join
# evaluations conducted by the server with the hyperparameter settings extracted from the
# uploaded runs (called *setup*).
df = openml.evaluations.list_evaluations_setups(
    function='predictive_accuracy',
    flow=[8353],
    task=[6],
    output_format='dataframe',
    # With this flag every hyperparameter gets its own column in the returned dataframe.
    # Without it, the dataframe would instead contain a single field ``parameters``
    # holding an unparsed dictionary.
    parameters_in_separate_columns=True,
)
# Show the first ten evaluations.
print(df.head(10))

####################################################################################################
# We can see all the hyperparameter names in the columns of the dataframe:
for column_name in df.columns:
    # One column name per output line.
    print(column_name)

####################################################################################################
# Next, we cast and transform the hyperparameters of interest (``C`` and ``gamma``) so that we
# can nicely plot them.
# Column names of the two SVC hyperparameters that span the plotted surface.
hyperparameters = [
    'sklearn.svm.classes.SVC(16)_C',
    'sklearn.svm.classes.SVC(16)_gamma',
]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about printing the column names here to show that sklearn.svm.classes.SVC(16)_C & sklearn.svm.classes.SVC(16)_gamma or a check whether it is in the colnames list with the in operator?

# Cast the hyperparameter columns to float and move them to a base-10 logarithmic scale.
# ``np.log10`` (rather than the natural logarithm ``np.log``) is used so that the values
# agree with the "(log10)" axis labels of the plots.
df[hyperparameters] = df[hyperparameters].astype(float).apply(np.log10)

####################################################################################################
# Option 1 - plotting via the pandas helper functions
# ===================================================
#
df.plot.hexbin(
    x='sklearn.svm.classes.SVC(16)_C',
    y='sklearn.svm.classes.SVC(16)_gamma',
    # ``C`` names the column whose values are aggregated within each hexagonal bin ...
    C='value',
    # ... and ``reduce_C_function`` reduces all values falling into one bin to their mean.
    reduce_C_function=np.mean,
    gridsize=25,
    title='SVM performance landscape',
)

####################################################################################################
# Option 2 - plotting via matplotlib
# ==================================
#
import matplotlib.pyplot as plt

fig, ax = plt.subplots()

# Pull the two hyperparameter columns and the evaluation score out of the dataframe.
score = df['value']
C = df['sklearn.svm.classes.SVC(16)_C']
gamma = df['sklearn.svm.classes.SVC(16)_gamma']

# Mark every single evaluation with a small black dot.
ax.plot(C, gamma, 'ko', ms=1)
# Draw filled contours of the score over the (C, gamma) plane.
cntr = ax.tricontourf(C, gamma, score, levels=12, cmap="RdBu_r")
# Attach a colorbar for the contour levels.
fig.colorbar(cntr, ax=ax, label="accuracy")

# Restrict both axes to the range covered by the evaluations.
c_limits = (min(C), max(C))
gamma_limits = (min(gamma), max(gamma))
ax.set_xlim(c_limits)
ax.set_ylim(gamma_limits)
ax.set_xlabel("C (log10)")
ax.set_ylabel("gamma (log10)")
ax.set_title('SVM performance landscape')
31 changes: 31 additions & 0 deletions tests/test_evaluations/test_evaluations_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import unittest


class TestEvaluationsExample(unittest.TestCase):
    """Smoke test for the evaluations example that plots an SVM hyperparameter surface."""

    def test_example_python_paper(self):
        """Run the example end to end; it passes as long as no exception is raised."""
        # Example script which will appear in the upcoming OpenML-Python paper
        # This test ensures that the example will keep running!

        import openml
        import numpy as np
        import matplotlib.pyplot as plt

        df = openml.evaluations.list_evaluations_setups(
            'predictive_accuracy',
            flow=[8353],
            task=[6],
            output_format='dataframe',
            parameters_in_separate_columns=True,
        )  # Choose an SVM flow, for example 8353, and a task.

        hp_names = ['sklearn.svm.classes.SVC(16)_C', 'sklearn.svm.classes.SVC(16)_gamma']
        # Use the base-10 logarithm so the data agrees with the "(log10)" axis labels
        # below; ``np.log`` would yield the natural logarithm instead.
        df[hp_names] = df[hp_names].astype(float).apply(np.log10)
        C, gamma, score = df[hp_names[0]], df[hp_names[1]], df['value']

        cntr = plt.tricontourf(C, gamma, score, levels=12, cmap="RdBu_r")
        plt.colorbar(cntr, label="accuracy")
        plt.xlim((min(C), max(C)))
        plt.ylim((min(gamma), max(gamma)))
        plt.xlabel("C (log10)")
        plt.ylabel("gamma (log10)")