Show Sidebar Hide Sidebar

# Multi-Class SGD On The Iris Dataset in Scikit-learn

Plot the decision surface of multi-class SGD on the iris dataset. The hyperplanes corresponding to the three one-versus-all (OVA) classifiers are represented by the dashed lines.

#### New to Plotly?

You can set up Plotly to work in online or offline mode, or in Jupyter notebooks.
We also have a quick-reference cheatsheet (new!) to help you get started!

### Version

In [1]:
# Display the installed scikit-learn version; the output below records the
# version this notebook was run with.
import sklearn
sklearn.__version__

Out[1]:
'0.18.1'

### Imports

In [2]:
print(__doc__)

import plotly.plotly as py
import plotly.graph_objs as go

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.linear_model import SGDClassifier

Automatically created module for IPython interactive environment


### Calculations

In [3]:
# import some data to play with
iris = datasets.load_iris()
X = iris.data[:, :2]  # we only take the first two features. We could
                      # avoid this ugly slicing by using a two-dim dataset
y = iris.target
colors = ["blue", "red", "yellow"]

# shuffle: apply one seeded permutation to X and y so rows stay aligned
idx = np.arange(X.shape[0])
np.random.seed(13)
np.random.shuffle(idx)
X = X[idx]
y = y[idx]

# standardize each feature column to zero mean and unit standard deviation
mean = X.mean(axis=0)
std = X.std(axis=0)
X = (X - mean) / std

h = .02  # step size in the mesh

# NOTE(review): `n_iter` matches the scikit-learn 0.18.1 API reported in the
# Version cell; newer releases appear to use `max_iter` instead -- confirm
# against the installed version before changing it.
clf = SGDClassifier(alpha=0.001, n_iter=100).fit(X, y)

# create a mesh to plot in, padded by one unit beyond the data on every side
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1

x_ = np.arange(x_min, x_max, h)
y_ = np.arange(y_min, y_max, h)
xx, yy = np.meshgrid(x_, y_)


### Plot Results

In [4]:
def matplotlib_to_plotly(cmap, pl_entries):
    """Convert a colormap callable into a Plotly-style colorscale list.

    Parameters
    ----------
    cmap : callable
        Called as ``cmap(t)`` with ``t`` in [0, 1]; must return a sequence
        whose first three items are the red, green and blue components as
        floats in [0, 1] (any further items, e.g. alpha, are ignored).
    pl_entries : int
        Number of evenly spaced samples to take. Must be at least 2, because
        the sample spacing is ``1 / (pl_entries - 1)``.

    Returns
    -------
    list
        ``[position, 'rgb(r, g, b)']`` pairs, with positions running from
        0.0 to 1.0 and each channel truncated to an integer in 0..255.
    """
    step = 1.0 / (pl_entries - 1)
    pl_colorscale = []

    for k in range(pl_entries):
        position = k * step
        channels = (np.array(cmap(position)[:3]) * 255).astype(np.uint8)
        # Convert to plain Python ints: indexing a bare `map` object fails on
        # Python 3, and NumPy scalars may not stringify as bare numbers.
        r, g, b = (int(c) for c in channels)
        pl_colorscale.append([position, 'rgb' + str((r, g, b))])

    return pl_colorscale

# Sample matplotlib's "Paired" colormap at 5 points to build the colorscale
# passed to the contour trace below.
cmap = matplotlib_to_plotly(plt.cm.Paired, 5)

In [5]:
# Predict a class for every mesh node (each row of the stacked array is one
# (x, y) point), then fold the flat predictions back into the mesh shape.
Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

# Put the result into a color plot
cs = go.Contour(
    x=x_,
    y=y_,
    z=Z,
    colorscale=cmap,
    showscale=False,
)

# The contour is the first trace; later cells append to this list.
data = [cs]
# Plot also the training points: one marker trace per class, named after the
# class and drawn in the colour paired with it in `colors`.
for i, color in zip(clf.classes_, colors):
    # A boolean mask selects this class's rows directly and leaves the shuffle
    # permutation stored in `idx` untouched.
    mask = y == i
    t = go.Scatter(x=X[mask, 0], y=X[mask, 1],
                   mode='markers',
                   marker=dict(color=color,
                               line=dict(color='black', width=1)),
                   name=iris.target_names[i],
                   )
    data.append(t)

# Plot the three one-against-all classifiers
coef = clf.coef_
intercept = clf.intercept_


def plot_hyperplane(c, color):
    """Return a dashed line trace for the decision boundary of classifier ``c``.

    The boundary is the set of points where
    ``coef[c, 0] * x0 + coef[c, 1] * x1 + intercept[c] == 0``; it is solved
    for ``x1`` at the two horizontal ends of the mesh, ``x_min`` and ``x_max``.
    """
    w0, w1 = coef[c, 0], coef[c, 1]
    bias = intercept[c]

    def boundary_y(x0):
        return (-(x0 * w0) - bias) / w1

    ends_x = [x_min, x_max]
    ends_y = [boundary_y(x0) for x0 in ends_x]
    return go.Scatter(x=ends_x, y=ends_y,
                      mode='lines',
                      line=dict(color=color, dash='dash'),
                      showlegend=False)

# One boundary trace per class, using the colour paired with it in `colors`.
data.extend(plot_hyperplane(i, color)
            for i, color in zip(clf.classes_, colors))

# Set both axis ranges to the extent of the mesh coordinates.
layout = go.Layout(
    title="Decision surface of multi-class SGD",
    xaxis=dict(range=[min(x_), max(x_)]),
    yaxis=dict(range=[min(y_), max(y_)]),
)
fig = go.Figure(data=data, layout=layout)

In [6]:
# Hand the assembled figure to plotly.plotly (imported as `py`) for display;
# presumably this renders it inline in the notebook -- confirm for your setup.
py.iplot(fig)

Out[6]:
Still need help?