Show Sidebar Hide Sidebar

# Decision Boundary of Label Propagation versus SVM on the Iris dataset in Scikit-learn

Comparison of the decision boundaries generated on the iris dataset by Label Propagation and by an SVM.

This demonstrates Label Propagation learning a good boundary even with a small amount of labeled data.

#### New to Plotly?

You can set up Plotly to work in online or offline mode, or in jupyter notebooks.
We also have a quick-reference cheatsheet (new!) to help you get started!

### Version

In [1]:
import sklearn
sklearn.__version__

Out[1]:
'0.18.1'

### Imports

In [2]:
print(__doc__)

import plotly.plotly as py
import plotly.graph_objs as go
from plotly import tools

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn import svm
from sklearn.semi_supervised import label_propagation

Automatically created module for IPython interactive environment


### Calculations

In [3]:
# Seed the RNG so the randomly-hidden labels are reproducible.
rng = np.random.RandomState(0)

# Load the iris dataset; keep only the first two features so the
# decision boundary can be drawn in 2-D.
iris = datasets.load_iris()
X = iris.data[:, :2]
y = iris.target

# step size in the mesh
h = .02

# Build partially-labeled copies of y: entries set to -1 are treated
# as "unlabeled" by the semi-supervised learners.
y_30 = np.copy(y)
y_30[rng.rand(len(y)) < 0.3] = -1
y_50 = np.copy(y)
y_50[rng.rand(len(y)) < 0.5] = -1

# We create an instance of each model and fit our data. We do not scale
# the data since we want to plot the support vectors.
# Each entry pairs the fitted estimator with the labels it was trained on.
ls30 = (label_propagation.LabelSpreading().fit(X, y_30), y_30)
ls50 = (label_propagation.LabelSpreading().fit(X, y_50), y_50)
ls100 = (label_propagation.LabelSpreading().fit(X, y), y)
rbf_svc = (svm.SVC(kernel='rbf').fit(X, y), y)

# create a mesh to plot in
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
x_ = np.arange(x_min, x_max, h)
y_ = np.arange(y_min, y_max, h)
xx, yy = np.meshgrid(x_, y_)

# titles for the four subplots, one per fitted model above
titles = ['Label Spreading 30% data',
          'Label Spreading 50% data',
          'Label Spreading 100% data',
          'SVC with rbf kernel']


### Plot Results

In [4]:
# A 2x2 grid of subplots, one panel per model, titled from `titles`.
fig = tools.make_subplots(
    rows=2, cols=2,
    subplot_titles=tuple(titles),
    print_grid=False,
)

def matplotlib_to_plotly(cmap, pl_entries):
    """Convert a matplotlib colormap into a Plotly colorscale.

    Parameters
    ----------
    cmap : callable
        A matplotlib-style colormap: maps a float in [0, 1] to an RGBA
        tuple of floats in [0, 1].
    pl_entries : int
        Number of colorscale entries to sample; must be >= 2.

    Returns
    -------
    list
        ``[[position, 'rgb(r, g, b)'], ...]`` pairs, positions spanning
        0.0 to 1.0, usable as a Plotly ``colorscale``.
    """
    h = 1.0 / (pl_entries - 1)
    pl_colorscale = []

    for k in range(pl_entries):
        # map() returns a one-shot iterator on Python 3 and cannot be
        # indexed, so materialize the channels.  Coerce to plain int so
        # str() yields 'rgb(r, g, b)' regardless of NumPy's scalar repr.
        r, g, b = (int(np.uint8(c)) for c in np.array(cmap(k * h)[:3]) * 255)
        pl_colorscale.append([k * h, 'rgb' + str((r, g, b))])

    return pl_colorscale
# Sample matplotlib's 'Paired' colormap into a 6-entry Plotly colorscale.
cmap = matplotlib_to_plotly(plt.cm.Paired, 6)

In [5]:
# Draw each model's decision surface plus the training points in its
# own subplot panel.
for i, (clf, y_train) in enumerate((ls30, ls50, ls100, rbf_svc)):
    # Plot the decision boundary. For that, we will assign a color to each
    # point in the mesh [x_min, x_max]x[y_min, y_max].
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])

    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    trace1 = go.Heatmap(x=x_, y=y_, z=Z,
                        colorscale=cmap,
                        showscale=False)
    # Use floor division: append_trace needs integer row/col indices,
    # and i/2 is a float on Python 3.
    fig.append_trace(trace1, i // 2 + 1, i % 2 + 1)

    # Plot also the training points
    trace2 = go.Scatter(x=X[:, 0], y=X[:, 1],
                        mode='markers',
                        showlegend=False,
                        marker=dict(color=X[:, 0],
                                    colorscale=cmap,
                                    line=dict(width=1, color='black'))
                        )
    fig.append_trace(trace2, i // 2 + 1, i % 2 + 1)

# Hide tick marks and labels on all four subplot axes.
for i in map(str, range(1, 5)):
    y = 'yaxis' + i
    x = 'xaxis' + i
    fig['layout'][y].update(showticklabels=False, ticks='')
    fig['layout'][x].update(showticklabels=False, ticks='')

fig['layout'].update(height=700)

In [6]:
# Render the figure (py.iplot sends the figure to Plotly's online service
# and requires configured credentials).
py.iplot(fig)

Out[6]:

Authors:

     Clay Woolam <clay@woolam.org>



     BSD