
Decision Boundary of Label Propagation versus SVM on the Iris dataset in Scikit-learn

A comparison of the decision boundaries generated on the Iris dataset by Label Propagation and an SVM.

This demonstrates Label Propagation learning a good boundary even with a small amount of labeled data.
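In scikit-learn's semi-supervised API, any sample whose label is set to -1 is treated as unlabeled, and the estimator infers its label from the labeled points. A minimal sketch of that convention (the toy arrays X_demo and y_demo below are illustrative, not part of this example):

import numpy as np
from sklearn.semi_supervised import LabelSpreading

# -1 marks an unlabeled sample; 0 and 1 are the known classes
X_demo = np.array([[0.0], [0.1], [0.2], [1.0], [1.1], [1.2]])
y_demo = np.array([0, -1, -1, 1, -1, -1])

model = LabelSpreading().fit(X_demo, y_demo)
print(model.transduction_)  # labels inferred for every sample, labeled or not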

New to Plotly?

Plotly's Python library is free and open source! Get started by downloading the client and reading the primer.
You can set up Plotly to work in online or offline mode, or in Jupyter notebooks.
We also have a quick-reference cheatsheet (new!) to help you get started!
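The py.iplot calls below publish the figure to your online Plotly account, so the client needs credentials. A hedged setup sketch for the classic (pre-4.0) Plotly client, with placeholder username and API key:

# Install the client with `pip install plotly`, then store your credentials
# once (placeholders below) so plotly.plotly can publish figures online.
import plotly.tools as tls

tls.set_credentials_file(username='your_username', api_key='your_api_key')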

Version

In [1]:
import sklearn
sklearn.__version__
Out[1]:
'0.18.1'

Imports

In [2]:
print(__doc__)

import plotly.plotly as py
import plotly.graph_objs as go
from plotly import tools

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn import svm
from sklearn.semi_supervised import label_propagation
Automatically created module for IPython interactive environment
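The label_propagation submodule path above matches scikit-learn 0.18; on newer releases that submodule is no longer public, but the same estimators can be imported directly from sklearn.semi_supervised (a hedged equivalent):

# Equivalent imports that also work on current scikit-learn releases
from sklearn.semi_supervised import LabelPropagation, LabelSpreading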

Calculations

In [3]:
rng = np.random.RandomState(0)

iris = datasets.load_iris()

X = iris.data[:, :2]
y = iris.target

# step size in the mesh
h = .02

y_30 = np.copy(y)
y_30[rng.rand(len(y)) < 0.3] = -1
y_50 = np.copy(y)
y_50[rng.rand(len(y)) < 0.5] = -1
# fit LabelSpreading on the partially labeled copies of y, and an SVM on the
# fully labeled data for comparison
ls30 = (label_propagation.LabelSpreading().fit(X, y_30),
        y_30)
ls50 = (label_propagation.LabelSpreading().fit(X, y_50),
        y_50)
ls100 = (label_propagation.LabelSpreading().fit(X, y), y)
rbf_svc = (svm.SVC(kernel='rbf').fit(X, y), y)

# create a mesh to plot in
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
x_ = np.arange(x_min, x_max, h)
y_ = np.arange(y_min, y_max, h)
xx, yy = np.meshgrid(x_, y_)

# title for the plots
titles = ['Label Spreading 30% data',
          'Label Spreading 50% data',
          'Label Spreading 100% data',
          'SVC with rbf kernel']
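As a quick sanity check (not part of the original example), the fitted LabelSpreading models expose a transduction_ attribute holding the labels they inferred, which can be compared against the ground truth on the points that were hidden with -1:

# Compare the inferred labels on the hidden (-1) points with the true labels
for name, (clf, y_train) in zip(titles[:3], (ls30, ls50, ls100)):
    hidden = (y_train == -1)
    if hidden.any():
        acc = np.mean(clf.transduction_[hidden] == y[hidden])
        print('%s: %.2f accuracy on %d unlabeled points' % (name, acc, hidden.sum()))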

Plot Results

In [4]:
fig = tools.make_subplots(rows=2, cols=2,
                          subplot_titles=tuple(titles),
                          print_grid=False)

def matplotlib_to_plotly(cmap, pl_entries):
    """Convert a matplotlib colormap into a Plotly colorscale."""
    h = 1.0 / (pl_entries - 1)
    pl_colorscale = []

    for k in range(pl_entries):
        # Sample the colormap and scale the RGB channels to 0-255 integers
        C = (np.array(cmap(k * h)[:3]) * 255).astype(np.uint8)
        pl_colorscale.append([k * h, 'rgb(%d, %d, %d)' % tuple(C)])

    return pl_colorscale

cmap = matplotlib_to_plotly(plt.cm.Paired, 6)
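Each entry of the resulting colorscale is a [fraction, 'rgb(r, g, b)'] pair, the format the Plotly traces below expect. A quick look:

print(len(cmap), cmap[0])  # 6 entries spanning fractions 0.0 to 1.0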
In [5]:
for i, (clf, y_train) in enumerate((ls30, ls50, ls100, rbf_svc)):
    # Plot the decision boundary. For that, we will assign a color to each
    # point in the mesh [x_min, x_max]x[y_min, y_max].
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])

    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    trace1 = go.Heatmap(x=x_, y=y_, z=Z,
                        colorscale=cmap,
                        showscale=False)
    fig.append_trace(trace1, i // 2 + 1, i % 2 + 1)
    # Plot also the training points, colored by their (partially hidden) labels
    trace2 = go.Scatter(x=X[:, 0], y=X[:, 1],
                        mode='markers',
                        showlegend=False,
                        marker=dict(color=y_train,
                                    colorscale=cmap,
                                    line=dict(width=1, color='black'))
                       )
    fig.append_trace(trace2, i // 2 + 1, i % 2 + 1)


# Hide the tick labels on every subplot axis
for i in map(str, range(1, 5)):
    fig['layout']['yaxis' + i].update(showticklabels=False, ticks='')
    fig['layout']['xaxis' + i].update(showticklabels=False, ticks='')
        
fig['layout'].update(height=700)
In [6]:
py.iplot(fig)
Out[6]:
[Interactive Plotly figure: four subplots titled 'Label Spreading 30% data', 'Label Spreading 50% data', 'Label Spreading 100% data', and 'SVC with rbf kernel'.]
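If you would rather not publish the figure to the Plotly cloud, a hedged offline alternative (pre-4.0 Plotly API) renders it directly in the notebook:

# Render the same figure inside the notebook without uploading it
from plotly.offline import init_notebook_mode, iplot

init_notebook_mode(connected=True)
iplot(fig)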

License

Authors:

     Clay Woolam <clay@woolam.org>

License:

     BSD