Show Sidebar Hide Sidebar

# SVM Margins in Scikit-learn

The plots below illustrate the effect the parameter C has on the separation line. A large value of C basically tells our model that we do not have that much faith in our data’s distribution, and that it will only consider points close to the line of separation.

A small value of C includes more/all the observations, allowing the margins to be calculated using all the data in the area.

#### New to Plotly?¶

You can set up Plotly to work in online or offline mode, or in Jupyter notebooks.
We also have a quick-reference cheatsheet (new!) to help you get started!

### Version¶

In [1]:
import sklearn
sklearn.__version__

Out[1]:
'0.18.1'

### Imports¶

In [2]:
print(__doc__)

import plotly.plotly as py
import plotly.graph_objs as go
from plotly import tools

import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm

Automatically created module for IPython interactive environment


### Calculations¶

In [3]:
# Build a reproducible toy dataset of 40 separable points: two blobs of
# 20 standard-normal samples each, shifted by (-2, -2) and (+2, +2).
np.random.seed(0)
lower_blob = np.random.randn(20, 2) - [2, 2]
upper_blob = np.random.randn(20, 2) + [2, 2]
X = np.concatenate((lower_blob, upper_blob))
# Class labels: the first 20 rows are class 0, the last 20 are class 1.
Y = [label for label in (0, 1) for _ in range(20)]

# Running figure number, starting at 1.
fignum = 1

def matplotlib_to_plotly(cmap, pl_entries):
    """Sample a matplotlib-style colormap into a Plotly colorscale.

    Parameters
    ----------
    cmap : callable
        Called with a float position; must return a sequence whose first
        three items are the red, green and blue channels. The channels are
        scaled by 255 and cast to ``uint8``, so they are expected to be
        floats in [0, 1].
    pl_entries : int
        Number of evenly spaced sample positions between 0 and 1
        (inclusive). Must be at least 2, because the step is
        ``1 / (pl_entries - 1)``.

    Returns
    -------
    list
        ``[position, 'rgb(r, g, b)']`` pairs, one per sample, in
        increasing position order.
    """
    h = 1.0 / (pl_entries - 1)
    pl_colorscale = []

    for k in range(pl_entries):
        # Convert the channels to plain Python ints. Under Python 3 ``map``
        # returns a lazy iterator that cannot be indexed, and NumPy scalars
        # do not reliably print as bare numbers inside ``str(tuple)``.
        channels = (np.array(cmap(k * h)[:3]) * 255).astype(np.uint8).tolist()
        r, g, b = channels
        pl_colorscale.append([k * h, 'rgb' + str((r, g, b))])

    return pl_colorscale

# Plotly colorscale built from 4 evenly spaced samples of matplotlib's
# "Paired" colormap.
cmap = matplotlib_to_plotly(plt.cm.Paired, 4)


### Plot Results¶

In [4]:
# One row with two side-by-side subplots, titled "unreg" and "reg".
fig = tools.make_subplots(
    rows=1,
    cols=2,
    subplot_titles=("unreg", "reg"),
)

This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]


In [5]:
# Plot extents and the grid on which the decision regions are evaluated.
# None of this depends on the fitted model, so it is computed once.
x_min = -4.8
x_max = 4.2
y_min = -6
y_max = 6
x_ = np.linspace(x_min, x_max, 200)
y_ = np.linspace(y_min, y_max, 200)

# meshgrid (default 'xy' indexing) returns arrays of shape
# (len(y_), len(x_)): rows follow y and columns follow x. That is the
# orientation go.Heatmap uses for ``z``, so the predicted classes can be
# passed to it without a transpose.
XX, YY = np.meshgrid(x_, y_)
grid_points = np.c_[XX.ravel(), YY.ravel()]

# Fit one linear SVM per penalty value and draw each into its own subplot
# column (``fignum``).
for name, penalty in (('unreg', 1), ('reg', 0.05)):

    clf = svm.SVC(kernel='linear', C=penalty)
    clf.fit(X, Y)

    # get the separating hyperplane w[0]*x + w[1]*y + intercept = 0,
    # rewritten as y = a*x - intercept / w[1]
    w = clf.coef_[0]
    a = -w[0] / w[1]
    xx = np.linspace(-5, 5)
    yy = a * xx - (clf.intercept_[0]) / w[1]

    # plot the parallels to the separating hyperplane that pass through the
    # support vectors. They lie ``margin`` = 1 / ||w|| away from it measured
    # perpendicular to the line; for a line of slope ``a`` that is a
    # vertical offset of sqrt(1 + a**2) * margin.
    margin = 1 / np.sqrt(np.sum(clf.coef_ ** 2))
    vertical_offset = np.sqrt(1 + a ** 2) * margin
    yy_down = yy - vertical_offset
    yy_up = yy + vertical_offset

    # Predicted class for every grid point, shaped like the grid so it can
    # be drawn as a colour plot.
    Z = clf.predict(grid_points).reshape(XX.shape)

    p6 = go.Heatmap(x=x_, y=y_, z=Z,
                    colorscale=cmap,
                    showscale=False)
    fig.append_trace(p6, 1, fignum)

    # plot the line, the points, and the nearest vectors to the plane
    for line_y in (yy, yy_down, yy_up):
        dashed_line = go.Scatter(x=xx, y=line_y,
                                 mode='lines',
                                 line=dict(color='black', dash='dash'))
        fig.append_trace(dashed_line, 1, fignum)

    # Support vectors: large white markers drawn underneath the data points.
    p4 = go.Scatter(x=clf.support_vectors_[:, 0],
                    y=clf.support_vectors_[:, 1],
                    mode='markers',
                    marker=dict(color='white', size=14,
                                line=dict(color='black', width=1)))
    fig.append_trace(p4, 1, fignum)

    # All training points, coloured by class label.
    p5 = go.Scatter(x=X[:, 0], y=X[:, 1],
                    mode='markers',
                    marker=dict(color=Y,
                                colorscale=cmap,
                                line=dict(color='black', width=1),
                                showscale=False))
    fig.append_trace(p5, 1, fignum)

    fignum += 1

In [6]:
# Hide tick marks and tick labels on both subplots and pin each axis to the
# fixed plotting extents.
hidden_ticks = dict(showticklabels=False, ticks='')
for axis_no in ('1', '2'):
    fig['layout']['yaxis' + axis_no].update(range=[y_min, y_max],
                                            **hidden_ticks)
    fig['layout']['xaxis' + axis_no].update(range=[x_min, x_max],
                                            **hidden_ticks)

# No legend, then render the figure.
fig['layout'].update(showlegend=False)
py.iplot(fig)

Out[6]:

Code source:

          Gaël Varoquaux



           BSD 3 clause