Show Sidebar Hide Sidebar

# SGD Weighted Samples in Scikit-learn

Plot the decision function of a weighted dataset, where the size of each point is proportional to its weight.

#### New to Plotly?

You can set up Plotly to work in online or offline mode, or in Jupyter notebooks.
We also have a quick-reference cheatsheet (new!) to help you get started!

### Version

In [1]:
# Report the installed scikit-learn version; the output cell below shows 0.18.1 for this run.
import sklearn
sklearn.__version__

Out[1]:
'0.18.1'

### Imports

In [2]:
import plotly.plotly as py
import plotly.graph_objs as go

import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model


### Calculations

In [3]:
# Build a reproducible 20-point toy dataset: the first ten rows are drawn
# around (1, 1) and labelled +1, the last ten around the origin and labelled -1.
np.random.seed(0)
X = np.vstack((np.random.randn(10, 2) + [1, 1], np.random.randn(10, 2)))
y = 10 * [1] + 10 * [-1]
# One non-negative weight per sample.
sample_weight = np.abs(np.random.randn(20)) * 20
# Double the weight of the FIRST ten samples (the slice [:10], i.e. the +1 class).
sample_weight[:10] = sample_weight[:10] * 2
# Accumulator for the plotly traces appended by the later cells.
data = list()


### Plot Results

In [4]:
def matplotlib_to_plotly(cmap, pl_entries):
    """Sample a matplotlib-style colormap into a Plotly colorscale list.

    Parameters
    ----------
    cmap : callable
        Called with a float position in [0, 1]; must return a sequence whose
        first three items are the red, green and blue components. They are
        scaled by 255 here, so they are expected to lie in [0, 1].
    pl_entries : int
        Number of evenly spaced samples to take. Must be at least 2:
        a value of 1 raises ZeroDivisionError when the step is computed.

    Returns
    -------
    list
        ``pl_entries`` items of the form ``[position, 'rgb(r, g, b)']`` with
        ``position`` running from 0.0 to 1.0 and integer channel values.
    """
    h = 1.0 / (pl_entries - 1)
    pl_colorscale = []

    for k in range(pl_entries):
        # Cast the scaled channels to uint8 in one vectorised step. The
        # previous map(np.uint8, ...) returned an iterator on Python 3 and
        # could not be indexed.
        channels = (np.array(cmap(k * h)[:3]) * 255).astype(np.uint8)
        # Convert to plain ints so the text is always 'rgb(r, g, b)' and never
        # contains numpy scalar reprs.
        r, g, b = (int(c) for c in channels)
        pl_colorscale.append([k * h, 'rgb({}, {}, {})'.format(r, g, b)])

    return pl_colorscale
# Per-sample colour lookup used by the scatter cell (read as cmap[i][1]).
# `k` holds eleven [index, 'white'] placeholders; only its first ten end up in
# `cmap`, followed by ten colours sampled from matplotlib's `bone` colormap,
# giving twenty entries in total.
k = [[idx, 'white'] for idx in range(11)]
cmap = k[:10] + matplotlib_to_plotly(plt.cm.bone, 10)


Plot the weighted data points

In [5]:
# 500 x 500 evaluation grid over [-4, 5] on both axes; the later cells evaluate
# the classifiers' decision functions on it.
x_ = np.linspace(-4, 5, 500)
y_ = np.linspace(-4, 5, 500)
xx, yy = np.meshgrid(x_, y_)

# One single-point scatter trace per sample: the marker size is the sample's
# weight and the fill colour is the matching entry of `cmap`.
for i, weight in enumerate(sample_weight):
    trace = go.Scatter(
        x=[X[i, 0]],
        y=[X[i, 1]],
        mode='markers',
        showlegend=False,
        marker={
            'color': cmap[i][1],
            'size': weight,
            'line': {'color': 'black', 'width': 1},
        },
        opacity=0.5,
    )
    data.append(trace)


Fit the unweighted model

In [6]:
# Baseline: fit the SGD classifier without sample weights.
# NOTE: `n_iter` matches the scikit-learn 0.18.1 API reported in the Version
# cell; later scikit-learn releases replaced it with `max_iter`.
clf = linear_model.SGDClassifier(alpha=0.01, n_iter=100)
clf.fit(X, y)

# Evaluate the decision function at every grid node, then fold the flat result
# back into the grid's shape.
Z = clf.decision_function(
    np.column_stack((xx.ravel(), yy.ravel()))
).reshape(xx.shape)

# Solid contour lines for the unweighted model.
no_weights = go.Contour(
    x=x_,
    y=y_,
    z=Z,
    ncontours=2,
    contours={'coloring': 'lines'},
    line={'width': 2},
    showscale=False,
    colorscale=[[0, 'blue'], [1, 'white']],
)
data.append(no_weights)


Fit the weighted model

In [7]:
# Refit the same classifier configuration, this time passing the per-sample
# weights to fit().
# NOTE: `n_iter` matches the scikit-learn 0.18.1 API reported in the Version
# cell; later scikit-learn releases replaced it with `max_iter`.
clf = linear_model.SGDClassifier(alpha=0.01, n_iter=100)
clf.fit(X, y, sample_weight=sample_weight)

# Evaluate the decision function at every grid node, then fold the flat result
# back into the grid's shape.
Z = clf.decision_function(
    np.column_stack((xx.ravel(), yy.ravel()))
).reshape(xx.shape)

# Dashed contour lines distinguish the weighted model from the unweighted one.
samples_weights = go.Contour(
    x=x_,
    y=y_,
    z=Z,
    ncontours=2,
    showscale=False,
    contours={'coloring': 'lines'},
    line={'dash': 'dash', 'width': 2},
    colorscale=[[0, 'blue'], [1, 'white']],
)
data.append(samples_weights)

In [8]:
# Turn off the zero line and the grid on both axes, then bundle all collected
# traces with that layout into a single figure.
layout = go.Layout(
    xaxis={'zeroline': False, 'showgrid': False},
    yaxis={'zeroline': False, 'showgrid': False},
)

fig = go.Figure(data=data, layout=layout)

In [9]:
# Hand the assembled figure to plotly's iplot so it is shown as this cell's output.
py.iplot(fig)

Out[9]:
Still need help?