Show Sidebar Hide Sidebar

# Linear and Quadratic Discriminant Analysis with confidence ellipsoid in Scikit-learn

Plot the confidence ellipsoids of each class and decision boundary

#### New to Plotly?¶

You can set up Plotly to work in online or offline mode, or in jupyter notebooks.
We also have a quick-reference cheatsheet (new!) to help you get started!

### Version¶

In [1]:
import sklearn
sklearn.__version__

Out[1]:
'0.18'

### Imports¶

In [2]:
print(__doc__)

import plotly.plotly as py
import plotly.graph_objs as go
from plotly import tools
import math

from scipy import linalg
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib import colors

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

Automatically created module for IPython interactive environment


### Calculaions¶

Colormap

In [3]:
cmap = colors.LinearSegmentedColormap(
'red_blue_classes',
{'red': [(0, 1, 1), (1, 0.7, 0.7)],
'green': [(0, 0.7, 0.7), (1, 0.7, 0.7)],
'blue': [(0, 0.7, 0.7), (1, 1, 1)]})
plt.cm.register_cmap(cmap=cmap)


generate datasets

In [4]:
def dataset_fixed_cov():
'''Generate 2 Gaussians samples with the same covariance matrix'''
n, dim = 300, 2
np.random.seed(0)
C = np.array([[0., -0.23], [0.83, .23]])
X = np.r_[np.dot(np.random.randn(n, dim), C),
np.dot(np.random.randn(n, dim), C) + np.array([1, 1])]
y = np.hstack((np.zeros(n), np.ones(n)))
return X, y

def dataset_cov():
'''Generate 2 Gaussians samples with different covariance matrices'''
n, dim = 300, 2
np.random.seed(0)
C = np.array([[0., -1.], [2.5, .7]]) * 2.
X = np.r_[np.dot(np.random.randn(n, dim), C),
np.dot(np.random.randn(n, dim), C.T) + np.array([1, 4])]
y = np.hstack((np.zeros(n), np.ones(n)))
return X, y


### Plot Functions¶

In [5]:
fig = tools.make_subplots(rows=2, cols=2,
print_grid=False,
subplot_titles= ('Linear Discriminant Analysis',

def matplotlib_to_plotly(cmap, pl_entries):
h = 1.0/(pl_entries-1)
pl_colorscale = []

for k in range(pl_entries):
C = map(np.uint8, np.array(cmap(k*h)[:3])*255)
pl_colorscale.append([k*h, 'rgb'+str((C[0], C[1], C[2]))])

return pl_colorscale

def plot_data(lda, X, y, y_pred, fig_index):
tp = (y == y_pred)  # True Positive
tp0, tp1 = tp[y == 0], tp[y == 1]
X0, X1 = X[y == 0], X[y == 1]
X0_tp, X0_fp = X0[tp0], X0[~tp0]
X1_tp, X1_fp = X1[tp1], X1[~tp1]

alpha = 0.5

# class 0: dots
class0 = go.Scatter(x=X0_tp[:, 0], y=X0_tp[:, 1], mode='markers',
opacity=0.75, showlegend=False,
marker=dict(color='red', size=12,
line=dict(color='black', width = 1)))
class0_dark = go.Scatter(x=X0_fp[:, 0], y=X0_fp[:, 1], mode='markers',
opacity=0.75, showlegend=False,
marker=dict(color='#990000', size=12,
line=dict(color='black', width = 1)))  # dark red

# class 1: dots
class1 = go.Scatter(x=X1_tp[:, 0], y=X1_tp[:, 1], mode='markers',
opacity=0.75, showlegend=False,
marker=dict(color='blue', size=12,
line=dict(color='black', width = 1)))
class1_dark = go.Scatter(x=X1_fp[:, 0], y=X1_fp[:, 1], mode='markers',
opacity=0.75, showlegend=False,
marker=dict(color='#000099',size=12,
line=dict(color='black', width = 1)))  # dark blue

# class 0 and 1 : areas
nx, ny = 200, 200

x1_max, x1_min, = max(X0_tp[:, 0]),  min(X0_tp[:, 0])
x2_max, x2_min = max(X1_tp[:, 0]),  min(X1_tp[:, 0])
x_max, x_min = max(x1_max, x2_max), min(x1_min, x2_min)

y1_max, y1_min = max(X0_tp[:, 1]),  min(X0_tp[:, 1])
y2_max, y2_min = max(X1_tp[:, 1]),  min(X1_tp[:, 1])
y_max, y_min = max(y1_max, y2_max), min(y1_min, y2_min)

xx, yy = np.meshgrid(np.linspace(x_min, x_max, nx),
np.linspace(y_min, y_max, ny))

Z = lda.predict_proba(np.c_[xx.ravel(), yy.ravel()])
Z = Z[:, 1].reshape(xx.shape)

trace = go.Heatmap(x=np.linspace(x_min, x_max, nx),
z=Z,
y=np.linspace(y_min, y_max, ny),
colorscale= matplotlib_to_plotly(cmap,300),
showscale=False)
#means
means1 = go.Scatter(x=lda.means_[0][0], y=lda.means_[0][1],
opacity=0.75, mode='markers',
showlegend=False,
marker=dict(color='black',
size=14))
means2 = go.Scatter(x=lda.means_[1][0], y=lda.means_[1][1],
opacity=0.75, mode='markers',
showlegend=False,
marker=dict(color='black',
size=14))

plot = [ means1, means2, class1, class1_dark, class0, class0_dark, trace]

return plot

def plot_ellipse(splot, mean, cov, color):
v, w = linalg.eigh(cov)
u = w[0] / linalg.norm(w[0])
a =  v[1] ** 0.5
b =  v[0] ** 0.5
x_origin = mean[0]
y_origin = mean[1]
x_ = [ ]
y_ = [ ]

for t in range(0,361,10):
x_.append(x)
y_.append(y)

elle = go.Scatter(x=x_ , y=y_, mode='lines',
showlegend=False,
line=dict(color='rgb(255,215,0)',
width=3))

return elle

def plot_lda_cov(lda, splot):
ellipse1 = plot_ellipse(splot, lda.means_[0], lda.covariance_, 'red')
ellipse2 = plot_ellipse(splot, lda.means_[1], lda.covariance_, 'blue')
return [ellipse1, ellipse2]

def plot_qda_cov(qda, splot):
ellipse1 = plot_ellipse(splot, qda.means_[0], qda.covariances_[0], 'red')
ellipse2 = plot_ellipse(splot, qda.means_[1], qda.covariances_[1], 'blue')
return [ellipse1, ellipse2]


### Plots¶

In [6]:
for i, (X, y) in enumerate([dataset_fixed_cov(), dataset_cov()]):
# Linear Discriminant Analysis
total1 = []
total2 = []
lda = LinearDiscriminantAnalysis(solver="svd", store_covariance=True)
y_pred = lda.fit(X, y).predict(X)
splot = plot_data(lda, X, y, y_pred, fig_index=2 * i + 1)
ellipse_plot = plot_lda_cov(lda, splot)
total1 = splot  + ellipse_plot
for k in range(len(total1)):
if(i==0):
fig.append_trace(total1[k], 1, 1)

elif(i==1):
fig.append_trace(total1[k], 2, 1)

y_pred = qda.fit(X, y).predict(X)
splot1 = plot_data(qda, X, y, y_pred, fig_index=2 * i + 2)
ellipse_plot1 = plot_qda_cov(qda, splot)
total2 = splot1 + ellipse_plot1

for k in range(len(total2)):
if (i==0):
fig.append_trace(total2[k], 1, 2)

elif (i==1):
fig.append_trace(total2[k], 2, 2)

fig['layout'].update(height=900)

for i in map(str, range(1,5)):
x = 'xaxis' + i
y = 'yaxis' + i
fig['layout'][y].update(showticklabels=False, ticks='',
zeroline=False, showgrid=False)
fig['layout'][x].update(showticklabels=False, ticks='',
zeroline=False, showgrid=False)

y_title = ['Data with Fixed Covariance', 'Data with variable Covariance']
j = 0
for i in map(str, range(1,5, 2)):
y = 'yaxis' + i
fig['layout'][y].update(title = y_title[j])
j+=1

py.iplot(fig)

Out[6]:
Still need help?