
Linear and Quadratic Discriminant Analysis with confidence ellipsoid in Scikit-learn

Plot the confidence ellipsoids of each class and the decision boundary.
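
Both estimators share the usual scikit-learn fit/predict API; the sketch below (toy data made up for illustration, not one of the example's datasets) shows the calls this example builds on. Passing store_covariance / store_covariances asks each estimator to keep the fitted covariance matrices, which the ellipse-plotting code further down relies on.

import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

# Toy data: two small, separable point clouds (assumed values, illustration only).
X = np.array([[0., 0.], [1., 0.], [0., 1.], [2., 2.], [3., 2.], [2., 3.]])
y = np.array([0, 0, 0, 1, 1, 1])

lda = LinearDiscriminantAnalysis(solver='svd', store_covariance=True).fit(X, y)
qda = QuadraticDiscriminantAnalysis(store_covariances=True).fit(X, y)

print(lda.predict([[1., 1.]]))        # hard class label from the linear boundary
print(qda.predict_proba([[1., 1.]]))  # per-class posterior probabilities
print(lda.means_)                     # per-class means, used below as ellipse centers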

Version

In [1]:
import sklearn
sklearn.__version__
Out[1]:
'0.18'

Imports

In [2]:
print(__doc__)

import plotly.plotly as py
import plotly.graph_objs as go
from plotly import tools
import math

from scipy import linalg
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib import colors

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
Automatically created module for IPython interactive environment

Calculations

Colormap

In [3]:
cmap = colors.LinearSegmentedColormap(
    'red_blue_classes',
    {'red': [(0, 1, 1), (1, 0.7, 0.7)],
     'green': [(0, 0.7, 0.7), (1, 0.7, 0.7)],
     'blue': [(0, 0.7, 0.7), (1, 1, 1)]})
plt.cm.register_cmap(cmap=cmap)
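
The segment data above pins the low end of the scale to a light red (1.0, 0.7, 0.7) and the high end to a light blue (0.7, 0.7, 1.0), with green held constant, so class-0 regions of the heatmap render reddish and class-1 regions bluish. A quick illustrative check, assuming the cell above has run:

print(plt.cm.get_cmap('red_blue_classes')(0.0))  # (1.0, 0.7, 0.7, 1.0) -> light red
print(plt.cm.get_cmap('red_blue_classes')(1.0))  # (0.7, 0.7, 1.0, 1.0) -> light blue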

Generate Datasets

In [4]:
def dataset_fixed_cov():
    '''Generate 2 Gaussians samples with the same covariance matrix'''
    n, dim = 300, 2
    np.random.seed(0)
    C = np.array([[0., -0.23], [0.83, .23]])
    X = np.r_[np.dot(np.random.randn(n, dim), C),
              np.dot(np.random.randn(n, dim), C) + np.array([1, 1])]
    y = np.hstack((np.zeros(n), np.ones(n)))
    return X, y


def dataset_cov():
    '''Generate 2 Gaussians samples with different covariance matrices'''
    n, dim = 300, 2
    np.random.seed(0)
    C = np.array([[0., -1.], [2.5, .7]]) * 2.
    X = np.r_[np.dot(np.random.randn(n, dim), C),
              np.dot(np.random.randn(n, dim), C.T) + np.array([1, 4])]
    y = np.hstack((np.zeros(n), np.ones(n)))
    return X, y
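
Each generator returns 300 samples per class, stacked into a (600, 2) design matrix, with a matching 0/1 label vector; the second class is shifted so the two clouds only partially overlap. A quick shape check (illustrative, assuming the cell above has run):

X, y = dataset_fixed_cov()
print(X.shape, y.shape)            # (600, 2) (600,)
print(np.bincount(y.astype(int)))  # [300 300] -- balanced classes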

Plot Functions

In [5]:
fig = tools.make_subplots(rows=2, cols=2,
                          print_grid=False,
                          subplot_titles=('Linear Discriminant Analysis',
                                          'Quadratic Discriminant Analysis'))

def matplotlib_to_plotly(cmap, pl_entries):
    h = 1.0/(pl_entries-1)
    pl_colorscale = []
    
    for k in range(pl_entries):
        # list() so the mapped values are indexable under Python 3 as well
        C = list(map(np.uint8, np.array(cmap(k*h)[:3])*255))
        pl_colorscale.append([k*h, 'rgb'+str((C[0], C[1], C[2]))])
        
    return pl_colorscale

def plot_data(lda, X, y, y_pred, fig_index):
    tp = (y == y_pred)  # correctly classified samples
    tp0, tp1 = tp[y == 0], tp[y == 1]
    X0, X1 = X[y == 0], X[y == 1]
    X0_tp, X0_fp = X0[tp0], X0[~tp0]
    X1_tp, X1_fp = X1[tp1], X1[~tp1]

    alpha = 0.5

    # class 0: dots
    class0 = go.Scatter(x=X0_tp[:, 0], y=X0_tp[:, 1], mode='markers',
                        opacity=0.75, showlegend=False,
                        marker=dict(color='red', size=12,
                                    line=dict(color='black', width=1)))
    class0_dark = go.Scatter(x=X0_fp[:, 0], y=X0_fp[:, 1], mode='markers',
                             opacity=0.75, showlegend=False,
                             marker=dict(color='#990000', size=12,  # dark red
                                         line=dict(color='black', width=1)))

    # class 1: dots
    class1 = go.Scatter(x=X1_tp[:, 0], y=X1_tp[:, 1], mode='markers',
                        opacity=0.75, showlegend=False,
                        marker=dict(color='blue', size=12,
                                    line=dict(color='black', width=1)))
    class1_dark = go.Scatter(x=X1_fp[:, 0], y=X1_fp[:, 1], mode='markers',
                             opacity=0.75, showlegend=False,
                             marker=dict(color='#000099', size=12,  # dark blue
                                         line=dict(color='black', width=1)))

    # class 0 and 1 : areas
    nx, ny = 200, 200
 
    x1_max, x1_min = max(X0_tp[:, 0]), min(X0_tp[:, 0])
    x2_max, x2_min = max(X1_tp[:, 0]), min(X1_tp[:, 0])
    x_max, x_min = max(x1_max, x2_max), min(x1_min, x2_min)

    y1_max, y1_min = max(X0_tp[:, 1]), min(X0_tp[:, 1])
    y2_max, y2_min = max(X1_tp[:, 1]), min(X1_tp[:, 1])
    y_max, y_min = max(y1_max, y2_max), min(y1_min, y2_min)
    
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, nx),
                         np.linspace(y_min, y_max, ny))
    
    Z = lda.predict_proba(np.c_[xx.ravel(), yy.ravel()])
    Z = Z[:, 1].reshape(xx.shape)
    
    trace = go.Heatmap(x=np.linspace(x_min, x_max, nx),
                       y=np.linspace(y_min, y_max, ny),
                       z=Z,
                       colorscale=matplotlib_to_plotly(cmap, 300),
                       showscale=False)
    # class means
    means1 = go.Scatter(x=[lda.means_[0][0]], y=[lda.means_[0][1]],
                        opacity=0.75, mode='markers',
                        showlegend=False,
                        marker=dict(color='black', size=14))
    means2 = go.Scatter(x=[lda.means_[1][0]], y=[lda.means_[1][1]],
                        opacity=0.75, mode='markers',
                        showlegend=False,
                        marker=dict(color='black', size=14))

    plot = [means1, means2, class1, class1_dark, class0, class0_dark, trace]
    
    return plot

def plot_ellipse(splot, mean, cov, color):
    # Eigendecomposition of the 2x2 covariance; eigh returns the eigenvalues
    # in ascending order, with the eigenvectors as the columns of w.
    v, w = linalg.eigh(cov)
    a = v[1] ** 0.5  # semi-major axis: one standard deviation
    b = v[0] ** 0.5  # semi-minor axis: one standard deviation
    # Orientation of the major axis, taken from the leading eigenvector.
    angle = math.atan2(w[1, 1], w[0, 1])
    x_origin = mean[0]
    y_origin = mean[1]
    x_ = []
    y_ = []

    # Trace the ellipse parametrically, rotated by `angle` about its center.
    for t in range(0, 361, 10):
        rad = math.radians(t)
        x_.append(x_origin + a * math.cos(rad) * math.cos(angle)
                  - b * math.sin(rad) * math.sin(angle))
        y_.append(y_origin + a * math.cos(rad) * math.sin(angle)
                  + b * math.sin(rad) * math.cos(angle))

    elle = go.Scatter(x=x_, y=y_, mode='lines',
                      showlegend=False,
                      line=dict(color='rgb(255,215,0)',
                                width=3))

    return elle

def plot_lda_cov(lda, splot):
    ellipse1 = plot_ellipse(splot, lda.means_[0], lda.covariance_, 'red')
    ellipse2 = plot_ellipse(splot, lda.means_[1], lda.covariance_, 'blue')
    return [ellipse1, ellipse2]

def plot_qda_cov(qda, splot):
    ellipse1 = plot_ellipse(splot, qda.means_[0], qda.covariances_[0], 'red')
    ellipse2 = plot_ellipse(splot, qda.means_[1], qda.covariances_[1], 'blue')
    return [ellipse1, ellipse2]
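
Because each ellipse is the one-standard-deviation contour of its covariance matrix, every point plot_ellipse returns should lie at squared Mahalanobis distance 1 from the center. A small illustrative check (the mean and covariance here are made up):

mean = np.array([0., 0.])
cov = np.array([[2., 0.6], [0.6, 1.]])
pts = plot_ellipse(None, mean, cov, 'red')
xy = np.column_stack([pts['x'], pts['y']]) - mean
d = np.einsum('ij,jk,ik->i', xy, linalg.inv(cov), xy)
print(np.allclose(d, 1.0))  # True: all points sit on the 1-sigma contour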

Plots

In [6]:
for i, (X, y) in enumerate([dataset_fixed_cov(), dataset_cov()]):
    # Linear Discriminant Analysis
    lda = LinearDiscriminantAnalysis(solver="svd", store_covariance=True)
    y_pred = lda.fit(X, y).predict(X)
    splot = plot_data(lda, X, y, y_pred, fig_index=2 * i + 1)
    ellipse_plot = plot_lda_cov(lda, splot)
    total1 = splot  + ellipse_plot
    for trace in total1:
        fig.append_trace(trace, i + 1, 1)  # rows 1 and 2, left column
            
    # Quadratic Discriminant Analysis
    # (`store_covariances` is the 0.18-era name; later scikit-learn
    # versions rename it to `store_covariance`.)
    qda = QuadraticDiscriminantAnalysis(store_covariances=True)
    y_pred = qda.fit(X, y).predict(X)
    splot1 = plot_data(qda, X, y, y_pred, fig_index=2 * i + 2)
    ellipse_plot1 = plot_qda_cov(qda, splot1)
    total2 = splot1 + ellipse_plot1

    for trace in total2:
        fig.append_trace(trace, i + 1, 2)  # rows 1 and 2, right column

fig['layout'].update(height=900)

for i in map(str, range(1,5)):
    x = 'xaxis' + i
    y = 'yaxis' + i
    fig['layout'][y].update(showticklabels=False, ticks='',
                            zeroline=False, showgrid=False)
    fig['layout'][x].update(showticklabels=False, ticks='',
                            zeroline=False, showgrid=False)
    
y_title = ['Data with Fixed Covariance', 'Data with Variable Covariance']
for j, i in enumerate(map(str, range(1, 5, 2))):
    y = 'yaxis' + i
    fig['layout'][y].update(title=y_title[j])
    
py.iplot(fig)
Out[6]:
[Interactive Plotly figure: LDA (left column) vs. QDA (right column) on the fixed-covariance (top row) and varying-covariance (bottom row) datasets.]