Kernel PCA in Scikit-learn

This example shows that Kernel PCA is able to find a projection of the data that makes the data linearly separable.

Version

# Show which scikit-learn version is installed.
import sklearn
sklearn.__version__

'0.18'

Imports

This tutorial imports PCA, KernelPCA and make_circles.

import plotly.plotly as py
import plotly.graph_objs as go
from plotly import tools

import numpy as np
import matplotlib.pyplot as plt

from sklearn.decomposition import PCA, KernelPCA
from sklearn.datasets import make_circles


Calculations

# Seed NumPy's global RNG before generating the data set.
np.random.seed(0)

# Two noisy concentric circles: X holds the 2-D samples, y the per-sample label.
X, y = make_circles(n_samples=400, factor=0.3, noise=0.05)

# Linear PCA baseline.
pca = PCA()
X_pca = pca.fit_transform(X)

# RBF-kernel PCA. fit_inverse_transform=True is what makes the
# inverse_transform() call below available.
kpca = KernelPCA(
    kernel="rbf",
    gamma=10,
    fit_inverse_transform=True,
)
X_kpca = kpca.fit_transform(X)
X_back = kpca.inverse_transform(X_kpca)


Plot Results

# 2 x 2 grid of subplots, one per view of the data.
fig = tools.make_subplots(
    rows=2,
    cols=2,
    subplot_titles=(
        "Original space",
        "Projection by PCA",
        "Projection by KPCA",
        "Original space after inverse transform",
    ),
)

# Boolean masks selecting the samples of each label (drawn in red / blue).
reds, blues = (y == 0), (y == 1)

# Marker traces of the raw samples, one trace per class so each gets its
# own fill colour; both share a thin black marker outline.
original_space1, original_space2 = (
    go.Scatter(
        x=X[mask, 0],
        y=X[mask, 1],
        mode='markers',
        showlegend=False,
        marker=dict(color=color, line=dict(color='black', width=1)),
    )
    for mask, color in ((reds, 'red'), (blues, 'blue'))
)

# Axis coordinates of a regular 50 x 50 evaluation grid over the input space.
X1, X2 = np.linspace(-1.5, 1.5, 50), np.linspace(-1.5, 1.5, 50)

# Expand the two axes into every (x1, x2) combination. Pairing X1 and X2
# element-wise would only sample the 50 points on the diagonal x1 == x2 and
# yield a 1-D Z_grid, which is not a surface that can be contoured.
X1_mesh, X2_mesh = np.meshgrid(X1, X2)
X_grid = np.column_stack([X1_mesh.ravel(), X2_mesh.ravel()])

# projection on the first principal component (in the phi space)
# With meshgrid's default 'xy' indexing, Z_grid[i, j] is the value at
# (X1[j], X2[i]), i.e. rows follow y and columns follow x.
Z_grid = kpca.transform(X_grid)[:, 0].reshape(X1_mesh.shape)
lines = go.Contour(
    x=X1,
    y=X2,
    z=Z_grid,
    showscale=False,
    colorscale=[[0, 'white'], [1, 'black']],
    contours=dict(coloring='lines'),
)

# Panel (row 1, col 1): the contour plus both classes of original samples.
for trace in (lines, original_space1, original_space2):
    fig.append_trace(trace, 1, 1)

# Label both axes and switch off their zero line and grid.
for axis, label in (('xaxis1', 'x<sub>1</sub>'), ('yaxis1', 'x<sub>2</sub>')):
    fig['layout'][axis].update(title=label, zeroline=False, showgrid=False)

# Samples in the linear-PCA coordinates (first two components), one trace
# per class.
projection_pca1, projection_pca2 = (
    go.Scatter(
        x=X_pca[mask, 0],
        y=X_pca[mask, 1],
        mode='markers',
        showlegend=False,
        marker=dict(color=color, line=dict(color='black', width=1)),
    )
    for mask, color in ((reds, 'red'), (blues, 'blue'))
)

# Panel (row 1, col 2).
for trace in (projection_pca1, projection_pca2):
    fig.append_trace(trace, 1, 2)

# Label both axes and switch off their zero line and grid.
for axis, label in (('xaxis2', '1st principal component'),
                    ('yaxis2', '2nd component')):
    fig['layout'][axis].update(title=label, zeroline=False, showgrid=False)

# Samples in the kernel-PCA coordinates (first two components), one trace
# per class.
projection_kpca1, projection_kpca2 = (
    go.Scatter(
        x=X_kpca[mask, 0],
        y=X_kpca[mask, 1],
        mode='markers',
        showlegend=False,
        marker=dict(color=color, line=dict(color='black', width=1)),
    )
    for mask, color in ((reds, 'red'), (blues, 'blue'))
)

# Panel (row 2, col 1).
for trace in (projection_kpca1, projection_kpca2):
    fig.append_trace(trace, 2, 1)

# Label both axes and switch off their zero line and grid.
for axis, label in (('xaxis3', "1st principal component in space induced by Phi"),
                    ('yaxis3', '2nd component')):
    fig['layout'][axis].update(title=label, zeroline=False, showgrid=False)

# Samples mapped back to the input space by kpca.inverse_transform, one
# trace per class.
after_inverse1, after_inverse2 = (
    go.Scatter(
        x=X_back[mask, 0],
        y=X_back[mask, 1],
        mode='markers',
        showlegend=False,
        marker=dict(color=color, line=dict(color='black', width=1)),
    )
    for mask, color in ((reds, 'red'), (blues, 'blue'))
)

# Panel (row 2, col 2).
for trace in (after_inverse1, after_inverse2):
    fig.append_trace(trace, 2, 2)

# Label both axes and switch off their zero line and grid.
for axis, label in (('xaxis4', 'x<sub>1</sub>'), ('yaxis4', 'x<sub>2</sub>')):
    fig['layout'][axis].update(title=label, zeroline=False, showgrid=False)

# Set the overall figure height, then hand the figure to plotly's iplot.
fig['layout'].update(dict(height=800))

py.iplot(fig)

Authors:

      Mathieu Blondel
      Andreas Mueller

License:

      BSD 3 clause