Show Sidebar Hide Sidebar

# Multi-Dimensional Scaling in Scikit-learn

An illustration of the metric and non-metric MDS on generated noisy data.

The points reconstructed by the metric MDS and the non-metric MDS are slightly shifted to avoid overlapping.

#### New to Plotly?

You can set up Plotly to work in online or offline mode, or in Jupyter notebooks.
We also have a quick-reference cheatsheet (new!) to help you get started!

### Version

In [1]:
# Record the scikit-learn version this notebook was executed with; the bare
# last expression is what the notebook displays as the cell output.
import sklearn
sklearn.__version__

Out[1]:
'0.18.1'

### Imports

In [2]:
# Echo the module docstring; inside the notebook this prints IPython's
# auto-generated module description rather than an example-specific text.
print(__doc__)

# NOTE(review): `plotly.plotly` looks like the legacy online-plotting module;
# newer plotly releases may expose it under a different package -- confirm
# against the installed plotly version before re-running.
import plotly.plotly as py
import plotly.graph_objs as go

import numpy as np

# MDS estimators, the pairwise-distance helper and PCA used by the cells below.
from sklearn import manifold
from sklearn.metrics import euclidean_distances
from sklearn.decomposition import PCA

Automatically created module for IPython interactive environment


### Calculations

In [3]:
n_samples = 20

# A single seeded generator drives every random draw in this cell (data, noise
# and the MDS initialisations), so Restart & Run All reproduces the same result.
seed = np.random.RandomState(seed=3)

# Random integer coordinates in [0, 20), stored as floats.
# np.float64 replaces the `np.float` alias, which NumPy deprecated in 1.20 and
# removed in 1.24.
X_true = seed.randint(0, 20, 2 * n_samples).astype(np.float64)
X_true = X_true.reshape((n_samples, 2))

# Center the data.
# NOTE(review): mean() without an axis subtracts one scalar (the grand mean of
# all coordinates), not the per-column means. Behaviour is kept as in the
# original example -- confirm that this is intended.
X_true -= X_true.mean()

# Pairwise Euclidean distances between the true points (n_samples x n_samples).
similarities = euclidean_distances(X_true)

# Add noise to the similarities. The noise is made symmetric and its diagonal
# is zeroed so the result is still a valid dissimilarity matrix. It is drawn
# from `seed` rather than the global np.random state to stay reproducible.
noise = seed.rand(n_samples, n_samples)
noise = noise + noise.T
np.fill_diagonal(noise, 0)
similarities += noise

# Metric MDS on the precomputed noisy dissimilarities.
mds = manifold.MDS(n_components=2, max_iter=3000, eps=1e-9, random_state=seed,
                   dissimilarity="precomputed", n_jobs=1)
pos = mds.fit(similarities).embedding_

# Non-metric MDS, started from the metric solution (hence n_init=1).
nmds = manifold.MDS(n_components=2, metric=False, max_iter=3000, eps=1e-12,
                    dissimilarity="precomputed", random_state=seed, n_jobs=1,
                    n_init=1)
npos = nmds.fit_transform(similarities, init=pos)

# Rescale both embeddings so their overall norm matches that of the true data.
# New arrays are created instead of scaling in place, so `mds.embedding_`
# (which `pos` aliases) is left untouched.
true_norm = np.sqrt((X_true ** 2).sum())
pos = pos * (true_norm / np.sqrt((pos ** 2).sum()))
npos = npos * (true_norm / np.sqrt((npos ** 2).sum()))

# Rotate the data: each point set is projected onto its own principal axes so
# the three configurations share a comparable orientation.
clf = PCA(n_components=2)
X_true = clf.fit_transform(X_true)

pos = clf.fit_transform(pos)

npos = clf.fit_transform(npos)


### Plot Results

In [4]:
def make_position_trace(points, color, name):
    """Return a markers+lines Scatter trace for an (n, 2) array of positions.

    points -- array whose column 0 is used as x and column 1 as y
    color  -- marker colour
    name   -- legend entry for the trace
    """
    return go.Scatter(x=points[:, 0], y=points[:, 1],
                      mode='markers+lines',
                      marker=dict(color=color, size=10),
                      line=dict(width=1),
                      name=name)


data = []
p1 = make_position_trace(X_true, 'navy', 'True Position')
data.append(p1)
p2 = make_position_trace(pos, 'turquoise', 'MDS')
data.append(p2)
p3 = make_position_trace(npos, 'orange', 'NMDS')
data.append(p3)

# Plot the edges between every pair of true positions.
# All segments go into ONE trace: a None entry after each pair breaks the line,
# so the segments are drawn separately. Only unique pairs (i < j) are emitted;
# the previous version created one trace per ordered pair (n**2 traces,
# including zero-length i == j segments and every edge twice).
# The former rescaling of `similarities` was dropped: its result was never used
# for drawing, it overwrote a variable owned by the calculations cell (making
# this cell non-idempotent) and it divided by the zero diagonal.
n_points = len(pos)
edge_x, edge_y = [], []
for i in range(n_points):
    for j in range(i + 1, n_points):
        edge_x += [X_true[i, 0], X_true[j, 0], None]
        edge_y += [X_true[i, 1], X_true[j, 1], None]

p4 = go.Scatter(x=edge_x, y=edge_y,
                mode='lines',
                showlegend=False,
                line=dict(color='lightblue', width=0.5))
data.append(p4)

# Hide axis decorations: only the relative geometry of the points matters.
hidden_axis = dict(zeroline=False, showgrid=False,
                   ticks='', showticklabels=False)
layout = go.Layout(xaxis=hidden_axis,
                   yaxis=hidden_axis,
                   height=900, hovermode='closest')
fig = go.Figure(data=data, layout=layout)

In [5]:
# Display the figure assembled in the previous cell as this cell's output.
# NOTE(review): `py` is plotly's online module, so this presumably needs
# configured Plotly credentials -- confirm before re-running.
py.iplot(fig)

Out[5]:

Author:

    Nelle Varoquaux <nelle.varoquaux@gmail.com>



License:

    BSD