Show Sidebar Hide Sidebar

Face Completion with Multi-Output Estimators in Scikit-learn

This example shows how multi-output estimators can be used to complete images. The goal is to predict the lower half of a face given its upper half.

The first column of images shows true faces. The next columns illustrate how extremely randomized trees, k nearest neighbors, linear regression and ridge regression complete the lower half of those faces.

New to Plotly?

Plotly's Python library is free and open source! Get started by downloading the client and reading the primer.
You can set up Plotly to work in online or offline mode, or in jupyter notebooks.
We also have a quick-reference cheatsheet (new!) to help you get started!


In [1]:
import sklearn


In [2]:
import plotly.plotly as py
import plotly.graph_objs as go
from plotly import tools

import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import fetch_olivetti_faces
from sklearn.utils.validation import check_random_state

from sklearn.ensemble import ExtraTreesRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import RidgeCV
Automatically created module for IPython interactive environment


In [3]:
# Load the Olivetti faces dataset and flatten each image into one row of
# pixels, so that every sample is a 1-D feature vector.
data = fetch_olivetti_faces()
# NOTE(review): the right-hand side of this assignment was missing in the
# source; `data.target` (the per-image person id) is assumed -- confirm.
targets = data.target

data = data.images.reshape((len(data.images), -1))
train = data[targets < 30]
test = data[targets >= 30]  # Test on independent people

# Test on a subset of people
n_faces = 5
rng = check_random_state(4)
face_ids = rng.randint(test.shape[0], size=(n_faces, ))
test = test[face_ids, :]

# Split every flattened image into its upper half (input) and lower half
# (target). Integer arithmetic replaces np.ceil/np.floor because those return
# floats, which NumPy does not accept as slice bounds.
n_pixels = data.shape[1]
upper_stop = (n_pixels + 1) // 2   # ceil(0.5 * n_pixels)
lower_start = n_pixels // 2        # floor(0.5 * n_pixels)
X_train = train[:, :upper_stop]  # Upper half of the faces
y_train = train[:, lower_start:]  # Lower half of the faces
X_test = test[:, :upper_stop]
y_test = test[:, lower_start:]

# Fit estimators
# NOTE(review): the dict opener/closer and the tail of the ExtraTreesRegressor
# call were lost in the source; `random_state=0` is assumed -- confirm.
ESTIMATORS = {
    "Extra trees": ExtraTreesRegressor(n_estimators=10, max_features=32,
                                       random_state=0),
    "K-nn": KNeighborsRegressor(),
    "Linear regression": LinearRegression(),
    "Ridge": RidgeCV(),
}

# Map each estimator name to its predicted lower halves for the test faces.
y_test_predict = dict()
for name, estimator in ESTIMATORS.items():
    estimator.fit(X_train, y_train)
    y_test_predict[name] = estimator.predict(X_test)


In [4]:
def matplotlib_to_plotly(cmap, pl_entries):
    """Sample a colormap callable into a list of Plotly colorscale entries.

    Parameters
    ----------
    cmap : callable
        Called with a float position; must return a sequence whose first
        three items are the red, green and blue components. They are scaled
        by 255 here, so components in [0, 1] are expected.
    pl_entries : int
        Number of evenly spaced samples, from 0.0 up to 1.0. Must be at
        least 2: a value of 1 divides by zero when computing the step.

    Returns
    -------
    list
        ``[position, 'rgb(r, g, b)']`` pairs, one per sample.
    """
    h = 1.0/(pl_entries-1)
    pl_colorscale = []
    for k in range(pl_entries):
        # Truncate to 8-bit channels, then convert to plain Python ints:
        # `map()` is a lazy iterator on Python 3 (not indexable), and str()
        # of NumPy scalars is not guaranteed to print as bare integers.
        channels = (np.array(cmap(k*h)[:3])*255).astype(np.uint8)
        r, g, b = (int(c) for c in channels)
        pl_colorscale.append([k*h, 'rgb'+str((r, g, b))])
    return pl_colorscale

# Build a 5x5 grid of heatmaps: one row per test face; column 1 holds the true
# face and columns 2-5 hold the completions from each estimator.
fig = tools.make_subplots(rows=5, cols=5, print_grid=False,
                          subplot_titles=('True Faces','Extra Trees', 'K-nn',
                                         'Linear Regression','Ridge'))
image_shape = (64, 64)

# NOTE(review): the colormap argument was missing from both
# matplotlib_to_plotly(...) calls in the source; `plt.cm.gray` is assumed --
# confirm. The colorscale is the same for every trace, so it is built once;
# image_shape[0] equals the original len(true_face.reshape(image_shape)).
gray_colorscale = matplotlib_to_plotly(plt.cm.gray, image_shape[0])

for i in range(n_faces):
    # Re-join the known upper half with the true lower half.
    true_face = np.hstack((X_test[i], y_test[i]))
    true_trace = go.Heatmap(z=true_face.reshape(image_shape),
                            colorscale=gray_colorscale)
    fig.append_trace(true_trace, i+1, 1)

    # Sorted estimator names line up with the column titles given above.
    for j, est in enumerate(sorted(ESTIMATORS)):
        completed_face = np.hstack((X_test[i], y_test_predict[est][i]))
        completed_trace = go.Heatmap(z=completed_face.reshape(image_shape),
                                     colorscale=gray_colorscale)
        fig.append_trace(completed_trace, i+1, j+2)

fig['layout'].update(height=1000, title='Face Completion With a Multi-Output Estimators')

# Hide ticks and tick labels on all 25 subplot axes.
for i in map(str,range(1,26)):
    y = 'yaxis'+i
    x = 'xaxis'+i
    # NOTE(review): the opening of this call was lost in the source;
    # `autorange='reversed'` (draw image row 0 at the top) is assumed -- confirm.
    fig['layout'][y].update(autorange='reversed',
                            showticklabels=False, ticks='')
    fig['layout'][x].update(showticklabels=False, ticks='')
# NOTE(review): no call that renders `fig` is visible in this cell.
In [5]: