Show Sidebar Hide Sidebar

Decision Surface of a Decision Tree on the Iris Dataset in Scikit-learn

Plot the decision surface of a decision tree trained on pairs of features of the iris dataset.

See decision tree for more information on the estimator.

For each pair of iris features, the decision tree learns decision boundaries made of combinations of simple thresholding rules inferred from the training samples.

New to Plotly?

Plotly's Python library is free and open source! Get started by downloading the client and reading the primer.
You can set up Plotly to work in online or offline mode, or in Jupyter notebooks.
We also have a quick-reference cheatsheet (new!) to help you get started!

Version

In [1]:
# Show which scikit-learn version is installed; the recorded notebook output
# for this cell is '0.18.1'.
import sklearn
sklearn.__version__
Out[1]:
'0.18.1'

Imports

In [2]:
# Print the module docstring (in the recorded notebook run this emitted the
# "Automatically created module for IPython interactive environment" line).
print(__doc__)

import plotly.plotly as py
import plotly.graph_objs as go
from plotly import tools

import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
Automatically created module for IPython interactive environment

Calculations

In [3]:
# Parameters
# Number of target classes; one marker trace is drawn per class index.
n_classes = 3
# One marker colour per class, in class order. Plotly expects CSS colour
# names (or hex / rgb strings); matplotlib's single-letter codes such as
# "b", "r", "y" are not valid Plotly colours, so the names are spelled out.
plot_colors = ("blue", "red", "yellow")
# Spacing of the mesh grid on which the decision surface is evaluated.
plot_step = 0.02

# Load data
iris = load_iris()

Plot Results

In [4]:
def matplotlib_to_plotly(cmap, pl_entries):
    """Sample a matplotlib-style colormap into a Plotly colorscale.

    Parameters
    ----------
    cmap : callable
        Called with a float in [0, 1]; must return a sequence whose first
        three items are the red, green and blue components as floats
        (any further items, e.g. alpha, are ignored).
    pl_entries : int
        Number of evenly spaced samples to take; must be at least 2 so that
        the scale spans 0.0 to 1.0.

    Returns
    -------
    list
        ``[[position, 'rgb(r, g, b)'], ...]`` with ``pl_entries`` items,
        where each component is the float value scaled by 255 and truncated
        to an 8-bit integer.

    Raises
    ------
    ValueError
        If ``pl_entries`` is smaller than 2.
    """
    if pl_entries < 2:
        # A single entry would make the step 1/0; fail with a clear message.
        raise ValueError("pl_entries must be at least 2")

    h = 1.0 / (pl_entries - 1)
    pl_colorscale = []

    for k in range(pl_entries):
        # Build a real list: on Python 3 ``map`` returns an iterator that
        # cannot be indexed. Converting to plain ``int`` keeps the formatted
        # tuple free of NumPy scalar reprs.
        rgb = [int(np.uint8(c)) for c in np.array(cmap(k * h)[:3]) * 255]
        pl_colorscale.append([k * h, 'rgb' + str((rgb[0], rgb[1], rgb[2]))])

    return pl_colorscale

# Five-entry Plotly colorscale sampled from matplotlib's "Paired" colormap.
cmap = matplotlib_to_plotly(cmap=plt.cm.Paired, pl_entries=5)

# Two rows by three columns of subplots.
fig = tools.make_subplots(rows=2, cols=3)

# Axis titles, appended to in subplot order while plotting.
xlabel, ylabel = [], []
This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]  [ (1,3) x3,y3 ]
[ (2,1) x4,y4 ]  [ (2,2) x5,y5 ]  [ (2,3) x6,y6 ]

In [5]:
# For every pair of iris features: fit a decision tree on those two columns,
# evaluate it on a dense grid to get the decision surface, and add the
# surface plus the training points to that pair's subplot.
for pairidx, pair in enumerate([[0, 1], [0, 2], [0, 3],
                                [1, 2], [1, 3], [2, 3]]):
    # We only take the two corresponding features
    X = iris.data[:, pair]
    y = iris.target

    # Train
    clf = DecisionTreeClassifier().fit(X, y)

    # 1-based subplot position in the 2x3 grid. divmod uses floor division,
    # so the row stays an integer on Python 3 (plain `/` would give 1.33...).
    row, col = divmod(pairidx, 3)
    row, col = row + 1, col + 1

    # Plot the decision boundary

    # Grid covering the data range with a margin of 1 on every side.
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    x_ = np.arange(x_min, x_max, plot_step)
    y_ = np.arange(y_min, y_max, plot_step)
    xx, yy = np.meshgrid(x_, y_)

    # Predict a class for every grid point, then restore the grid shape.
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    # Remember the feature names; they become the axis titles later on.
    xlabel.append(iris.feature_names[pair[0]])
    ylabel.append(iris.feature_names[pair[1]])
    cs = go.Heatmap(x=x_, y=y_, z=Z,
                    colorscale=cmap,
                    showscale=False)

    fig.append_trace(cs, row, col)

    # Plot the training points
    for i, color in zip(range(n_classes), plot_colors):
        # Boolean mask selecting the samples that belong to class i.
        in_class = y == i
        p1 = go.Scatter(x=X[in_class, 0], y=X[in_class, 1],
                        mode='markers',
                        marker=dict(color=color,
                                    colorscale=cmap,
                                    showscale=False,
                                    line=dict(color='black', width=1)),
                        showlegend=False)
        fig.append_trace(p1, row, col)
# Title the axes of each of the six subplots (layout keys 'xaxis1'..'xaxis6'
# and 'yaxis1'..'yaxis6') with the collected feature names, and hide the
# ticks and tick labels.
for j in range(6):
    suffix = str(j + 1)
    axis_style = dict(showticklabels=False, ticks='')
    fig['layout']['yaxis' + suffix].update(title=ylabel[j], **axis_style)
    fig['layout']['xaxis' + suffix].update(title=xlabel[j], **axis_style)

# Figure-wide settings: height, hover mode and title.
fig['layout'].update(
    title="Decision surface of a decision tree using paired features",
    height=700,
    hovermode='closest'
)
In [6]:
# Hand the finished figure to plotly.plotly (imported as `py`) for display.
py.iplot(fig)
Out[6]:
Still need help?
Contact Us

For a guaranteed 24-hour response turnaround, upgrade to a Developer Support Plan.