Show Sidebar Hide Sidebar

Decision Boundaries of a VotingClassifier in Scikit-learn

Plot the decision boundaries of a VotingClassifier for two features of the Iris dataset.

Plot the class probabilities of the first sample in a toy dataset predicted by three different classifiers and averaged by the VotingClassifier.

First, three example classifiers are initialized (DecisionTreeClassifier, KNeighborsClassifier, and SVC) and used to initialize a soft-voting VotingClassifier with weights [2, 1, 2], which means that the predicted probabilities of the DecisionTreeClassifier and SVC each count 2 times as much as those of the KNeighborsClassifier when the averaged probability is calculated.

New to Plotly?

Plotly's Python library is free and open source! Get started by downloading the client and reading the primer.
You can set up Plotly to work in online or offline mode, or in jupyter notebooks.
We also have a quick-reference cheatsheet (new!) to help you get started!


In [1]:
import sklearn


In [2]:

import plotly.plotly as py
import plotly.graph_objs as go
from plotly import tools

from itertools import product
import numpy as np
import matplotlib.pyplot as plt

from sklearn import datasets
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier
Automatically created module for IPython interactive environment


In [3]:
# Loading some example data
# Only columns 0 and 2 of the Iris feature matrix are kept so that the
# decision regions can be drawn in a 2-D plane.
# NOTE(review): the right-hand sides of `X`/`y` and the `.fit` calls were
# truncated in the source; they are reconstructed here from the surviving
# fragments (`[:, [0, 2]]` and four `, y)` tails) -- confirm against the
# upstream example.
iris = datasets.load_iris()
X = iris.data[:, [0, 2]]
y = iris.target

# Training classifiers
clf1 = DecisionTreeClassifier(max_depth=4)
clf2 = KNeighborsClassifier(n_neighbors=7)
# probability=True is required so the SVC can contribute class
# probabilities to the soft-voting ensemble.
clf3 = SVC(kernel='rbf', probability=True)
eclf = VotingClassifier(estimators=[('dt', clf1), ('knn', clf2),
                                    ('svc', clf3)],
                        voting='soft', weights=[2, 1, 2])

# Each base classifier is fitted on its own as well, because it is plotted
# individually next to the ensemble.
clf1.fit(X, y)
clf2.fit(X, y)
clf3.fit(X, y)
eclf.fit(X, y)
VotingClassifier(estimators=[('dt', DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=4,
            max_features=None, max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            presort=False, random_s...',
  max_iter=-1, probability=True, random_state=None, shrinking=True,
  tol=0.001, verbose=False))],
         n_jobs=1, voting='soft', weights=[2, 1, 2])

Plot Results

In [4]:
# One subplot title per classifier, in the same order the classifiers are
# plotted.
titles = ('Decision Tree (depth=4)', 'KNN (k=7)',
          'Kernel SVM', 'Soft Voting')

# NOTE(review): the tail of this call was truncated in the source;
# `subplot_titles=titles` is reconstructed because `titles` is otherwise
# unused -- confirm no further keyword arguments were lost.
fig = tools.make_subplots(rows=2, cols=2,
                          subplot_titles=titles)

# Build a regular grid (step 0.1) that extends one unit beyond the data range
# on both features; the classifiers are evaluated on every grid point.
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
                     np.arange(y_min, y_max, 0.1))
# 1-D vertical coordinates of the grid, used as the contour's y axis.
y_ = np.arange(y_min, y_max, 0.1)
In [5]:
# Colours shared by the decision regions and the sample markers.
class_colorscale = [[0, 'purple'],
                    [0.5, 'cyan'],
                    [1, 'pink']]

# Draw one subplot per classifier: `idx` walks the 2x2 grid positions
# (0, 0), (0, 1), (1, 0), (1, 1) in step with the classifier list.
# NOTE(review): both trace constructors were truncated in the source. The
# arguments `z=Z`, `showscale`, `mode`, `showlegend` and the `marker=dict(...)`
# wrapper (including `color=y` and its colorscale) are reconstructed --
# confirm against the upstream example.
for idx, clf in zip(product([0, 1], [0, 1]),
                    [clf1, clf2, clf3, eclf]):
    # Predict a class for every grid point, then restore the grid shape.
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    # Filled contour of the predicted class over the grid.
    trace1 = go.Contour(x=xx[0], y=y_,
                        z=Z,
                        colorscale=class_colorscale,
                        showscale=False)
    # Training samples, coloured by their label and outlined in black.
    trace2 = go.Scatter(x=X[:, 0], y=X[:, 1],
                        mode='markers',
                        showlegend=False,
                        marker=dict(color=y,
                                    colorscale=class_colorscale,
                                    line=dict(color='black', width=1)))
    # Plotly subplot rows/columns are 1-based, hence the +1.
    fig.append_trace(trace1, idx[0]+1, idx[1]+1)
    fig.append_trace(trace2, idx[0]+1, idx[1]+1)
fig['layout'].update(height=900, hovermode='closest')

# Hide grid lines and zero lines on the axes of all four subplots
# (layout keys 'xaxis1'..'xaxis4' and 'yaxis1'..'yaxis4').
# The loop deliberately avoids the names `x` and `y` so that the label array
# `y` used when plotting is not overwritten by an axis-name string.
for axis_no in map(str, range(1, 5)):
    for axis_key in ('xaxis' + axis_no, 'yaxis' + axis_no):
        fig['layout'][axis_key].update(showgrid=False, zeroline=False)
In [6]: