Show Sidebar Hide Sidebar

Distplots in Python

How to make interactive Distplots in Python with Plotly.

New to Plotly?

Plotly's Python library is free and open source! Get started by downloading the client and reading the primer.
You can set up Plotly to work in online or offline mode, or in Jupyter notebooks.
We also have a quick-reference cheatsheet (new!) to help you get started!

Version Check

Note: Distplots are available in Plotly version 1.11.0 and later.
Run `pip install plotly --upgrade` to update your Plotly version.

In [1]:
import plotly
# Leaving the attribute as the cell's last expression makes the notebook
# display the installed Plotly version string.
plotly.__version__
Out[1]:
'2.0.2'

Basic Distplot

In [2]:
import plotly.plotly as py
import plotly.figure_factory as ff

import numpy as np

# 1000 draws from np.random.randn form the single data set to plot
samples = np.random.randn(1000)

# create_distplot takes a list of data sets and a matching list of labels,
# so the one sample and its one label are each wrapped in a list
fig = ff.create_distplot([samples], ['distplot'])
py.iplot(fig, filename='Basic Distplot')
Out[2]:

Plot Multiple Datasets

In [3]:
import plotly.plotly as py
import plotly.figure_factory as ff

import numpy as np

# Four data sets of 200 random draws each, offset by -2, 0, +2 and +4
samples = [
    np.random.randn(200) - 2,
    np.random.randn(200),
    np.random.randn(200) + 2,
    np.random.randn(200) + 4,
]
labels = ['Group 1', 'Group 2', 'Group 3', 'Group 4']

# A single bin_size value is used for the histograms of all groups
fig = ff.create_distplot(samples, labels, bin_size=.2)

# Send the figure to Plotly and show it inline
py.iplot(fig, filename='Distplot with Multiple Datasets')
Out[3]:

Use Multiple Bin Sizes

In [4]:
import plotly.plotly as py
import plotly.figure_factory as ff

import numpy as np

# Four data sets of 200 random draws each, offset by -2, 0, +2 and +4
samples = [
    np.random.randn(200) - 2,
    np.random.randn(200),
    np.random.randn(200) + 2,
    np.random.randn(200) + 4,
]
labels = ['Group 1', 'Group 2', 'Group 3', 'Group 4']

# bin_size also accepts a list of sizes instead of a single value;
# here it holds one entry for each of the four data sets
bin_sizes = [.1, .25, .5, 1]
fig = ff.create_distplot(samples, labels, bin_size=bin_sizes)

# Send the figure to Plotly and show it inline
py.iplot(fig, filename='Distplot with Multiple Bin Sizes')
Out[4]:

Customize Rug Text, Colors & Title

In [5]:
import plotly.plotly as py
import plotly.figure_factory as ff

import numpy as np

# Two data sets of 26 points each, so every point gets one letter label
samples = [np.random.randn(26), np.random.randn(26) + .5]
labels = ['2014', '2015']

# Hover text for the rug marks: 'a'..'z' for the first data set and
# 'aa'..'zz' for the second
single_letters = list('abcdefghijklmnopqrstuvwxyz')
double_letters = [letter * 2 for letter in single_letters]
hover_text = [single_letters, double_letters]

# One color per data set
palette = ['rgb(0, 0, 100)', 'rgb(0, 200, 200)']

# Build the distplot with custom rug text and trace colors
fig = ff.create_distplot(
    samples, labels,
    bin_size=.2,
    rug_text=hover_text,
    colors=palette,
)

# Add a title to the figure layout
fig['layout'].update(title='Customized Distplot')

# Send the figure to Plotly and show it inline
py.iplot(fig, filename='Distplot Colors')
Out[5]:

Plot Normal Curve

In [6]:
import plotly.plotly as py
import plotly.figure_factory as ff

import numpy as np

# Two data sets of 200 random draws; the second is offset by +2
samples = [np.random.randn(200), np.random.randn(200) + 2]
labels = ['Group 1', 'Group 2']
palette = ['#3A4750', '#F64E8B']

# curve_type='normal' draws a normal curve in place of the default 'kde'
fig = ff.create_distplot(
    samples, labels,
    bin_size=.5,
    curve_type='normal',
    colors=palette,
)

# Add a title to the figure layout
fig['layout'].update(title='Distplot with Normal Distribution')

# Send the figure to Plotly and show it inline
py.iplot(fig, filename='Distplot with Normal Curve')
Out[6]:

Plot Only Curve and Rug

In [7]:
import plotly.plotly as py
import plotly.figure_factory as ff

import numpy as np

# Three data sets of 200 random draws each, offset by -1, 0 and +1
samples = [
    np.random.randn(200) - 1,
    np.random.randn(200),
    np.random.randn(200) + 1,
]
labels = ['Group 1', 'Group 2', 'Group 3']
palette = ['#333F44', '#37AA9C', '#94F3E4']

# show_hist=False leaves out the histogram, so only the curve and the rug
# are drawn
fig = ff.create_distplot(samples, labels, show_hist=False, colors=palette)

# Add a title to the figure layout
fig['layout'].update(title='Curve and Rug Plot')

# Send the figure to Plotly and show it inline
py.iplot(fig, filename='Curve and Rug')
Out[7]:

Plot Only Hist and Rug

In [8]:
import plotly.plotly as py
import plotly.figure_factory as ff

import numpy as np

# Three data sets of 200 random draws each, offset by -1, 0 and +1
samples = [
    np.random.randn(200) - 1,
    np.random.randn(200),
    np.random.randn(200) + 1,
]
labels = ['Group 1', 'Group 2', 'Group 3']
palette = ['#835AF1', '#7FA6EE', '#B8F7D4']

# show_curve=False leaves out the curve, so only the histogram and the rug
# are drawn
fig = ff.create_distplot(
    samples, labels,
    colors=palette,
    bin_size=.25,
    show_curve=False,
)

# Add a title to the figure layout
fig['layout'].update(title='Hist and Rug Plot')

# Send the figure to Plotly and show it inline
py.iplot(fig, filename='Hist and Rug')
Out[8]:

Plot Hist and Rug with Different Bin Sizes

In [9]:
import plotly.plotly as py
import plotly.figure_factory as ff

import numpy as np

# Three data sets of 200 random draws each, offset by -2, 0 and +2
samples = [
    np.random.randn(200) - 2,
    np.random.randn(200),
    np.random.randn(200) + 2,
]
labels = ['Group 1', 'Group 2', 'Group 3']
palette = ['#393E46', '#2BCDC1', '#F66095']

# A list of bin sizes (one entry per data set) combined with
# show_curve=False: histogram and rug only, no curve
bin_sizes = [0.3, 0.2, 0.1]
fig = ff.create_distplot(
    samples, labels,
    colors=palette,
    bin_size=bin_sizes,
    show_curve=False,
)

# Add a title to the figure layout
fig['layout'].update(title='Hist and Rug Plot')

# Send the figure to Plotly and show it inline
py.iplot(fig, filename='Hist and Rug Different Bin Size')
Out[9]:

Plot Only Hist and Curve

In [10]:
import plotly.plotly as py
import plotly.figure_factory as ff

import numpy as np

# Three data sets of 200 random draws each, offset by -2, 0 and +2
samples = [
    np.random.randn(200) - 2,
    np.random.randn(200),
    np.random.randn(200) + 2,
]
labels = ['Group 1', 'Group 2', 'Group 3']
palette = ['#A56CC1', '#A6ACEC', '#63F5EF']

# show_rug=False leaves out the rug, so only the histogram and the curve
# are drawn
fig = ff.create_distplot(
    samples, labels,
    colors=palette,
    bin_size=.2,
    show_rug=False,
)

# Add a title to the figure layout
fig['layout'].update(title='Hist and Curve Plot')

# Send the figure to Plotly and show it inline
py.iplot(fig, filename='Hist and Curve')
Out[10]:

Distplot with Pandas

In [11]:
import plotly.plotly as py
import plotly.figure_factory as ff

import numpy as np
import pandas as pd

# Two columns of 200 random draws; the '2013' column is offset by +1
df = pd.DataFrame({
    '2012': np.random.randn(200),
    '2013': np.random.randn(200) + 1,
})

# Each column becomes one data set and the column names serve as the
# group labels
column_names = df.columns
fig = ff.create_distplot(
    [df[name] for name in column_names],
    column_names,
    bin_size=.25,
)
py.iplot(fig, filename='distplot with pandas')
Out[11]:

Reference

In [12]:
# Print the built-in documentation for create_distplot; `ff` is the
# plotly.figure_factory alias imported in the earlier cells.
help(ff.create_distplot)
Help on function create_distplot in module plotly.figure_factory._distplot:

create_distplot(hist_data, group_labels, bin_size=1.0, curve_type='kde', colors=None, rug_text=None, histnorm='probability density', show_hist=True, show_curve=True, show_rug=True)
    BETA function that creates a distplot similar to seaborn.distplot
    
    The distplot can be composed of all or any combination of the following
    3 components: (1) histogram, (2) curve: (a) kernel density estimation
    or (b) normal curve, and (3) rug plot. Additionally, multiple distplots
    (from multiple datasets) can be created in the same plot.
    
    :param (list[list]) hist_data: Use list of lists to plot multiple data
        sets on the same plot.
    :param (list[str]) group_labels: Names for each data set.
    :param (list[float]|float) bin_size: Size of histogram bins.
        Default = 1.
    :param (str) curve_type: 'kde' or 'normal'. Default = 'kde'
    :param (str) histnorm: 'probability density' or 'probability'
        Default = 'probability density'
    :param (bool) show_hist: Add histogram to distplot? Default = True
    :param (bool) show_curve: Add curve to distplot? Default = True
    :param (bool) show_rug: Add rug to distplot? Default = True
    :param (list[str]) colors: Colors for traces.
    :param (list[list]) rug_text: Hovertext values for rug_plot,
    :return (dict): Representation of a distplot figure.
    
    Example 1: Simple distplot of 1 data set
    ```
    import plotly.plotly as py
    from plotly.figure_factory import create_distplot
    
    hist_data = [[1.1, 1.1, 2.5, 3.0, 3.5,
                  3.5, 4.1, 4.4, 4.5, 4.5,
                  5.0, 5.0, 5.2, 5.5, 5.5,
                  5.5, 5.5, 5.5, 6.1, 7.0]]
    
    group_labels = ['distplot example']
    
    fig = create_distplot(hist_data, group_labels)
    
    url = py.plot(fig, filename='Simple distplot', validate=False)
    ```
    
    Example 2: Two data sets and added rug text
    ```
    import plotly.plotly as py
    from plotly.figure_factory import create_distplot
    
    # Add histogram data
    hist1_x = [0.8, 1.2, 0.2, 0.6, 1.6,
               -0.9, -0.07, 1.95, 0.9, -0.2,
               -0.5, 0.3, 0.4, -0.37, 0.6]
    hist2_x = [0.8, 1.5, 1.5, 0.6, 0.59,
               1.0, 0.8, 1.7, 0.5, 0.8,
               -0.3, 1.2, 0.56, 0.3, 2.2]
    
    # Group data together
    hist_data = [hist1_x, hist2_x]
    
    group_labels = ['2012', '2013']
    
    # Add text
    rug_text_1 = ['a1', 'b1', 'c1', 'd1', 'e1',
          'f1', 'g1', 'h1', 'i1', 'j1',
          'k1', 'l1', 'm1', 'n1', 'o1']
    
    rug_text_2 = ['a2', 'b2', 'c2', 'd2', 'e2',
          'f2', 'g2', 'h2', 'i2', 'j2',
          'k2', 'l2', 'm2', 'n2', 'o2']
    
    # Group text together
    rug_text_all = [rug_text_1, rug_text_2]
    
    # Create distplot
    fig = create_distplot(
        hist_data, group_labels, rug_text=rug_text_all, bin_size=.2)
    
    # Add title
    fig['layout'].update(title='Dist Plot')
    
    # Plot!
    url = py.plot(fig, filename='Distplot with rug text', validate=False)
    ```
    
    Example 3: Plot with normal curve and hide rug plot
    ```
    import plotly.plotly as py
    from plotly.figure_factory import create_distplot
    import numpy as np
    
    x1 = np.random.randn(190)
    x2 = np.random.randn(200)+1
    x3 = np.random.randn(200)-1
    x4 = np.random.randn(210)+2
    
    hist_data = [x1, x2, x3, x4]
    group_labels = ['2012', '2013', '2014', '2015']
    
    fig = create_distplot(
        hist_data, group_labels, curve_type='normal',
        show_rug=False, bin_size=.4)
    
    url = py.plot(fig, filename='hist and normal curve', validate=False)
    ```
    
    Example 4: Distplot with Pandas
    ```
    import plotly.plotly as py
    from plotly.figure_factory import create_distplot
    import numpy as np
    import pandas as pd
    
    df = pd.DataFrame({'2012': np.random.randn(200),
                       '2013': np.random.randn(200)+1})
    py.iplot(create_distplot([df[c] for c in df.columns], df.columns),
                             filename='examples/distplot with pandas',
                             validate=False)
    ```

Still need help?
Contact Us

For a guaranteed 24-hour response turnaround, upgrade to a Developer Support Plan.