Skip to content

Instantly share code, notes, and snippets.

@tomron
Created September 15, 2022 10:11
Show Gist options
  • Save tomron/8a8e2e17538c303f3bef60cd7f41f315 to your computer and use it in GitHub Desktop.
Save tomron/8a8e2e17538c303f3bef60cd7f41f315 to your computer and use it in GitHub Desktop.
Think outside of the box plot - code accompanying my talk at DataTLV about box plots
import os
import random
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# --- Output / styling configuration -----------------------------------------
output_folder = 'datatlv'
color = "#F7931E"
# Percentile estimation method passed to np.percentile when annotating boxes.
method = 'averaged_inverted_cdf'

# --- Normally distributed samples of increasing size -------------------------
# Seed once so every run draws the same samples. The three draws happen in
# small -> medium -> large order; keep that order to keep the values stable.
np.random.seed(1)
mu, sigma = 0, 4
small_sample_size, medium_sample_size, large_sample_size = 10, 100, 1000
small_sample, medium_sample, large_sample = (
    np.random.normal(mu, sigma, size)
    for size in (small_sample_size, medium_sample_size, large_sample_size)
)
data = [small_sample, large_sample]

# --- Hand-built integer samples on the range 0..100 --------------------------
# Every integer from 0 to 100 exactly once.
uniform_data = list(range(101))
# The same range, but with the mass concentrated on five values.
interval_data = [
    value
    for value, count in ((0, 25), (25, 24), (50, 3), (75, 24), (100, 25))
    for _ in range(count)
]
def get_basic_layout(title=''):
    """Return the shared ``go.Layout`` used by the figures in this script.

    Both axes get the same styling (no grid, no zero line, a mirrored black
    axis line of width 2). The legend is hidden, the paper and plot
    backgrounds are set to ``rgba(0,0,0,0)``, the font size is 30 and the
    title's x position is 0.5.

    Args:
        title: Figure title; defaults to an empty string.

    Returns:
        A new ``go.Layout`` instance.
    """

    def framed_axis():
        # A fresh dict per axis so the two axes never share one object.
        return dict(
            showgrid=False,
            zeroline=False,
            showline=True,
            mirror=True,
            linewidth=2,
            linecolor='black',
        )

    return go.Layout(
        xaxis=framed_axis(),
        yaxis=framed_axis(),
        showlegend=False,
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)',
        title_x=0.5,
        font=dict(size=30),
        title=title,
    )
###############################################################################################################
def get_width(sample, divisor=20):
    """Return a box width that grows logarithmically with the sample size.

    Args:
        sample: Sized collection of observations; only its length is used.
        divisor: Constant that the base-2 logarithm of the length is divided
            by. Defaults to 20, the value that was previously hard-coded.

    Returns:
        ``log2(len(sample)) / divisor``.
    """
    return np.log2(len(sample)) / divisor
def build_traces(samples, names, args=None, width_func=None, output_path=None, layout=None, show_text=True, only_traces=False):
    """Build one ``go.Box`` trace per sample and optionally wrap them in a figure.

    Args:
        samples: Iterable of numeric samples; each one becomes a box trace.
        names: Trace names, paired with ``samples`` by position.
        args: Extra keyword arguments forwarded to every ``go.Box``.
            ``None`` (the default) means no extra arguments.
        width_func: Optional callable mapping a sample to the ``width`` of
            its box. When ``None``, no ``width`` is passed to ``go.Box``.
        output_path: When truthy, the figure is also saved there with
            ``fig.write_image``. For string / path-like values, the parent
            directory is created first if it is missing.
        layout: Layout for the figure. ``None`` (the default) builds a fresh
            ``get_basic_layout()`` for this call.
        show_text: When true, annotate each box with its min, q1, median, q3
            and max, computed by ``np.percentile`` with the module-level
            ``method``.
        only_traces: When true, return the list of traces and build no figure.

    Returns:
        The list of traces when ``only_traces`` is true, otherwise the figure.
    """
    # Materialize once: samples is walked twice (traces, then annotations),
    # which would silently drop the annotations for a one-shot iterable.
    samples = list(samples)
    # A None default avoids sharing one mutable dict between calls.
    box_kwargs = {} if args is None else args

    traces = []
    for sample, name in zip(samples, names):
        if width_func is None:
            traces.append(go.Box(y=sample, name=name, **box_kwargs))
        else:
            traces.append(go.Box(y=sample, name=name, **box_kwargs, width=width_func(sample)))
    if only_traces:
        return traces

    # Build the default layout per call instead of once at definition time.
    if layout is None:
        layout = get_basic_layout()
    fig = go.Figure(layout=layout, data=traces)

    if show_text:
        stat_names = ["min", "q1", "med", "q3", "max"]
        for sample_idx, sample in enumerate(samples):
            stat_values = np.percentile(sample, [0, 25, 50, 75, 100], method=method)
            for stat_name, stat_value in zip(stat_names, stat_values):
                fig.add_annotation(
                    # Offset from the sample's index; presumably this puts the
                    # text just to the right of the matching box.
                    x=0.33 + sample_idx,
                    y=stat_value,
                    text=f"{stat_name}: {round(stat_value, 2)}",
                    font_size=20,
                    showarrow=False
                )

    if output_path:
        # Create the target folder up front so a missing directory does not
        # make the image export fail.
        if isinstance(output_path, (str, os.PathLike)):
            parent_dir = os.path.dirname(output_path)
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)
        fig.write_image(output_path, scale=10)
    return fig
# Keyword arguments forwarded to go.Box by build_traces.
basic_args = {}
# Same as basic_args, but also draws every individual observation.
points_args = {'boxpoints': 'all', 'jitter': 0.3, 'pointpos': -1.8}
points_args.update(basic_args)

# Plain box plot of the two samples.
fig = build_traces(
    data,
    names=['sample1', 'sample2'],
    args=basic_args,
    output_path='datatlv/simple_box.png',
    layout=get_basic_layout('Simple Box Plot'),
    show_text=False)
fig.show()

# Same plot annotated with min/q1/med/q3/max. It is saved under its own file
# name so it no longer overwrites the un-annotated image written above.
fig = build_traces(
    data,
    names=['sample1', 'sample2'],
    args=basic_args,
    output_path='datatlv/simple_box_with_text.png',
    layout=get_basic_layout('Simple Box Plot'),
    show_text=True)
fig.show()

# Box plot with the raw points drawn next to each box.
fig = build_traces(
    data,
    names=['sample1', 'sample2'],
    args=points_args,
    output_path='datatlv/simple_box_with_points.png',
    layout=get_basic_layout('Simple Box Plot with Points'),
    show_text=False)
fig.show()

# Box plot whose box widths are derived from each sample via get_width.
fig = build_traces(
    data,
    names=['sample1', 'sample2'],
    args=basic_args,
    width_func=get_width,
    output_path='datatlv/simple_box_with_width.png',
    layout=get_basic_layout('Simple Box Plot adjusted width'),
    show_text=False)
fig.show()
# Side-by-side comparison: default box widths in the left subplot, widths
# computed by get_width in the right subplot.
fig = make_subplots(rows=1, cols=2)

traces = build_traces(
    data,
    names=['sample1', 'sample2'],
    args=basic_args,
    only_traces=True)
for trace in traces:
    fig.add_trace(trace, row=1, col=1)

traces = build_traces(
    data,
    names=['sample1', 'sample2'],
    args=basic_args,
    width_func=get_width,
    only_traces=True)
# add_trace takes row/col; the previous add_traces(..., row=1, col=2) call
# used keyword names that add_traces does not accept (it expects rows/cols).
for trace in traces:
    fig.add_trace(trace, row=1, col=2)

# Apply the same axis styling to both subplots' axes.
fig.update_layout(
    title="Box Plot with and without Width",
    **{
        axis_name: dict(
            showgrid=False,
            zeroline=False,
            showline=True,
            mirror=True,
            linewidth=2,
            linecolor='black')
        for axis_name in ('xaxis', 'yaxis', 'xaxis2', 'yaxis2')
    },
    showlegend=False,
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
    title_x=0.5,
    font=dict(size=30))
fig.show()
###############################################################################################################
# uniform_data and interval_data are distributed very differently; both are
# drawn here as boxes using the "inclusive" quartile method.
fig = go.Figure(
    data=[
        go.Box(y=uniform_data, quartilemethod="inclusive", name="sample1"),
        go.Box(y=interval_data, quartilemethod="inclusive", name="sample2"),
    ],
    layout=get_basic_layout('Simple Box Plot'),
)
fig.show()

# Same two boxes, this time with every observation drawn as a jittered point.
fig = go.Figure(
    data=[
        go.Box(y=uniform_data, quartilemethod="inclusive", name="sample1"),
        go.Box(y=interval_data, quartilemethod="inclusive", name="sample2"),
    ],
    layout=get_basic_layout('Simple Box Plot with Points'),
)
fig.update_traces(boxpoints='all', jitter=0.3)
fig.show()
###############################################################################################################
# Synthetic integer sample assembled decade by decade, highest decade first.
# Each tuple is (low, high, count): `count` integers drawn from [low, high).
# No values are drawn from the 10-19 range.
histogram = np.concatenate([
    np.random.randint(low, high, count)
    for low, high, count in (
        (90, 100, 7),
        (80, 90, 5),
        (70, 80, 11),
        (60, 70, 14),
        (50, 60, 17),
        (40, 50, 5),
        (30, 40, 7),
        (20, 30, 8),
        (0, 10, 26),
    )
])
# One label per decade, from '90 to <100' down to '0 to <10'.
labels = [f'{low} to <{low + 10}' for low in range(90, -10, -10)]
# Histogram with the values on the y axis; the y ticks are replaced by the
# decade labels, placed at the midpoint of each decade.
fig = go.Figure(layout=get_basic_layout('Patients Age Histogram'))
fig.add_trace(go.Histogram(y=histogram))
fig.update_layout(
    yaxis_tickmode='array',
    yaxis_tickvals=[95, 85, 75, 65, 55, 45, 35, 25, 15, 5],
    yaxis_ticktext=labels,
)
fig.show()

# The same data as a plain box plot.
fig = go.Figure(layout=get_basic_layout('Patients Age Box Plot'))
fig.add_trace(go.Box(y=histogram, name='Patients'))
fig.show()

# ... and as a notched box plot.
fig = go.Figure(layout=get_basic_layout('Patients Age Box Plot Notched'))
fig.add_trace(go.Box(y=histogram, name='Patients', notched=True))
fig.show()
# Histogram (left subplot) next to a box plot (right subplot) of the same data.
fig = make_subplots(rows=1, cols=2)
fig.add_trace(go.Histogram(y=histogram), row=1, col=1)
fig.add_trace(
    go.Box(y=histogram, marker_color='#636EFA', name='Patients'),
    row=1, col=2,
)

# Apply one shared axis styling to the axes of both subplots.
fig.update_layout(
    title="Histogram vs Box Plot",
    **{
        axis_name: dict(
            showgrid=False,
            zeroline=False,
            showline=True,
            mirror=True,
            linewidth=2,
            linecolor='black',
        )
        for axis_name in ('xaxis', 'yaxis', 'xaxis2', 'yaxis2')
    },
    showlegend=False,
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
    title_x=0.5,
    font=dict(size=30),
)
fig.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment