Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Sparklines table generation
import pandas as pd
import os
import numpy as np
def sparkline(data, endpoints=False, ci=True):
    r"""Build a LaTeX ``sparkline`` environment from a temperature histogram.

    The values in ``data.Mean_TemperatureC.values`` are binned into
    one-unit-wide bins over the fixed range [-12, 31]. Bin centres and bin
    counts are both rescaled to [0, 1] before being written out as the
    ``\spark`` point list.

    Parameters
    ----------
    data
        Object exposing ``Mean_TemperatureC.values`` as a NumPy array
        (e.g. a pandas DataFrame with that column).
    endpoints : bool
        If true, emit a black ``\sparkdot`` for the first and the last bin.
    ci : bool
        If true, emit a ``\sparkrectangleh`` whose two arguments are
        mean -/+ 1.96 * std of the data, rescaled like the x axis range.

    Returns
    -------
    str
        The markup from ``\begin{sparkline}{15}`` to ``\end{sparkline}``.
    """
    values = data.Mean_TemperatureC.values
    r = [-12, 31]
    width = r[1] - r[0]

    hist, edges = np.histogram(values, bins=width, range=r)
    centers = (edges[1:] + edges[:-1]) * 0.5

    # Normalize to [0, 1]. A flat histogram (e.g. every sample outside the
    # range) would divide 0 by 0 and leak "nan" into the LaTeX output, so
    # it is mapped to an all-zero curve instead.
    hist_span = hist.max() - hist.min()
    if hist_span:
        hist = (hist - hist.min()) / hist_span
    else:
        hist = np.zeros(hist.shape, dtype=float)
    centers = (centers - centers.min()) / (centers.max() - centers.min())

    # Bounds of the mean -/+ 1.96 std interval, as fractions of the range.
    low = values.mean() - 1.96 * values.std()
    high = values.mean() + 1.96 * values.std()
    umin = (low - r[0]) / width
    umax = (high - r[0]) / width

    # x position (normalized bin centre) of the tallest bin.
    peak_x = np.round(centers[hist.argmax()], 4)

    parts = [r"\begin{sparkline}{15}"]
    if ci:
        parts.append(rf"\sparkrectangleh {umin} {umax} ")
    if endpoints:
        first = np.round(hist[0], 4)
        last = np.round(hist[-1], 4)
        parts.append(rf"\sparkdot 0 {first} black ")
        parts.append(rf"\sparkdot 1 {last} black ")

    # Data extremes on the baseline: maximum in red, minimum in blue.
    # NOTE(review): the original indentation was lost; these two dots are
    # assumed to be emitted regardless of `endpoints` -- confirm.
    x_hot = (values.max() - r[0]) / width
    parts.append(rf"\sparkdot {x_hot} 0 red ")
    x_cold = (values.min() - r[0]) / width
    parts.append(rf"\sparkdot {x_cold} 0 blue ")

    # Raw string: "\s" is not a valid escape sequence in a normal literal.
    parts.append(r"\spark ")
    parts.extend(
        f"{np.round(x, 4)} {np.round(y, 4)} " for x, y in zip(centers, hist)
    )
    parts.append("/")
    parts.append(rf"\sparkdot {peak_x} 1 black ")
    parts.append(r"\end{sparkline}")
    return "".join(parts).strip()
def f(x):
    """Summarise the ``Mean_TemperatureC`` column of one group.

    Returns a Series with the entries ``mean``, ``std``, ``min``, ``max``
    (computed on ``x['Mean_TemperatureC']``) and ``sparkline`` (the string
    returned by ``sparkline(x)``), in that order.
    """
    temps = x['Mean_TemperatureC']
    fields = ['mean', 'std', 'min', 'max', 'sparkline']
    # Every field except the last one is a Series method of the same name.
    summary = {name: getattr(temps, name)() for name in fields[:-1]}
    summary['sparkline'] = sparkline(x)
    return pd.Series(summary, index=fields)
# Fetch the 2016 Seattle weather CSV from plotly's GitHub repository and
# tag every row with its calendar month number.
df = pd.read_csv(
    'https://raw.githubusercontent.com/plotly/datasets/master/2016-weather-data-seattle.csv')
df = df.assign(month=pd.to_datetime(df['Date']).dt.month)

# Month number -> month name; used later to label the table rows.
month_dict = dict(enumerate(
    ['January', 'February', 'March', 'April', 'May', 'June',
     'July', 'August', 'September', 'October', 'November', 'December'],
    start=1))

# Order by month, swap the textual Date column for a parsed datetime column
# and discard rows with missing values.
df = df.sort_values("month")
df = (
    df.assign(datetime=pd.to_datetime(df.Date))
    .drop(columns=["Date"])
    .dropna()
)
print(df.head())
# One row per month: summary statistics plus the sparkline markup from f().
# Only the column f() reads is selected before apply(); letting apply()
# operate on the grouping column itself is deprecated in recent pandas.
df4 = (
    df.groupby("month")[["Mean_TemperatureC"]]
    .apply(f)
    .reset_index()
)
# Show month names instead of month numbers.
df4['month'] = df4['month'].map(month_dict)
print(df4.head())
# Highlight the extreme cells with LaTeX colour/bold markup: largest value in
# red, smallest in blue. The result of replace() is assigned back to the
# column because `df4[col].replace(..., inplace=True)` acts on a temporary
# Series and is not guaranteed to modify df4 (chained in-place modification).
overall_max = df4["max"].max()
df4['max'] = df4['max'].replace(
    overall_max, "\\color{{red}}\\textbf{{{}}}".format(overall_max))

overall_min = df4["min"].min()
df4['min'] = df4['min'].replace(
    overall_min, "\\color{{blue}}\\textbf{{{}}}".format(overall_min))

# Both extremes of the mean column are captured before either replacement,
# while the column is still numeric; they are shown rounded to one decimal.
df4_max = df4["mean"].max()
df4_min = df4["mean"].min()
df4['mean'] = df4['mean'].replace(
    df4_max, "\\color{{red}}\\textbf{{{}}}".format(np.round(df4_max, 1)))
df4['mean'] = df4['mean'].replace(
    df4_min, "\\color{{blue}}\\textbf{{{}}}".format(np.round(df4_min, 1)))
# Two-level column header: the first column sits under an empty "{}" group,
# the remaining five under "\textbf{Temperature}".
colnames = [
    r"\textbf{Month}",
    r"$\mu{}$",
    r"$\sigma{}$",
    r"{Min}",
    r"{Max}",
    r"{Histogram}",
]
cols = pd.MultiIndex(
    levels=[[r"{}", r"\textbf{Temperature}"], colnames],
    codes=[[0] + [1] * 5, list(range(6))],
)
df4.columns = cols

# Widen the column display limit so the long sparkline strings are not cut
# off at the default width when printing.
with pd.option_context("max_colwidth", 100):
    print(df4)
# Column spec: left-aligned month, four "S[...]" numeric columns (siunitx
# syntax, presumably) and a centred column for the sparkline.
col_format = "l" + "S[table-format = 2.1, round-precision = 1]" * 4 + "c"

# Render before opening the output file, so a failure in to_latex() does not
# leave an empty file behind. The very large max_colwidth keeps the long
# sparkline strings from being truncated.
with pd.option_context("max_colwidth", 100000):
    contents = df4.to_latex(
        index=False,
        escape=False,
        column_format=col_format,
        multirow=True,
        multicolumn_format="c",
    ).split('\n')

# Rule under columns 2-6, inserted as the fourth output line.
# NOTE(review): index 3 presumably lands right below the group-header row;
# verify against the to_latex() layout of the installed pandas version.
contents.insert(3, r"\cmidrule(lr){2-6}")

# Output goes to "<script name>.tbl" in the current working directory. The
# handle is not called `f`, so the function f() defined above stays intact.
tbl_name = os.path.splitext(os.path.basename(__file__))[0] + ".tbl"
with open(tbl_name, "w", encoding="utf-8") as tbl_file:
    tbl_file.write("\n".join(contents))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment