Skip to content

Instantly share code, notes, and snippets.

@martisak
Created October 3, 2021 15:04
Show Gist options
  • Save martisak/a7388765f0ece78457a07a0f91cdb1db to your computer and use it in GitHub Desktop.
Save martisak/a7388765f0ece78457a07a0f91cdb1db to your computer and use it in GitHub Desktop.
Sparklines table generation
import pandas as pd
import os
import numpy as np
def sparkline(data, endpoints=False, ci=True):
    r"""Render the distribution of ``data.Mean_TemperatureC`` as LaTeX.

    The values are binned into unit-width bins over the fixed range
    [-12, 31]. Bin centres and bin counts are both rescaled to [0, 1] and
    emitted as the point list of a ``\spark`` command inside a
    ``sparkline`` environment of width 15.

    Args:
        data: Object whose ``Mean_TemperatureC`` attribute exposes a NumPy
            array through ``.values`` (for example a pandas DataFrame).
        endpoints: When true, add black dots on the first and last bins.
        ci: When true, add a ``\sparkrectangleh`` band spanning
            mean +/- 1.96 standard deviations, rescaled to the bin range.

    Returns:
        The complete ``\begin{sparkline}...\end{sparkline}`` string with
        surrounding whitespace stripped.
    """
    temps = data.Mean_TemperatureC.values
    lo, hi = -12, 31
    span = hi - lo

    counts, edges = np.histogram(temps, bins=span, range=(lo, hi))
    centers = (edges[1:] + edges[:-1]) * 0.5

    # Normalize to [0, 1]. A flat histogram (e.g. every sample outside the
    # range) has zero spread; draw it as a zero line instead of dividing by
    # zero and writing "nan" into the LaTeX output.
    count_spread = counts.max() - counts.min()
    if count_spread:
        heights = (counts - counts.min()) / count_spread
    else:
        heights = np.zeros(len(counts))
    positions = (centers - centers.min()) / (centers.max() - centers.min())

    # Position of the fullest bin, marked with a black dot at the end.
    peak = np.round(positions[heights.argmax()], 4)

    parts = [r"\begin{sparkline}{15}"]

    if ci:
        mean, std = temps.mean(), temps.std()
        band_lo = (mean - 1.96 * std - lo) / span
        band_hi = (mean + 1.96 * std - lo) / span
        parts.append(r"\sparkrectangleh {} {} ".format(band_lo, band_hi))

    if endpoints:
        parts.append(fr"\sparkdot 0 {np.round(heights[0], 4)} black ")
        parts.append(fr"\sparkdot 1 {np.round(heights[-1], 4)} black ")

    # Largest and smallest observed values, placed on the baseline.
    max_pos = (temps.max() - lo) / span
    min_pos = (temps.min() - lo) / span
    parts.append(fr"\sparkdot {max_pos} 0 red ")
    parts.append(fr"\sparkdot {min_pos} 0 blue ")

    # The point list of \spark is terminated by a single "/".
    parts.append(r"\spark ")
    parts.extend(
        f"{pos} {height} "
        for pos, height in zip(np.round(positions, 4), np.round(heights, 4))
    )
    parts.append("/")

    parts.append(fr"\sparkdot {peak} 1 black ")
    parts.append(r"\end{sparkline}")
    return "".join(parts).strip()
def f(x):
    """Summarise the ``Mean_TemperatureC`` column of one group.

    Returns a ``pd.Series`` holding the column's ``mean``, ``std``, ``min``
    and ``max`` followed by ``sparkline``, the LaTeX string produced by
    ``sparkline(x)``.
    """
    temps = x['Mean_TemperatureC']
    summary = {
        'mean': temps.mean(),
        'std': temps.std(),
        'min': temps.min(),
        'max': temps.max(),
        'sparkline': sparkline(x),
    }
    # The dict is already in display order; the index pins that order.
    return pd.Series(summary, index=list(summary))
def _highlight(column, value, colour, shown=None):
    """Return *column* with cells equal to *value* shown bold and coloured.

    *shown* is the text placed inside the LaTeX markup; it defaults to
    *value* itself. The result is returned rather than written in place so
    the caller can assign it back to the frame explicitly.
    """
    shown = value if shown is None else shown
    return column.replace(
        value, "\\color{{{}}}\\textbf{{{}}}".format(colour, shown))


# First download the data from plotly's GitHub repository
df = pd.read_csv(
    'https://raw.githubusercontent.com/plotly/datasets/master/'
    '2016-weather-data-seattle.csv')

# Parse the dates once; both derived columns come from the same Series.
dates = pd.to_datetime(df['Date'])
df['month'] = dates.dt.month

# Month number -> month name, applied to the grouped result further down.
month_dict = {1: 'January', 2: 'February',
              3: 'March', 4: 'April',
              5: 'May', 6: 'June',
              7: 'July', 8: 'August',
              9: 'September', 10: 'October',
              11: 'November', 12: 'December'}

df = df.sort_values("month")
df["datetime"] = dates  # aligned on the index, so sorting first is harmless
df = df.drop(["Date"], axis=1)
df = df.dropna()
print(df.head())

# Only the temperature column is handed to f(): it is the only one f() and
# sparkline() read, and it keeps the grouping column out of apply(), which
# pandas has deprecated.
df4 = (df
       .groupby("month")[["Mean_TemperatureC"]]
       .apply(f)
       .reset_index()
       )
df4['month'] = df4['month'].map(month_dict)
print(df4.head())

# Highlight the extremes of each column. The result is assigned back instead
# of calling replace(..., inplace=True) on df4[col]: that form is chained
# assignment and does not update df4 under pandas Copy-on-Write.
df4['max'] = _highlight(df4['max'], df4['max'].max(), "red")
df4['min'] = _highlight(df4['min'], df4['min'].min(), "blue")

# Both extremes of the mean are read before either cell becomes a string.
df4_max = df4["mean"].max()
df4_min = df4["mean"].min()
df4['mean'] = _highlight(df4['mean'], df4_max, "red", np.round(df4_max, 1))
df4['mean'] = _highlight(df4['mean'], df4_min, "blue", np.round(df4_min, 1))

# Two-level header: an empty group above the month column and a
# "Temperature" group spanning the five remaining columns.
colnames = [r"\textbf{Month}", r"$\mu{}$",
            r"$\sigma{}$", r"{Min}", r"{Max}", r"{Histogram}"]
cols = pd.MultiIndex(
    levels=[[r"{}", r"\textbf{Temperature}"], colnames],
    codes=[[0, 1, 1, 1, 1, 1], [0, 1, 2, 3, 4, 5]])
df4.columns = cols

with pd.option_context("max_colwidth", 100):
    print(df4)

col_format = "l" + "S[table-format = 2.1, round-precision = 1]" * 4 + "c"

# The table is written next to the current working directory, named after
# this script with a ".tbl" extension.
table_path = os.path.splitext(os.path.basename(__file__))[0] + ".tbl"

# The handle is not called "f", so the summary function f() stays bound.
with open(table_path, "w", encoding="utf-8") as table_file:
    # A very large max_colwidth keeps the long sparkline cells untruncated.
    with pd.option_context("max_colwidth", 100000):
        contents = (df4.to_latex(
            index=False,
            escape=False,
            column_format=col_format,
            multirow=True,
            multicolumn_format="c")).split('\n')
    # Rule under the "Temperature" group header (columns 2-6).
    contents.insert(3, r"\cmidrule(lr){2-6}")
    table_file.write("\n".join(contents))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment