import os

import numpy as np
import pandas as pd

# Source data: plotly's copy of the 2016 Seattle weather CSV on GitHub.
DATA_URL = (
    'https://raw.githubusercontent.com/plotly/datasets/master/'
    '2016-weather-data-seattle.csv'
)

# Month number -> month name, used to label the rows of the final table.
month_dict = {1: 'January', 2: 'February', 3: 'March', 4: 'April',
              5: 'May', 6: 'June', 7: 'July', 8: 'August',
              9: 'September', 10: 'October', 11: 'November',
              12: 'December'}


def sparkline(data, endpoints=False, ci=True):
    r"""Return a LaTeX ``sparkline`` environment for a temperature histogram.

    The values of ``data.Mean_TemperatureC`` are binned over the fixed
    range [-12, 31] with one bin per unit, and both the bin centres and
    the bin counts are rescaled to [0, 1] before being emitted as the
    points of a ``\spark`` path.

    Args:
        data: Object exposing a ``Mean_TemperatureC`` attribute with a
            ``.values`` array (e.g. a pandas DataFrame).
        endpoints: If true, add black dots at x=0 and x=1 at the height
            of the first and last bins.
        ci: If true, add a ``\sparkrectangleh`` spanning
            mean +/- 1.96 * std of the values, mapped onto the fixed
            range.

    Returns:
        The complete ``\begin{sparkline}...\end{sparkline}`` string.
    """
    temps = data.Mean_TemperatureC.values
    lower, upper = -12, 31
    span = upper - lower

    hist, bins = np.histogram(temps, bins=span, range=(lower, upper))
    bin_centers = (bins[1:] + bins[:-1]) * 0.5

    # Normalize both axes to [0, 1].
    hist = (hist - hist.min()) / (hist.max() - hist.min())
    bin_centers = ((bin_centers - bin_centers.min())
                   / (bin_centers.max() - bin_centers.min()))

    # Position of the tallest bin; it gets a black dot at height 1.
    peak_x = np.round(bin_centers[hist.argmax()], 4)

    parts = [r"\begin{sparkline}{15}"]

    if ci:
        center = temps.mean()
        half_width = 1.96 * temps.std()
        band_lo = (center - half_width - lower) / span
        band_hi = (center + half_width - lower) / span
        parts.append(r"\sparkrectangleh {} {} ".format(band_lo, band_hi))

    if endpoints:
        first = np.round(hist[0], 4)
        last = np.round(hist[-1], 4)
        parts.append(fr"\sparkdot 0 {first} black ")
        parts.append(fr"\sparkdot 1 {last} black ")

    # Mark the observed maximum (red) and minimum (blue) on the baseline.
    max_x = (temps.max() - lower) / span
    parts.append(fr"\sparkdot {max_x} 0 red ")
    min_x = (temps.min() - lower) / span
    parts.append(fr"\sparkdot {min_x} 0 blue ")

    # Raw string: "\s" is not a valid escape sequence in a normal literal.
    parts.append(r"\spark ")
    for center_x, height in zip(bin_centers, hist):
        x = np.round(center_x, 4)
        y = np.round(height, 4)
        parts.append(f"{x} {y} ")
    parts.append(r"/")

    parts.append(fr"\sparkdot {peak_x} 1 black ")
    parts.append(r"\end{sparkline}")
    return "".join(parts).strip()


def f(x):
    """Summarize one group's ``Mean_TemperatureC`` column.

    Args:
        x: Mapping-like object (e.g. a DataFrame group) with a
            ``Mean_TemperatureC`` column.

    Returns:
        A ``pd.Series`` indexed ``mean``, ``std``, ``min``, ``max`` and
        ``sparkline`` (the LaTeX string produced by ``sparkline(x)``).
    """
    temps = x['Mean_TemperatureC']
    summary = {
        'max': temps.max(),
        'mean': temps.mean(),
        'std': temps.std(),
        'min': temps.min(),
        'sparkline': sparkline(x),
    }
    return pd.Series(summary,
                     index=['mean', 'std', 'min', 'max', 'sparkline'])


def _highlight(column, value, color, shown=None):
    """Return ``column`` with ``value`` replaced by colored bold LaTeX.

    ``shown`` is the text placed inside ``\\textbf{}``; it defaults to
    ``value`` itself. A new Series is returned so the caller assigns it
    back explicitly instead of relying on in-place mutation of a column
    selection.
    """
    if shown is None:
        shown = value
    markup = "\\color{{{}}}\\textbf{{{}}}".format(color, shown)
    return column.replace(value, markup)


def main():
    """Download the weather data and write the monthly summary table.

    The table is written next to the current working directory as
    ``<script name>.tbl``.
    """
    # First download the data from plotly's GitHub repository
    df = pd.read_csv(DATA_URL)
    df['month'] = pd.to_datetime(df['Date']).dt.month

    df = df.sort_values("month")
    df["datetime"] = pd.to_datetime(df.Date)
    df = df.drop(["Date"], axis=1)
    df = df.dropna()
    print(df.head())

    df4 = df.groupby("month").apply(f).reset_index()
    df4['month'] = df4['month'].map(month_dict)
    print(df4.head())

    # Highlight the extremes: red for the largest value, blue for the
    # smallest. Results are assigned back to the frame explicitly.
    df4['max'] = _highlight(df4['max'], df4['max'].max(), "red")
    df4['min'] = _highlight(df4['min'], df4['min'].min(), "blue")

    # Both extremes of the mean are taken before either replacement,
    # because the column holds strings afterwards.
    df4_max = df4["mean"].max()
    df4_min = df4["mean"].min()
    df4['mean'] = _highlight(df4['mean'], df4_max, "red",
                             np.round(df4_max, 1))
    df4['mean'] = _highlight(df4['mean'], df4_min, "blue",
                             np.round(df4_min, 1))

    # Two-level header: an empty group above the month column and a
    # "Temperature" group above the remaining five columns.
    colnames = [r"\textbf{Month}", r"$\mu{}$", r"$\sigma{}$",
                r"{Min}", r"{Max}", r"{Histogram}"]
    df4.columns = pd.MultiIndex(
        levels=[[r"{}", r"\textbf{Temperature}"], colnames],
        codes=[[0, 1, 1, 1, 1, 1], [0, 1, 2, 3, 4, 5]])

    with pd.option_context("max_colwidth", 100):
        print(df4)

    # One left-aligned column, four numeric "S" columns, one centered.
    col_format = ("l"
                  + "S[table-format = 2.1, round-precision = 1]" * 4
                  + "c")

    table_path = os.path.splitext(os.path.basename(__file__))[0] + ".tbl"
    # The handle is deliberately not called ``f``: that name is the
    # aggregation function above.
    with open(table_path, "w", encoding="utf-8") as out:
        # A very large max_colwidth keeps the long sparkline cells from
        # being truncated.
        with pd.option_context("max_colwidth", 100000):
            contents = df4.to_latex(
                index=False, escape=False, column_format=col_format,
                multirow=True, multicolumn_format="c").split('\n')
        # NOTE(review): index 3 is presumably the line right after the
        # multicolumn header row; confirm against the to_latex output of
        # the pandas version in use.
        contents.insert(3, r"\cmidrule(lr){2-6}")
        out.write("\n".join(contents))


if __name__ == "__main__":
    main()