# martisak/seattle_table.py

## seattle_table.py
import pandas as pd
import os
import numpy as np


def sparkline(data, endpoints=False, ci=True):
    r"""Build the LaTeX ``sparkline`` source for a temperature histogram.

    The values in ``data.Mean_TemperatureC`` are binned into one-degree bins
    over the fixed range [-12, 31]. Bin heights and bin centres are both
    rescaled to [0, 1] before being written as ``\spark`` coordinates.

    Args:
        data: Object whose ``Mean_TemperatureC.values`` is a non-empty
            numeric array (e.g. a pandas DataFrame with that column).
        endpoints: When true, add black ``\sparkdot`` markers at the heights
            of the first and last bins.
        ci: When true, emit ``\sparkrectangleh`` with the rescaled positions
            of mean -/+ 1.96 standard deviations, using the array's own
            ``std()`` method.

    Returns:
        The complete ``\begin{sparkline}{15} ... \end{sparkline}`` string.

    Raises:
        ValueError: If there are no temperature values.
    """
    temps = data.Mean_TemperatureC.values
    if temps.size == 0:
        raise ValueError("sparkline() needs at least one temperature value")

    # Fixed axis shared by every sparkline so they are visually comparable.
    axis_min, axis_max = -12, 31
    axis_span = axis_max - axis_min
    hist, bins = np.histogram(
        temps, bins=axis_span, range=(axis_min, axis_max))
    bin_centers = (bins[1:] + bins[:-1]) * 0.5

    # Normalize heights to [0, 1]. A flat histogram has no spread to rescale;
    # it stays at 0 instead of dividing by zero and emitting "nan".
    heights = (hist - hist.min()).astype(float)
    peak = heights.max()
    if peak > 0:
        heights = heights / peak
    bin_centers = (bin_centers - bin_centers.min()) / \
        (bin_centers.max() - bin_centers.min())

    # x position of the tallest bin (first one on ties).
    mode_x = np.round(bin_centers[heights.argmax()], 4)

    parts = [r"\begin{sparkline}{15}"]

    if ci:
        low = temps.mean() - 1.96 * temps.std()
        high = temps.mean() + 1.96 * temps.std()
        umin = (low - axis_min) / axis_span
        umax = (high - axis_min) / axis_span
        parts.append(fr"\sparkrectangleh {umin} {umax} ")

    if endpoints:
        first = np.round(heights[0], 4)
        last = np.round(heights[-1], 4)
        parts.append(fr"\sparkdot 0 {first} black ")
        parts.append(fr"\sparkdot 1 {last} black ")

    # Mark the largest (red) and smallest (blue) value on the baseline.
    warmest = (temps.max() - axis_min) / axis_span
    parts.append(fr"\sparkdot {warmest} 0 red ")

    coldest = (temps.min() - axis_min) / axis_span
    parts.append(fr"\sparkdot {coldest} 0 blue ")

    # Raw string: "\s" is not a valid escape sequence in a normal literal.
    parts.append(r"\spark ")
    parts.extend(
        f"{np.round(x, 4)} {np.round(y, 4)} "
        for x, y in zip(bin_centers, heights)
    )
    parts.append("/")

    parts.append(fr"\sparkdot {mode_x} 1 black ")
    parts.append(r"\end{sparkline}")
    return "".join(parts).strip()


def f(x):
    """Summarise the ``Mean_TemperatureC`` column of one group.

    Returns a Series holding the mean, standard deviation, minimum and
    maximum of the column plus the LaTeX sparkline for the group, in that
    order.
    """
    temps = x['Mean_TemperatureC']
    summary = {
        'mean': temps.mean(),
        'std': temps.std(),
        'min': temps.min(),
        'max': temps.max(),
        'sparkline': sparkline(x),
    }
    # The explicit index pins the order of the resulting table columns.
    column_order = ['mean', 'std', 'min', 'max', 'sparkline']
    return pd.Series(summary, index=column_order)


# First download the data from plotly's GitHub repository
df = pd.read_csv(
    'https://raw.githubusercontent.com/plotly/datasets/master/2016-weather-data-seattle.csv')

# Parse the dates once; they feed both the month key and the datetime column.
dates = pd.to_datetime(df['Date'])
df['month'] = dates.dt.month

# Month number -> English month name, used to label the table rows.
month_dict = {1: 'January', 2: 'February',
              3: 'March', 4: 'April',
              5: 'May', 6: 'June',
              7: 'July', 8: 'August',
              9: 'September', 10: 'October',
              11: 'November', 12: 'December'}

df = df.sort_values("month")

# Assignment aligns on the index, so each sorted row keeps its own date.
df["datetime"] = dates
df = df.drop(["Date"], axis=1)
df = df.dropna()

print(df.head())

# One row per month: mean, std, min, max and the sparkline histogram.
df4 = (df
       .groupby("month")
       .apply(f).reset_index()
       )

df4['month'] = df4['month'].map(month_dict)

print(df4.head())

# Highlight the extremes. The result of replace() is assigned back instead of
# calling replace(inplace=True) on a column selection: that is a chained
# assignment and leaves df4 untouched under pandas Copy-on-Write.
overall_max = df4["max"].max()
overall_min = df4["min"].min()

df4['max'] = df4['max'].replace(
    overall_max, "\\color{{red}}\\textbf{{{}}}".format(overall_max))
df4['min'] = df4['min'].replace(
    overall_min, "\\color{{blue}}\\textbf{{{}}}".format(overall_min))

# Both extremes are read before the column starts to contain strings.
df4_max = df4["mean"].max()
df4_min = df4["mean"].min()

df4['mean'] = df4['mean'].replace(
    df4_max, "\\color{{red}}\\textbf{{{}}}".format(np.round(df4_max, 1)))
df4['mean'] = df4['mean'].replace(
    df4_min, "\\color{{blue}}\\textbf{{{}}}".format(np.round(df4_min, 1)))

colnames = [r"\textbf{Month}", r"$\mu{}$",
            r"$\sigma{}$", r"{Min}", r"{Max}", r"{Histogram}"]

# Two-level header: an empty group over the month column and a
# "Temperature" group over the remaining five columns.
cols = pd.MultiIndex(
    levels=[[r"{}", r"\textbf{Temperature}"], colnames],
    codes=[[0, 1, 1, 1, 1, 1], [0, 1, 2, 3, 4, 5]])

df4.columns = cols

with pd.option_context("max_colwidth", 100):
    print(df4)

col_format = "l" + "S[table-format = 2.1, round-precision = 1]" * 4 + "c"

# Write "<script name>.tbl" into the current working directory. The handle is
# not called ``f`` so that the function ``f`` above stays bound.
table_path = os.path.splitext(os.path.basename(__file__))[0] + ".tbl"

with open(table_path, "w") as table_file:

    # Large max_colwidth so the long sparkline cells are not truncated.
    with pd.option_context("max_colwidth", 100000):

        contents = (df4.to_latex(
            index=False,
            escape=False,
            column_format=col_format,
            multirow=True,
            multicolumn_format="c")).split('\n')

        # NOTE(review): index 3 is presumably the line right after the top
        # header row of the to_latex output; confirm against the installed
        # pandas version.
        contents.insert(3, r"\cmidrule(lr){2-6}")
        table_file.write("\n".join(contents))
	import pandas as pd
	import os
	import numpy as np


	def sparkline(data, endpoints=False, ci=True):
		r"""Build the LaTeX ``sparkline`` source for a temperature histogram.

		The values in ``data.Mean_TemperatureC`` are binned into one-degree
		bins over the fixed range [-12, 31]. Bin heights and bin centres are
		both rescaled to [0, 1] before being written as ``\spark``
		coordinates.

		Args:
			data: Object whose ``Mean_TemperatureC.values`` is a non-empty
				numeric array (e.g. a pandas DataFrame with that column).
			endpoints: When true, add black ``\sparkdot`` markers at the
				heights of the first and last bins.
			ci: When true, emit ``\sparkrectangleh`` with the rescaled
				positions of mean -/+ 1.96 standard deviations, using the
				array's own ``std()`` method.

		Returns:
			The complete ``\begin{sparkline}{15} ... \end{sparkline}`` string.

		Raises:
			ValueError: If there are no temperature values.
		"""
		temps = data.Mean_TemperatureC.values
		if temps.size == 0:
			raise ValueError("sparkline() needs at least one temperature value")

		# Fixed axis shared by every sparkline so they are visually comparable.
		axis_min, axis_max = -12, 31
		axis_span = axis_max - axis_min
		hist, bins = np.histogram(
			temps, bins=axis_span, range=(axis_min, axis_max))
		bin_centers = (bins[1:] + bins[:-1]) * 0.5

		# Normalize heights to [0, 1]. A flat histogram has no spread to
		# rescale; it stays at 0 instead of dividing by zero and emitting "nan".
		heights = (hist - hist.min()).astype(float)
		peak = heights.max()
		if peak > 0:
			heights = heights / peak
		bin_centers = (bin_centers - bin_centers.min()) / \
			(bin_centers.max() - bin_centers.min())

		# x position of the tallest bin (first one on ties).
		mode_x = np.round(bin_centers[heights.argmax()], 4)

		parts = [r"\begin{sparkline}{15}"]

		if ci:
			low = temps.mean() - 1.96 * temps.std()
			high = temps.mean() + 1.96 * temps.std()
			umin = (low - axis_min) / axis_span
			umax = (high - axis_min) / axis_span
			parts.append(fr"\sparkrectangleh {umin} {umax} ")

		if endpoints:
			first = np.round(heights[0], 4)
			last = np.round(heights[-1], 4)
			parts.append(fr"\sparkdot 0 {first} black ")
			parts.append(fr"\sparkdot 1 {last} black ")

		# Mark the largest (red) and smallest (blue) value on the baseline.
		warmest = (temps.max() - axis_min) / axis_span
		parts.append(fr"\sparkdot {warmest} 0 red ")

		coldest = (temps.min() - axis_min) / axis_span
		parts.append(fr"\sparkdot {coldest} 0 blue ")

		# Raw string: "\s" is not a valid escape sequence in a normal literal.
		parts.append(r"\spark ")
		parts.extend(
			f"{np.round(x, 4)} {np.round(y, 4)} "
			for x, y in zip(bin_centers, heights)
		)
		parts.append("/")

		parts.append(fr"\sparkdot {mode_x} 1 black ")
		parts.append(r"\end{sparkline}")
		return "".join(parts).strip()


	def f(x):
		"""Summarise the ``Mean_TemperatureC`` column of one group.

		Returns a Series holding the mean, standard deviation, minimum and
		maximum of the column plus the LaTeX sparkline for the group, in
		that order.
		"""
		d = {}

		d['max'] = x['Mean_TemperatureC'].max()
		d['mean'] = x['Mean_TemperatureC'].mean()
		d['std'] = x['Mean_TemperatureC'].std()
		d['min'] = x['Mean_TemperatureC'].min()
		d['sparkline'] = sparkline(x)
		# The explicit index pins the order of the resulting table columns.
		return pd.Series(d, index=['mean', 'std', 'min', 'max', 'sparkline'])


	# First download the data from plotly's GitHub repository
	df = pd.read_csv(
		'https://raw.githubusercontent.com/plotly/datasets/master/2016-weather-data-seattle.csv')

	# Parse the dates once; they feed both the month key and the datetime column.
	dates = pd.to_datetime(df['Date'])
	df['month'] = dates.dt.month

	# Month number -> English month name, used to label the table rows.
	month_dict = {1: 'January', 2: 'February',
		3: 'March', 4: 'April',
		5: 'May', 6: 'June',
		7: 'July', 8: 'August',
		9: 'September', 10: 'October',
		11: 'November', 12: 'December'}

	df = df.sort_values("month")

	# Assignment aligns on the index, so each sorted row keeps its own date.
	df["datetime"] = dates
	df = df.drop(["Date"], axis=1)
	df = df.dropna()

	print(df.head())

	# One row per month: mean, std, min, max and the sparkline histogram.
	df4 = (df
		.groupby("month")
		.apply(f).reset_index()
		)

	df4['month'] = df4['month'].map(month_dict)

	print(df4.head())

	# Highlight the extremes. The result of replace() is assigned back instead
	# of calling replace(inplace=True) on a column selection: that is a chained
	# assignment and leaves df4 untouched under pandas Copy-on-Write.
	overall_max = df4["max"].max()
	overall_min = df4["min"].min()

	df4['max'] = df4['max'].replace(
		overall_max, "\\color{{red}}\\textbf{{{}}}".format(overall_max))
	df4['min'] = df4['min'].replace(
		overall_min, "\\color{{blue}}\\textbf{{{}}}".format(overall_min))

	# Both extremes are read before the column starts to contain strings.
	df4_max = df4["mean"].max()
	df4_min = df4["mean"].min()

	df4['mean'] = df4['mean'].replace(
		df4_max, "\\color{{red}}\\textbf{{{}}}".format(np.round(df4_max, 1)))
	df4['mean'] = df4['mean'].replace(
		df4_min, "\\color{{blue}}\\textbf{{{}}}".format(np.round(df4_min, 1)))

	colnames = [r"\textbf{Month}", r"$\mu{}$",
		r"$\sigma{}$", r"{Min}", r"{Max}", r"{Histogram}"]

	# Two-level header: an empty group over the month column and a
	# "Temperature" group over the remaining five columns.
	cols = pd.MultiIndex(
		levels=[[r"{}", r"\textbf{Temperature}"], colnames],
		codes=[[0, 1, 1, 1, 1, 1], [0, 1, 2, 3, 4, 5]])

	df4.columns = cols

	with pd.option_context("max_colwidth", 100):
		print(df4)

	col_format = "l" + "S[table-format = 2.1, round-precision = 1]" * 4 + "c"

	# Write "<script name>.tbl" into the current working directory. The handle
	# is not called ``f`` so that the function ``f`` above stays bound.
	table_path = os.path.splitext(os.path.basename(__file__))[0] + ".tbl"

	with open(table_path, "w") as table_file:

		# Large max_colwidth so the long sparkline cells are not truncated.
		with pd.option_context("max_colwidth", 100000):

			contents = (df4.to_latex(
				index=False,
				escape=False,
				column_format=col_format,
				multirow=True,
				multicolumn_format="c")).split('\n')

			# NOTE(review): index 3 is presumably the line right after the
			# top header row of the to_latex output; confirm against the
			# installed pandas version.
			contents.insert(3, r"\cmidrule(lr){2-6}")
			table_file.write("\n".join(contents))