Skip to content

Instantly share code, notes, and snippets.

@rldotai
Created December 19, 2015 06:40
Show Gist options
  • Save rldotai/ec1a955b870390718591 to your computer and use it in GitHub Desktop.
Save rldotai/ec1a955b870390718591 to your computer and use it in GitHub Desktop.
A quick script to format tables that were in the `verbatim` environment as proper tables using Pandas.
#!python3
"""
A script to read a document containing tables that I left as "verbatim" and
format them properly in LaTeX.
We go from this:
\begin{verbatim}
sparsity num_active accuracy test_accuracy train_accuracy
count 4000.000000 4000.000000 4000.000000 4000.000000 4000
mean 0.984240 124.657750 0.957216 0.829739 1
std 0.000943 7.460431 0.020602 0.079698 0
min 0.981290 94.000000 0.864583 0.500000 1
25% 0.983565 119.000000 0.947917 0.772727 1
50% 0.984197 125.000000 0.958333 0.833333 1
75% 0.984956 130.000000 0.968750 0.875000 1
max 0.988116 148.000000 1.000000 1.000000 1
\end{verbatim}
to this:
\begin{tabular}{lrrrrr}
\toprule
{} & sparsity & num\_active & accuracy & test\_accuracy & train\_accuracy \\
\midrule
mean & 0.9842 & 124.6577 & 0.9572 & 0.8297 & 1.0000 \\
std & 0.0009 & 7.4604 & 0.0206 & 0.0797 & 0.0000 \\
min & 0.9813 & 94.0000 & 0.8646 & 0.5000 & 1.0000 \\
25\% & 0.9836 & 119.0000 & 0.9479 & 0.7727 & 1.0000 \\
50\% & 0.9842 & 125.0000 & 0.9583 & 0.8333 & 1.0000 \\
75\% & 0.9850 & 130.0000 & 0.9688 & 0.8750 & 1.0000 \\
max & 0.9881 & 148.0000 & 1.0000 & 1.0000 & 1.0000 \\
\bottomrule
\end{tabular}
"""
import pandas as pd
import sys
def read_table(text, separator=None, linebreak='\n', header=True, index=True):
    """Parse a delimited text table into a DataFrame of strings.

    Args:
        text: The raw table; rows are delimited by `linebreak`.
        separator: Passed to `str.split` for every row; `None` splits on
            runs of whitespace.
        linebreak: The string that separates rows.
        header: If true, the first non-blank row supplies the column names.
        index: If true, the first cell of each data row is removed from the
            row and used as that row's index label.

    Returns:
        A `pandas.DataFrame` whose cells are the stripped cell strings; no
        numeric conversion is performed here.
    """
    def parse_row(line):
        return [cell.strip() for cell in line.split(separator)]

    # Blank rows (e.g. from a trailing line break) hold no cells; popping an
    # index label from one would raise IndexError, so drop them up front.
    rows = [row for row in text.split(linebreak) if row.strip()]

    # name the columns according to the first row, if desired
    if header:
        cols = parse_row(rows.pop(0)) if rows else []
    else:
        cols = None

    # parse each remaining row into its cells
    data = [parse_row(row) for row in rows]

    # if we want to keep the index separate, peel off each row's first cell
    indices = [cells.pop(0) for cells in data] if index else None

    return pd.DataFrame(data, columns=cols, index=indices)
def my_df_format(df):
"""Quick and dirty formatting of a dataframe. Adapt as necessary."""
# sparsity num_active accuracy test_accuracy, train_accuracy
df = df.copy()
df = df.drop(['count'])
dtypes = {
'sparsity': float,
'num_active': float,
'accuracy': float,
'test_accuracy': float,
'train_accuracy': float,
}
for k, v in dtypes.items():
df[k] = df[k].astype(v)
formats = {
'sparsity': '{:.4f}'.format,
'num_active': '{:.4f}'.format,
'accuracy': '{:.4f}'.format,
'test_accuracy': '{:.4f}'.format,
'train_accuracy': '{:.4f}'.format,
}
# print(df.to_latex(formatters=formats))
return df.to_latex(formatters=formats)
def format_document(filename):
    """Process the document; a bit of a kludge.

    Reads `filename` line by line. Every run of lines between a line
    containing `\\begin{verbatim}` and a line containing `\\end{verbatim}`
    is parsed with `read_table` and replaced (delimiter lines included) by
    the output of `my_df_format`. All other lines are copied through.

    Args:
        filename: Path of the document to read.

    Returns:
        The whole processed document as a single string.

    Raises:
        Exception: If a `\\begin{verbatim}` appears inside a block that is
            already open.
    """
    with open(filename, 'r') as f:
        lines = f.readlines()

    ret = []
    in_table = False
    begin_line = None
    tmp = []
    for line in lines:
        if r'\begin{verbatim}' in line:
            if in_table:
                raise Exception('Looks like something went wrong')  # exceptional exception
            in_table = True
            # Remember the opening line so an unterminated block can be
            # written back out exactly as it was read.
            begin_line = line
            tmp = []
        elif in_table:
            if r'\end{verbatim}' in line:
                txt = ''.join(tmp).strip()
                df = read_table(txt)
                ret.append(my_df_format(df))
                in_table = False
            else:
                tmp.append(line)
        else:
            ret.append(line)

    # A block that never closes used to vanish from the output together with
    # everything after it; emit it unchanged rather than losing text.
    if in_table:
        ret.append(begin_line)
        ret.extend(tmp)

    return ''.join(ret)
# Get the name of the file to operate on, and optionally an output
if __name__ == "__main__":
    if len(sys.argv) < 2:
        # Without an input path there is nothing to do; say so instead of
        # failing with a bare IndexError.
        sys.exit('usage: {} INPUT [OUTPUT]'.format(sys.argv[0]))

    txt = format_document(sys.argv[1])
    if len(sys.argv) > 2:
        # Context manager so the output is flushed and closed deterministically.
        with open(sys.argv[2], 'w') as out:
            out.write(txt)
    else:
        print(txt)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment