Skip to content

Instantly share code, notes, and snippets.

@fferegrino
Last active February 2, 2018 14:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fferegrino/e0e184354ddb454570c34a707769fa51 to your computer and use it in GitHub Desktop.
Save fferegrino/e0e184354ddb454570c34a707769fa51 to your computer and use it in GitHub Desktop.
Read Terrier output
import re
import pandas as pd
def __parse_time_elapsed__(time_line):
    """Extract the elapsed time, in seconds, from a summary line.

    The value is read from the text that follows the first ``:`` in
    *time_line*. Anything after the number (a ``seconds.`` suffix, a trailing
    newline left by ``readlines()``, ...) is ignored.

    Args:
        time_line: The summary line; assumed to look like
            ``"<label>: <number> seconds."``.

    Returns:
        The elapsed time as a ``float``.

    Raises:
        IndexError: If *time_line* contains no ``:``.
        ValueError: If no number follows the first ``:``.
    """
    after_colon = time_line.split(":")[1]
    # Match the leading number instead of slicing off a fixed-width suffix:
    # slicing fails as soon as the line still carries its line terminator.
    number = re.match(
        r"\s*([-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)", after_colon
    )
    if number is None:
        raise ValueError("no elapsed time found in line: %r" % (time_line,))
    return float(number.group(1))
def __file_to_lines__(path):
    """Read the text file at *path* and return its lines as a list.

    Line terminators are kept, exactly as ``readlines()`` yields them.
    """
    with open(path, "r") as handle:
        return handle.readlines()
def parse_stats(stats, contains_command=True):
    """Parse captured statistics output into a dictionary.

    Args:
        stats: Either the path of a file holding the output (``str``) or an
            already-loaded sequence of its lines.
        contains_command: ``True`` when the output starts with one extra
            line (the command that was run), which is skipped in addition
            to the three leading lines that are always skipped.

    Returns:
        A ``dict`` mapping each statistic label to its ``int`` value, plus a
        ``"time elapsed"`` entry (``float``) parsed from the last line.

    Raises:
        ValueError: If a statistic value is not an integer.
    """
    # isinstance (rather than an exact type comparison) also treats str
    # subclasses as paths.
    lines = __file_to_lines__(stats) if isinstance(stats, str) else stats
    first_stat = (1 if contains_command else 0) + 3

    # Every statistic line starts with a fixed-width prefix of 65 characters
    # (presumably the logger's timestamp/level/class columns -- TODO confirm).
    prefix_width = 65

    sts = {}
    for line in lines[first_stat:-1]:
        fields = line[prefix_width:].split(':')
        if len(fields) < 2:
            # Blank or short lines hold no "label: value" pair; skip them
            # instead of failing with an IndexError.
            continue
        sts[fields[0]] = int(fields[1].strip())
    sts["time elapsed"] = __parse_time_elapsed__(lines[-1])
    return sts
# Matches a log line shaped like
#   "<H:MM:SS.mmm> [<word>] <UPPERCASE> <dotted.name> - <message>"
# and captures, in order: the timestamp, the upper-case word, the dotted name
# and the message (word characters, whitespace and colons only).
# Written as a raw string so the regex escapes (\s, \w, \[, ...) are not read
# as invalid Python string escapes, which newer interpreters warn about.
parsed_output = re.compile(r"([0-9]+\:[0-9]{2}\:[0-9]{2}\.[0-9]{3})\s\[\w+\]\s([A-Z]+)\s+([\w\.]+) - ([\w\s0-9\:]+)")
def parse_indexing(indexing_results, contains_command=True):
    """Parse captured indexing output into a table of log records.

    Args:
        indexing_results: Either the path of a file holding the output
            (``str``) or an already-loaded sequence of its lines.
        contains_command: ``True`` when the output starts with one extra
            line (the command that was run), which is skipped in addition
            to the two leading lines that are always skipped.

    Returns:
        A tuple ``(info, time)`` where ``info`` is a ``pandas.DataFrame``
        with the columns ``time``, ``kind``, ``clase`` and ``message`` (one
        row per line matched by ``parsed_output``; other lines are ignored)
        and ``time`` is the elapsed time (``float``) parsed from the last
        line.
    """
    # isinstance (rather than an exact type comparison) also treats str
    # subclasses as paths.
    if isinstance(indexing_results, str):
        lines = __file_to_lines__(indexing_results)
    else:
        lines = indexing_results
    first_record = (1 if contains_command else 0) + 2

    records = []
    for line in lines[first_record:-1]:
        match = parsed_output.search(line)
        if match:
            # Groups 1-4: timestamp, kind, class name, message.
            records.append([field.strip() for field in match.group(1, 2, 3, 4)])
    info = pd.DataFrame(records, columns=['time', 'kind', 'clase', 'message'])
    elapsed = __parse_time_elapsed__(lines[-1])
    return info, elapsed
def parse_run_results(results, contains_command=True):
    """Parse captured output of a retrieval run.

    Args:
        results: Either the path of a file holding the output (``str``) or
            an already-loaded sequence of its lines.
        contains_command: Accepted for symmetry with the other parsers; it
            is not used here because only the last two lines are read.

    Returns:
        A ``dict`` with ``"results"`` (the text following the first
        ``"to "`` on the next-to-last line, stripped) and
        ``"time elapsed"`` (``float``, parsed from the last line).

    Raises:
        ValueError: If the next-to-last line does not contain ``"to "``.
    """
    # isinstance (rather than an exact type comparison) also treats str
    # subclasses as paths.
    lines = __file_to_lines__(results) if isinstance(results, str) else results

    summary = lines[-2]
    marker = "to "
    # Presumably the line reads "... results to <path>" -- TODO confirm.
    start = summary.index(marker) + len(marker)

    sts = {}
    sts["results"] = summary[start:].strip()
    sts["time elapsed"] = __parse_time_elapsed__(lines[-1])
    return sts
def parse_evaluation_results(results, contains_command=True):
    """Parse captured output of an evaluation run.

    Args:
        results: Either the path of a file holding the output (``str``) or
            an already-loaded sequence of its lines.
        contains_command: Accepted for symmetry with the other parsers; it
            is not used here because only trailing lines are read.

    Returns:
        A ``dict`` with ``"results"`` (the third-from-last line with its
        first 85 characters removed, stripped) and ``"time elapsed"``
        (``float``, parsed from the last line).
    """
    # isinstance (rather than an exact type comparison) also treats str
    # subclasses as paths.
    lines = __file_to_lines__(results) if isinstance(results, str) else results

    # The reported value sits after a fixed-width prefix of 85 characters
    # (presumably log columns plus a fixed message -- TODO confirm).
    prefix_width = 85

    sts = {}
    sts["results"] = lines[-3][prefix_width:].strip()
    sts["time elapsed"] = __parse_time_elapsed__(lines[-1])
    return sts
def read_eval(eval_file, skip_first=True):
    """Load a tab-separated evaluation file and split it into two tables.

    The file is read without a header as three columns named ``measure``,
    ``query`` and ``value``. Rows with missing fields are dropped and the
    measure names are stripped of surrounding whitespace.

    Args:
        eval_file: Path or file-like object accepted by ``pd.read_table``.
        skip_first: When true, the first row of the file is skipped.

    Returns:
        A tuple ``(totals, per_query)``. ``totals`` holds the rows whose
        query is ``'all'``; ``per_query`` holds every other row, indexed by
        the query converted to ``int`` and sorted. Both are pivoted so that
        each measure is a column.
    """
    rows_to_skip = [0] if skip_first else None
    frame = pd.read_table(
        eval_file,
        sep='\t',
        header=None,
        skiprows=rows_to_skip,
        names=['measure', 'query', 'value'],
    )
    frame = frame.dropna()
    frame = frame.assign(measure=frame['measure'].str.strip())

    def _by_measure(subset):
        # One row per query, one column per measure.
        return subset.pivot(index='query', columns='measure', values='value')

    is_total = frame['query'] == 'all'

    per_query = _by_measure(frame[~is_total])
    per_query.index = per_query.index.astype(int)
    per_query = per_query.sort_index()

    totals = _by_measure(frame[is_total])
    return totals, per_query
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment