Skip to content

Instantly share code, notes, and snippets.

@lelandbatey
Last active July 27, 2020 20:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lelandbatey/58330f13a02e7b5a0af179d5dee9262b to your computer and use it in GitHub Desktop.
Save lelandbatey/58330f13a02e7b5a0af179d5dee9262b to your computer and use it in GitHub Desktop.
A tool for viewing streams of timestamps as histograms. Uses Matplotlib and Pandas
#!/usr/bin/env python3
'''
A tool for viewing streams of timestamps as histograms.
'''
import argparse
import itertools
import sys
from collections import Counter
from datetime import datetime

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd
# Taken from here: https://stackoverflow.com/a/53995225
def line_format(label):
    """
    Build the tick text for a time label.

    Returns the first three letters of the label's month name; for January
    the label's year is added on a second line.
    """
    abbreviated = label.month_name()[:3]
    if abbreviated != 'Jan':
        return abbreviated
    return f'{abbreviated}\n{label.year}'
def dataframify_datetimes(dt_sequences):
    """
    Align several timestamp sequences onto one shared, sorted axis.

    Args:
        dt_sequences: iterable of iterables of hashable, mutually orderable
            values (datetimes in this tool). One-shot iterators are accepted.

    Returns:
        A tuple ``(all_ts, count_lists)``. ``all_ts`` is the sorted list of
        distinct values seen across every sequence. ``count_lists`` holds one
        list per input sequence, giving how many times that sequence contains
        each entry of ``all_ts`` (0 when absent), in the same order as
        ``all_ts``.
    """
    # Materialize up front: the input is walked twice (once to build the
    # shared axis, once per sequence to count), and a generator would be
    # exhausted after the first pass.
    sequences = [list(seq) for seq in dt_sequences]
    all_ts = sorted(set(itertools.chain.from_iterable(sequences)))

    count_lists = []
    for seq in sequences:
        # Counter reports 0 for values this sequence never contained, and
        # indexing by all_ts keeps each column in axis order by construction,
        # so no per-sequence re-sort or length assertion is needed.
        tally = Counter(seq)
        count_lists.append([tally[ts] for ts in all_ts])
    return all_ts, count_lists
def read_ts_file(f, parsefunc):
    """
    Parse every non-blank line of an open text stream.

    Reads ``f`` in full, splits the text on newline characters, skips lines
    that are empty or whitespace-only, and returns the list of
    ``parsefunc(line)`` results in input order.
    """
    parsed = []
    for line in f.read().split('\n'):
        if not line.strip():
            continue
        parsed.append(parsefunc(line))
    return parsed
def make_fallback_funcs(funcs):
    """
    Combine several parse functions into one that tries each in order.

    Args:
        funcs: iterable of single-argument callables. Each either returns a
            parsed value or raises to signal it cannot handle the input.

    Returns:
        A function ``try_each(ts)`` that returns the result of the first
        callable that does not raise. If every callable raises, it raises
        ``ValueError``, chained to the last failure so the underlying cause
        is visible in the traceback.
    """
    # Snapshot the callables so an iterator argument is not used up by the
    # first call to try_each.
    funcs = tuple(funcs)

    def try_each(ts):
        last_error = None
        for func in funcs:
            try:
                return func(ts)
            except Exception as err:
                # Any failure only means "this parser does not apply";
                # remember it and move on to the next one.
                last_error = err
        raise ValueError(f"No parse functions could successfully parse value {ts}") from last_error

    return try_each
def main():
    """
    Command-line entry point: read timestamps, bin them, and plot the counts.

    Timestamps are read one per line from each file named on the command
    line, or from stdin when no files are given. Each file becomes one line
    on the plot. Counts are resampled into ``--bin-size`` buckets. The graph
    is written to ``--image`` when that option is given; otherwise an
    interactive window is shown.

    Exits via ``parser.error`` when the input contains no timestamps.
    """
    parser = argparse.ArgumentParser(description="View streams of epoch millisecond timestamps as histograms")
    parser.add_argument(
        'files',
        metavar="FILES",
        help="files with epoch millisecond timestamps. If omitted, reads from stdin.",
        # "*" yields a list of zero or more names. The previous "?" produced
        # a single string, which the file loop then walked character by
        # character.
        nargs="*",
        default=None
    )
    parser.add_argument(
        '--bin-size',
        help=
        "How to divide the data for viewing: https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases",
        default="1D"
    )
    parser.add_argument(
        '--image', help="Saves the generated graph as an image instead of showing an interactive window.", default=None
    )
    parser.add_argument(
        '--format',
        help="The datetime format to use when parsing each line of "
        "the input file. May be either a strptime() compatible format such as '%%Y-%%m-%%d %%H:%%M:%%S.%%f' or the "
        "string EPOCHMS which causes each line to be parsed as an integer representing the "
        "number of milliseconds since the UNIX epoch at UTC. Default is EPOCHMS.",
        action='append',
        # With action='append', argparse appends to a non-empty default
        # instead of replacing it, so EPOCHMS could never be overridden.
        # The real default is applied after parsing.
        default=None
    )
    args = parser.parse_args()
    formats = args.format or ["EPOCHMS"]

    def make_parsets(fmt):
        """Return a function parsing one line of text according to ``fmt``."""
        if fmt == "EPOCHMS":
            def parse(text):
                # NOTE: fromtimestamp() without tz gives a naive datetime in
                # the machine's local time zone.
                return datetime.fromtimestamp(int(text.strip()) / 1000)
        else:
            def parse(text):
                return datetime.strptime(text.strip(), fmt)
        return parse

    # Each line is tried against every requested format, in the order given.
    parsefunc = make_fallback_funcs([make_parsets(fmt) for fmt in formats])

    if args.files:
        datetime_sequences = []
        for fn in args.files:
            with open(fn) as f:
                datetime_sequences.append(read_ts_file(f, parsefunc))
    else:
        datetime_sequences = [read_ts_file(sys.stdin, parsefunc)]

    # resample() needs a datetime index; with no data at all it fails with an
    # opaque TypeError, so stop here with a clear message instead.
    if not any(datetime_sequences):
        parser.error("no timestamps found in input")

    index, columns = dataframify_datetimes(datetime_sequences)
    # columns holds one count list per input; zip(*) transposes them into
    # rows so each input becomes a DataFrame column.
    df = pd.DataFrame(zip(*columns), index=index)
    df = df.resample(args.bin_size).sum()
    ax = df.plot(kind='line')
    plt.tight_layout()
    if args.image:
        fig = ax.get_figure()
        fig.savefig(args.image, dpi=2000)
    else:
        plt.show()
# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment