lelandbatey/histogram_timestamps.py

## histogram_timestamps.py
#!/usr/bin/env python3
'''
A tool for viewing streams of timestamps as histograms.
'''
import matplotlib.pyplot as plt
from datetime import datetime
import matplotlib.dates as mdates
import pandas as pd
import itertools
import argparse
import sys


# Taken from here: https://stackoverflow.com/a/53995225
def line_format(label):
    """
    Build the tick text for a time label: the three-letter month name,
    with the year on a second line when the month is January.
    """
    abbreviated = label.month_name()[:3]
    # Only January carries the year, so each year is labelled exactly once.
    return f'{abbreviated}\n{label.year}' if abbreviated == 'Jan' else abbreviated


def dataframify_datetimes(dt_sequences):
    """
    Turn several sequences of timestamps into aligned per-timestamp counts.

    Parameters:
        dt_sequences: an iterable of iterables of hashable, mutually
            orderable values (datetimes in this tool).

    Returns:
        A tuple ``(all_ts, count_lists)``. ``all_ts`` is the sorted list of
        distinct values found across every sequence. ``count_lists`` holds one
        list per input sequence, where position ``i`` is the number of times
        ``all_ts[i]`` occurs in that sequence (0 when absent).
    """
    # The input is walked twice (once to collect the distinct values, once to
    # count), so materialize it first; otherwise one-shot iterables such as
    # generators would be exhausted by the first pass and count as empty.
    sequences = [list(seq) for seq in dt_sequences]
    all_ts = sorted(set(itertools.chain.from_iterable(sequences)))

    count_lists = []
    for seq in sequences:
        counts = dict.fromkeys(all_ts, 0)
        for ts in seq:
            counts[ts] += 1
        # Read the counts back in all_ts order instead of re-sorting the dict.
        count_lists.append([counts[ts] for ts in all_ts])
    return all_ts, count_lists


def read_ts_file(f, parsefunc):
    """
    Read all of ``f`` and parse each non-blank line with ``parsefunc``.

    ``f`` only needs a ``read()`` method returning text. Lines are split on
    '\\n'; lines that are empty or whitespace-only are skipped. Returns the
    parsed values as a list, in input order.
    """
    parsed = []
    for line in f.read().split('\n'):
        if not line.strip():
            continue
        parsed.append(parsefunc(line))
    return parsed


def make_fallback_funcs(funcs):
    """
    Combine several parse functions into one that tries each in order.

    Parameters:
        funcs: an iterable of one-argument callables.

    Returns:
        A function ``try_each(ts)`` that returns the result of the first
        callable that does not raise.

    Raises (from the returned function):
        ValueError: if every callable raised. The exception from the last
            callable tried is attached as ``__cause__`` so the underlying
            parse failure stays visible in the traceback.
    """
    # Snapshot the callables so a one-shot iterable is not exhausted by the
    # first call to try_each.
    funcs = tuple(funcs)

    def try_each(ts):
        last_error = None
        for f in funcs:
            try:
                return f(ts)
            except Exception as e:
                # Deliberately broad: any failure means "try the next parser".
                last_error = e
        raise ValueError(f"No parse functions could successfully parse value {ts}") from last_error

    return try_each


def main():
    """
    Command-line entry point: read timestamps, bin them, and plot the counts.

    Timestamps are read one per line from every file named on the command
    line, or from stdin when no file is given. Each input becomes one column
    of per-timestamp counts; the columns are resampled with ``--bin-size``,
    summed per bin, and drawn as a line plot. The plot is shown in an
    interactive window unless ``--image`` is given, in which case it is saved
    to that path instead.
    """
    parser = argparse.ArgumentParser(description="View streams of epoch millisecond timestamps as histograms")
    parser.add_argument(
        'files',
        metavar="FILES",
        help="files with epoch millisecond timestamps. If ommited, reads from stdin.",
        # "*" yields a list of paths. The previous "?" yielded a single string,
        # which the loop below then iterated one character at a time.
        nargs="*",
        default=None
    )
    parser.add_argument(
        '--bin-size',
        help=
        "How to divide the data for viewing: https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases",
        default="1D"
    )
    parser.add_argument(
        '--image', help="Saves the generated graph as an image instead of showing an interactive window.", default=None
    )
    parser.add_argument(
        '--format',
        help="The datetime format to use when parsing each line of "
        "the input file. May be either a strptime() compatible format such as '%%Y-%%m-%%d %%H:%%M:%%S.%%f' or the "
        "string EPOCHMS which causes each line to be parsed as an integer representing the "
        "number of milliseconds since the UNIX epoch at UTC. Default is EPOCHMS.",
        action='append',
        # With action='append', argparse appends to the default list rather
        # than replacing it, so a list default could never be overridden.
        # The EPOCHMS fallback is applied after parsing instead.
        default=None
    )
    args = parser.parse_args()
    formats = args.format or ["EPOCHMS"]

    def make_parsets(fmt):
        # Build a str -> datetime parser for one --format value.
        if fmt == "EPOCHMS":
            def pf(x):
                # NOTE: fromtimestamp() without a tz returns a naive datetime
                # in the machine's local time zone.
                return datetime.fromtimestamp(int(x.strip()) / 1000)
        else:
            def pf(x):
                return datetime.strptime(x.strip(), fmt)
        return pf

    # Each line is tried against the formats in the order they were given.
    parsefunc = make_fallback_funcs([make_parsets(fmt) for fmt in formats])

    if args.files:
        datetime_sequences = []
        for fn in args.files:
            with open(fn) as f:
                datetime_sequences.append(read_ts_file(f, parsefunc))
    else:
        datetime_sequences = [read_ts_file(sys.stdin, parsefunc)]

    index, columns = dataframify_datetimes(datetime_sequences)
    if not index:
        # Without any timestamps there is no datetime index to resample.
        parser.error("no timestamps found in input")

    # zip(*columns) transposes the per-input count lists into one row per
    # timestamp, giving one DataFrame column per input.
    df = pd.DataFrame(zip(*columns), index=index)
    df = df.resample(args.bin_size).sum()
    ax = df.plot(kind='line')

    plt.tight_layout()
    if args.image:
        fig = ax.get_figure()
        fig.savefig(args.image, dpi=2000)
    else:
        plt.show()


# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
	#!/usr/bin/env python3
	'''
	A tool for viewing streams of timestamps as histograms.
	'''
	import matplotlib.pyplot as plt
	from datetime import datetime
	import matplotlib.dates as mdates
	import pandas as pd
	import itertools
	import argparse
	import sys


	# Taken from here: https://stackoverflow.com/a/53995225
	def line_format(label):
	    """
	    Build the tick text for a time label: the three-letter month name,
	    with the year on a second line when the month is January.
	    """
	    abbreviated = label.month_name()[:3]
	    # Only January carries the year, so each year is labelled exactly once.
	    return f'{abbreviated}\n{label.year}' if abbreviated == 'Jan' else abbreviated


	def dataframify_datetimes(dt_sequences):
	    """
	    Turn several sequences of timestamps into aligned per-timestamp counts.

	    Parameters:
	        dt_sequences: an iterable of iterables of hashable, mutually
	            orderable values (datetimes in this tool).

	    Returns:
	        A tuple ``(all_ts, count_lists)``. ``all_ts`` is the sorted list of
	        distinct values found across every sequence. ``count_lists`` holds
	        one list per input sequence, where position ``i`` is the number of
	        times ``all_ts[i]`` occurs in that sequence (0 when absent).
	    """
	    # The input is walked twice (once to collect the distinct values, once
	    # to count), so materialize it first; otherwise one-shot iterables such
	    # as generators would be exhausted by the first pass.
	    sequences = [list(seq) for seq in dt_sequences]
	    all_ts = sorted(set(itertools.chain.from_iterable(sequences)))

	    count_lists = []
	    for seq in sequences:
	        counts = dict.fromkeys(all_ts, 0)
	        for ts in seq:
	            counts[ts] += 1
	        # Read the counts back in all_ts order instead of re-sorting.
	        count_lists.append([counts[ts] for ts in all_ts])
	    return all_ts, count_lists


	def read_ts_file(f, parsefunc):
	    """
	    Read all of ``f`` and parse each non-blank line with ``parsefunc``.

	    ``f`` only needs a ``read()`` method returning text. Lines are split on
	    '\\n'; lines that are empty or whitespace-only are skipped. Returns the
	    parsed values as a list, in input order.
	    """
	    data = f.read()
	    return [parsefunc(x) for x in data.split('\n') if x.strip()]


	def make_fallback_funcs(funcs):
	    """
	    Combine several parse functions into one that tries each in order.

	    Parameters:
	        funcs: an iterable of one-argument callables.

	    Returns:
	        A function ``try_each(ts)`` that returns the result of the first
	        callable that does not raise.

	    Raises (from the returned function):
	        ValueError: if every callable raised. The exception from the last
	            callable tried is attached as ``__cause__``.
	    """
	    # Snapshot the callables so a one-shot iterable is not exhausted by the
	    # first call to try_each.
	    funcs = tuple(funcs)

	    def try_each(ts):
	        last_error = None
	        for f in funcs:
	            try:
	                return f(ts)
	            except Exception as e:
	                # Deliberately broad: any failure means "try the next parser".
	                last_error = e
	        raise ValueError(f"No parse functions could successfully parse value {ts}") from last_error

	    return try_each


	def main():
	    """
	    Command-line entry point: read timestamps, bin them, and plot the counts.

	    Timestamps are read one per line from every file named on the command
	    line, or from stdin when no file is given. Each input becomes one column
	    of per-timestamp counts; the columns are resampled with ``--bin-size``,
	    summed per bin, and drawn as a line plot. The plot is shown in an
	    interactive window unless ``--image`` is given, in which case it is
	    saved to that path instead.
	    """
	    parser = argparse.ArgumentParser(description="View streams of epoch millisecond timestamps as histograms")
	    parser.add_argument(
	        'files',
	        metavar="FILES",
	        help="files with epoch millisecond timestamps. If ommited, reads from stdin.",
	        # "*" yields a list of paths. The previous "?" yielded a single
	        # string, which the loop below iterated one character at a time.
	        nargs="*",
	        default=None
	    )
	    parser.add_argument(
	        '--bin-size',
	        help=
	        "How to divide the data for viewing: https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases",
	        default="1D"
	    )
	    parser.add_argument(
	        '--image', help="Saves the generated graph as an image instead of showing an interactive window.", default=None
	    )
	    parser.add_argument(
	        '--format',
	        help="The datetime format to use when parsing each line of "
	        "the input file. May be either a strptime() compatible format such as '%%Y-%%m-%%d %%H:%%M:%%S.%%f' or the "
	        "string EPOCHMS which causes each line to be parsed as an integer representing the "
	        "number of milliseconds since the UNIX epoch at UTC. Default is EPOCHMS.",
	        action='append',
	        # With action='append', argparse appends to the default list rather
	        # than replacing it, so a list default could never be overridden.
	        # The EPOCHMS fallback is applied after parsing instead.
	        default=None
	    )
	    args = parser.parse_args()
	    formats = args.format or ["EPOCHMS"]

	    def make_parsets(fmt):
	        # Build a str -> datetime parser for one --format value.
	        if fmt == "EPOCHMS":
	            def pf(x):
	                # NOTE: fromtimestamp() without a tz returns a naive
	                # datetime in the machine's local time zone.
	                return datetime.fromtimestamp(int(x.strip()) / 1000)
	        else:
	            def pf(x):
	                return datetime.strptime(x.strip(), fmt)
	        return pf

	    # Each line is tried against the formats in the order they were given.
	    parsefunc = make_fallback_funcs([make_parsets(fmt) for fmt in formats])

	    if args.files:
	        datetime_sequences = []
	        for fn in args.files:
	            with open(fn) as f:
	                datetime_sequences.append(read_ts_file(f, parsefunc))
	    else:
	        datetime_sequences = [read_ts_file(sys.stdin, parsefunc)]

	    index, columns = dataframify_datetimes(datetime_sequences)
	    if not index:
	        # Without any timestamps there is no datetime index to resample.
	        parser.error("no timestamps found in input")

	    # zip(*columns) transposes the per-input count lists into one row per
	    # timestamp, giving one DataFrame column per input.
	    df = pd.DataFrame(zip(*columns), index=index)
	    df = df.resample(args.bin_size).sum()
	    ax = df.plot(kind='line')

	    plt.tight_layout()
	    if args.image:
	        fig = ax.get_figure()
	        fig.savefig(args.image, dpi=2000)
	    else:
	        plt.show()


	# Run the command-line tool only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == '__main__':
	    main()