Skip to content

Instantly share code, notes, and snippets.

@javierwilson
Created July 24, 2022 07:31
Show Gist options
  • Save javierwilson/e6f056ca7d66dfb1d718db0de7184f9b to your computer and use it in GitHub Desktop.
Save javierwilson/e6f056ca7d66dfb1d718db0de7184f9b to your computer and use it in GitHub Desktop.
counts matches per X time
#!/usr/bin/python3
import fileinput
from datetime import datetime, date, timedelta
from collections import OrderedDict
from argparse import ArgumentParser
# Granularities accepted by --step; each is also a valid timedelta keyword.
STEPS = ('seconds', 'minutes', 'hours')

# Layout of the timestamp once the fractional seconds have been removed.
TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'


def parse_args(argv=None):
    """Parse the command line.

    argv: list of argument strings; None means use sys.argv[1:].
    Returns the argparse namespace with `step` and `files` attributes.
    """
    parser = ArgumentParser()
    # `choices` rejects unsupported steps up front instead of failing (or
    # looping for a very long time) after all the input has been read.
    parser.add_argument("-s", "--step", default='minutes', choices=STEPS, help="group by seconds, minutes or hours")
    parser.add_argument('files', metavar='FILE', nargs='*', help='files to read, if empty, stdin is used')
    return parser.parse_args(argv)


def truncate_datetime(moment, step):
    """Return `moment` rounded down to the start of its `step` bucket.

    step: one of 'seconds', 'minutes' or 'hours'.
    """
    moment = moment.replace(microsecond=0)
    if step in ('minutes', 'hours'):
        moment = moment.replace(second=0)
    if step == 'hours':
        moment = moment.replace(minute=0)
    return moment


def count_matches(lines, step):
    """Count log lines per time bucket.

    Each line is expected to look like:
        17389426-082a-4441-ac34-908dacccafc6 2022-07-22 21:27:13.966846 [DEBUG] a log line
    i.e. the second and third space-separated fields hold the date and time.
    Lines whose timestamp cannot be parsed are skipped.

    lines: iterable of strings.
    step: one of 'seconds', 'minutes' or 'hours'.
    Returns a dict mapping the truncated datetime to the number of lines.
    """
    rows = {}
    for line in lines:
        # Only the first three fields matter; leave the message unsplit.
        cols = line.split(' ', 3)
        # Join date and time, drop fractional seconds, and strip the newline
        # left behind when the line ends right after the time.
        stamp = ' '.join(cols[1:3]).split('.')[0].strip()
        try:
            moment = datetime.strptime(stamp, TIMESTAMP_FORMAT)
        except ValueError:
            # Not a log line in the expected format.
            continue
        bucket = truncate_datetime(moment, step)
        rows[bucket] = rows.get(bucket, 0) + 1
    return rows


def fill_gaps(rows, step):
    """Return a copy of `rows` with a zero entry for every empty bucket.

    Buckets are generated from the earliest to the latest key of `rows`,
    one `step` apart. An empty `rows` yields an empty dict.
    """
    filled = dict(rows)
    if not filled:
        return filled
    delta = timedelta(**{step: 1})
    current, last = min(filled), max(filled)
    while current <= last:
        filled.setdefault(current, 0)
        current += delta
    return filled


def main(argv=None):
    """Read the log files (or stdin) and print a per-bucket match count."""
    args = parse_args(argv)
    # The context manager closes the input stream once counting is done.
    with fileinput.input(files=args.files if args.files else ('-', )) as lines:
        rows = count_matches(lines, args.step)
    if not rows:
        # Message goes to stderr and the exit status is 1.
        raise SystemExit("No valid dates.")
    # print header
    print("Start date: %s" % (min(rows),))
    print("End date: %s" % (max(rows),))
    print('==============================')
    for bucket, count in sorted(fill_gaps(rows, args.step).items()):
        print("%s,%s" % (bucket.strftime(TIMESTAMP_FORMAT), count))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment