Created
July 24, 2022 07:31
-
-
Save javierwilson/e6f056ca7d66dfb1d718db0de7184f9b to your computer and use it in GitHub Desktop.
Counts matching log lines per unit of time (second, minute or hour).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3
"""Count timestamped log lines per second, minute or hour.

Each input line is expected to look like:

    17389426-082a-4441-ac34-908dacccafc6 2022-07-22 21:27:13.966846 [DEBUG] a log line

i.e. the second and third space-separated fields hold the date and time.
Lines whose timestamp cannot be parsed are skipped. Buckets with no lines
between the first and the last bucket are reported with a count of 0.
"""
import fileinput
import sys
from datetime import datetime, date, timedelta
from collections import OrderedDict
from argparse import ArgumentParser

# Accepted values for --step; each is also a valid ``timedelta`` keyword.
STEPS = ('seconds', 'minutes', 'hours')
TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'


def _parse_timestamp(line):
    """Return the datetime held in fields 2-3 of *line*, or None if invalid."""
    cols = line.split(' ')
    # Drop fractional seconds. Strip as well: when a line ends right after a
    # time without a fraction, the newline would otherwise reach strptime and
    # make it reject an otherwise valid timestamp.
    text = ' '.join(cols[1:3]).split('.')[0].strip()
    try:
        return datetime.strptime(text, TIMESTAMP_FORMAT)
    except ValueError:
        return None


def _truncate(moment, step):
    """Round *moment* down to the start of its *step* bucket."""
    if step == 'hours':
        return moment.replace(minute=0, second=0)
    if step == 'minutes':
        return moment.replace(second=0)
    return moment


def count_by_step(lines, step='minutes'):
    """Count the lines of *lines* falling into each *step*-sized time bucket.

    Args:
        lines: iterable of log lines (strings).
        step: one of 'seconds', 'minutes' or 'hours'.

    Returns:
        An OrderedDict mapping each bucket start (datetime) to its line
        count, sorted by time, with empty buckets between the first and the
        last one filled in as 0. Empty if no line had a valid timestamp.

    Raises:
        ValueError: if *step* is not one of the supported values.
    """
    if step not in STEPS:
        raise ValueError("step must be one of %s" % ', '.join(STEPS))

    counts = {}
    for line in lines:
        moment = _parse_timestamp(line)
        if moment is None:
            continue
        bucket = _truncate(moment, step)
        counts[bucket] = counts.get(bucket, 0) + 1

    if not counts:
        return OrderedDict()

    # Insert zeros for buckets that received no lines.
    delta = timedelta(**{step: 1})
    bucket = min(counts)
    last = max(counts)
    while bucket <= last:
        counts.setdefault(bucket, 0)
        bucket += delta

    return OrderedDict(sorted(counts.items()))


def main(argv=None):
    """Parse the command line, read the input and print the per-bucket counts."""
    parser = ArgumentParser()
    parser.add_argument("-s", "--step", default='minutes', choices=STEPS,
                        help="group by seconds, minutes or hours")
    parser.add_argument('files', metavar='FILE', nargs='*',
                        help='files to read, if empty, stdin is used')
    args = parser.parse_args(argv)

    # An explicit '-' is passed for stdin: with an empty file list,
    # fileinput would fall back to sys.argv[1:] and treat options as files.
    with fileinput.input(files=args.files or ('-',)) as lines:
        rows = count_by_step(lines, args.step)

    if not rows:
        sys.exit("No valid dates.")

    buckets = list(rows)
    print("Start date: %s" % (buckets[0],))
    print("End date: %s" % (buckets[-1],))
    print('==============================')
    for bucket, count in rows.items():
        print("%s,%s" % (bucket.strftime(TIMESTAMP_FORMAT), count))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment