Skip to content

Instantly share code, notes, and snippets.

@milimetric
Created October 5, 2016 18:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save milimetric/85e66805f658a23832c8e35dcbc4fb1c to your computer and use it in GitHub Desktop.
Save milimetric/85e66805f658a23832c8e35dcbc4fb1c to your computer and use it in GitHub Desktop.
# Download /srv/reportupdater/output/metrics/sessions into the working folder as sessions.old, then run:
#   python squish.py
# Every sessions.old/*/*.tsv file is rewritten to the same relative path under sessions/.
import csv
import os
from collections import OrderedDict, defaultdict
from datetime import datetime, timedelta

# NOTE(review): presumably this resolves to the stdlib glob module as re-exported
# by the third-party path.py package -- confirm; a plain `import glob` would
# remove the third-party dependency.
from path import glob
# strptime/strftime pattern used for every date this script parses or formats.
DATE_FORMAT = '%Y-%m-%d'


def getDate(dtString):
    """Parse a DATE_FORMAT (YYYY-MM-DD) string into a datetime.

    Raises ValueError (from datetime.strptime) when dtString does not match.
    """
    parsed = datetime.strptime(dtString, DATE_FORMAT)
    return parsed
def toString(dt):
    """Format dt as a DATE_FORMAT (YYYY-MM-DD) string via dt.strftime."""
    formatted = dt.strftime(DATE_FORMAT)
    return formatted
def lastSunday(dt):
    """Return dt moved back to the most recent Sunday (dt itself on a Sunday).

    Only whole days are subtracted, so any time-of-day part is unchanged.
    """
    # isoweekday() runs 1 (Monday) .. 7 (Sunday); modulo 7 maps Sunday to 0.
    daysSinceSunday = dt.isoweekday() % 7
    return dt - timedelta(days=daysSinceSunday)
def _weeklyRows(rows):
    """Collapse daily (date, actions, count) rows into per-week totals.

    rows: iterable of parsed TSV rows. The first row is the header; every
        other row is [YYYY-MM-DD date, actions key, count], where an empty
        count is treated as 0.

    Returns a list of rows ready for csv.writer.writerows: the first three
    header columns, followed by one [week, actions key, total] row per
    week/key pair. `week` is the Sunday on or before the row's date,
    formatted as YYYY-MM-DD. Weeks and keys keep the order in which they
    first appear in the input.

    Raises ValueError when a date does not match DATE_FORMAT or a count is
    not an integer.
    """
    rows = iter(rows)
    header = next(rows, None)
    if header is None:
        # Empty input file: nothing to emit, not even a header.
        return []

    # week string -> (actions key -> summed count). Accumulating straight into
    # the per-week dict means the last week is never lost and rows of the same
    # week add up even when they are not adjacent in the input.
    weekly = OrderedDict()
    for row in rows:
        if not row:
            # csv.reader yields [] for blank lines; there is nothing to count.
            continue
        # Format the key here so the output holds 'YYYY-MM-DD', not the
        # 'YYYY-MM-DD 00:00:00' that str(datetime) would produce.
        weekKey = toString(lastSunday(getDate(row[0])))
        actionsKey = row[1]
        actionCount = int(row[2] or 0)
        counts = weekly.setdefault(weekKey, OrderedDict())
        counts[actionsKey] = counts.get(actionsKey, 0) + actionCount

    squished = [list(header[:3])]
    for weekKey, counts in weekly.items():
        for actionsKey, total in counts.items():
            squished.append([weekKey, actionsKey, total])
    return squished


# Rewrite every daily report under sessions.old/ as a weekly report at the
# same relative path under sessions/.
for path in glob.glob('sessions.old/*/*.tsv'):
    with open(path) as f:
        weeklyRows = _weeklyRows(csv.reader(f, delimiter='\t'))

    # Only the leading folder name is swapped, even if 'sessions.old' happens
    # to appear again deeper in the path.
    outputPath = path.replace('sessions.old', 'sessions', 1)
    outputDir = os.path.dirname(outputPath)
    if outputDir and not os.path.isdir(outputDir):
        # The sessions/<subfolder> tree may not exist yet on a fresh run.
        os.makedirs(outputDir)

    with open(outputPath, 'w') as o:
        csv.writer(o, delimiter='\t').writerows(weeklyRows)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment