Skip to content

Instantly share code, notes, and snippets.

@bryanhelmig
Last active February 8, 2017 23:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bryanhelmig/11c2c23db8066d2b2fd1349e4a884b00 to your computer and use it in GitHub Desktop.
Save bryanhelmig/11c2c23db8066d2b2fd1349e4a884b00 to your computer and use it in GitHub Desktop.
Counts of users' events by day
"""
For every user and every day, report whether the user was active that day,
plus a count of each event type the user generated on that day.
"""
import os
import csv
from collections import defaultdict
from collections import Counter
# Input CSV of users; after the header row, column 0 is read as the user id.
UFILE = 'data/users.csv'
# Input CSV of events; after the header row, column 0 is read as the user id,
# column 1 as the event type, and column 2 as a date/time string whose first
# whitespace-separated token is used as the date.
EFILE = 'data/events.csv'
# Output CSV: one row per (user, date) with an active flag and event counts.
OUTFILE = 'out/third.csv'
def csv_to_list(fn):
    """Read the CSV file at ``fn`` and return all of its rows.

    Args:
        fn: Path of the CSV file to read.

    Returns:
        A list with one entry per CSV record (the header row, if any, is
        included); each entry is a list of the record's fields as strings.
        An empty file yields an empty list.
    """
    # newline='' disables universal-newline translation so the csv module
    # sees the raw line endings; without it, newlines embedded inside quoted
    # fields are rewritten before they can be parsed.
    with open(fn, newline='') as f:
        return list(csv.reader(f))
def build_rows(users, events):
    """Build the per-user, per-day activity table.

    Args:
        users: Rows of the users CSV, header row first; column 0 of each
            data row is the user id.
        events: Rows of the events CSV, header row first; each data row is
            read as (user id, event type, date/time string), and the first
            whitespace-separated token of the date/time string is the date.

    Returns:
        A list of rows. The first row is the header
        ``['user_id', 'date', 'active', 'count_<event>', ...]`` with event
        types in sorted order. It is followed by one row for every
        combination of user (sorted) and date seen in the events (sorted):
        ``[user_id, date, 't' or 'f', <int count per event type>...]``.
    """
    # Blank lines in a CSV are parsed as empty lists; skip them so indexing
    # into the row does not fail.
    event_rows = [e for e in events[1:] if e]
    user_rows = [u for u in users[1:] if u]

    all_uids = sorted({u[0] for u in user_rows})
    all_dates = sorted({e[2].split()[0] for e in event_rows})
    sorted_etypes = sorted({e[1] for e in event_rows})

    # (user id, date) -> Counter mapping event type -> occurrences
    counts = defaultdict(Counter)
    for e in event_rows:
        counts[e[0], e[2].split()[0]][e[1]] += 1

    no_events = Counter()
    rows = [['user_id', 'date', 'active']
            + [f'count_{etype}' for etype in sorted_etypes]]
    for uid in all_uids:
        for date in all_dates:
            key = (uid, date)
            # Membership test and .get() never insert into the defaultdict,
            # so the active flag cannot be affected by earlier lookups.
            active = 't' if key in counts else 'f'
            day_counts = counts.get(key, no_events)
            rows.append([uid, date, active]
                        + [day_counts[etype] for etype in sorted_etypes])
    return rows


def main():
    """Read UFILE and EFILE, then write the activity table to OUTFILE."""
    events = csv_to_list(EFILE)
    users = csv_to_list(UFILE)
    rows = build_rows(users, events)

    # dirname is '' when OUTFILE has no directory part, and makedirs('')
    # raises, so only create the directory when there is one.
    out_dir = os.path.dirname(OUTFILE)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # csv.writer quotes fields containing commas, quotes or newlines, which
    # plain ','.join() would silently turn into a malformed file.
    # newline='' plus lineterminator=os.linesep keeps the platform's native
    # line ending without double translation.
    with open(OUTFILE, 'w', newline='') as f:
        csv.writer(f, lineterminator=os.linesep).writerows(rows)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment