-
-
Save springcoil/200f5c9d8f18881c0484d024a2b33919 to your computer and use it in GitHub Desktop.
automated_event_tracking.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
from collections import Counter, defaultdict | |
import pandas as pd | |
import csv | |
def query(): | |
# TODO this script doesn't work | |
# Needs credentials and a connection to a database added to it added to it | |
SQL = """SELECT event + '_' + platform as event_platform, params, ts from snowplow_standard_busuu_events | |
WHERE ts between '2019/01/16' and current_date | |
LIMIT 100000;""" | |
return SQL | |
def collect_items(dataframe): | |
""" | |
Takes in a csv that has event_platform and then a column called params | |
Parses the params column and returns a table of the following for automated event tracking | |
The columns are Event, Platform, Event Property (param), Event property Value, Count | |
""" | |
counts = defaultdict(Counter) | |
df = pd.read_csv(dataframe) | |
df["params"] = df["params"].apply(json.loads) | |
for event_platform, ep_df in df.groupby("event_platform"): | |
for params in ep_df["params"]: | |
for key, value in params.items(): | |
key = json.dumps(key) | |
value = json.dumps(value) | |
counts[event_platform][(key, value)] += 1 | |
items_to_collect = [] | |
for event_platform, ep_counts in counts.items(): | |
item_length = len(counts) | |
print("===", event_platform, "===") | |
for kv_pair, count in ep_counts.items(): | |
print(count, kv_pair) | |
items_to_collect.append([event_platform, kv_pair[0], kv_pair[1], count]) | |
print() | |
return items_to_collect | |
def write_list_to_file(yourlist, filename): | |
"""Write the list to csv file. | |
TODO: This needs some work and some testing | |
""" | |
pd_2 = pd.DataFrame(yourlist) | |
pd_2.to_csv(filename) | |
if __name__ == "__main__": | |
items_to_collect = collect_items( | |
"/Users/peadarcoyle/Documents/event_platform_results.csv" | |
) | |
write_list_to_file( | |
items_to_collect, | |
"/Users/peadarcoyle/Documents/event_platform_results_output.csv", | |
) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The csv input is basically some of the query - and is of the form like this.