Skip to content

Instantly share code, notes, and snippets.

@springcoil
Last active February 12, 2019 10:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save springcoil/200f5c9d8f18881c0484d024a2b33919 to your computer and use it in GitHub Desktop.
Save springcoil/200f5c9d8f18881c0484d024a2b33919 to your computer and use it in GitHub Desktop.
automated_event_tracking.py
import json
from collections import Counter, defaultdict
import pandas as pd
import csv
def query():
# TODO this script doesn't work
# Needs credentials and a connection to a database added to it added to it
SQL = """SELECT event + '_' + platform as event_platform, params, ts from snowplow_standard_busuu_events
WHERE ts between '2019/01/16' and current_date
LIMIT 100000;"""
return SQL
def collect_items(dataframe):
"""
Takes in a csv that has event_platform and then a column called params
Parses the params column and returns a table of the following for automated event tracking
The columns are Event, Platform, Event Property (param), Event property Value, Count
"""
counts = defaultdict(Counter)
df = pd.read_csv(dataframe)
df["params"] = df["params"].apply(json.loads)
for event_platform, ep_df in df.groupby("event_platform"):
for params in ep_df["params"]:
for key, value in params.items():
key = json.dumps(key)
value = json.dumps(value)
counts[event_platform][(key, value)] += 1
items_to_collect = []
for event_platform, ep_counts in counts.items():
item_length = len(counts)
print("===", event_platform, "===")
for kv_pair, count in ep_counts.items():
print(count, kv_pair)
items_to_collect.append([event_platform, kv_pair[0], kv_pair[1], count])
print()
return items_to_collect
def write_list_to_file(yourlist, filename):
"""Write the list to csv file.
TODO: This needs some work and some testing
"""
pd_2 = pd.DataFrame(yourlist)
pd_2.to_csv(filename)
if __name__ == "__main__":
items_to_collect = collect_items(
"/Users/peadarcoyle/Documents/event_platform_results.csv"
)
write_list_to_file(
items_to_collect,
"/Users/peadarcoyle/Documents/event_platform_results_output.csv",
)
@springcoil
Copy link
Author

springcoil commented Feb 12, 2019

The csv input is basically some of the query - and is of the form like this.

" activity_finished_ web","{""country"": ""TR"", ""objective_id"": ""objective_de_20131106_5"", ""unit_id"": ""1_1_7__de"", ""activity_id"": ""activity_1_1_7__de__2"", ""language_learnt"": ""de"", ""score"": 100, ""activity_result"": ""pass"", ""num_gradable_entities"": 7, ""num_gradable_entities_taken"": 7, ""NILE"": false}"
" activity_finished_iOS","{""unit_id"": ""21_11_7_1__es"", ""activity_result"": ""pass"", ""apptimise_experiments_current"": {""40850"": ""1038411"", ""41592"": ""1063011"", ""47862"": ""1276101"", ""50147"": ""1352181"", ""50870"": ""1375441"", ""31281"": ""702161"", ""52294"": ""1419871""}, ""score"": 100, ""activity_id"": ""activity_21_11_7_1__es__2"", ""objective_id"": ""objective_es_20131106_5"", ""num_gradable_entities_taken"": 9, ""num_gradable_entities"": 9, ""acquisition_channel"": ""Organic""}"
" activity_finished_iOS","{""unit_id"": ""1_1_7_2__es"", ""activity_result"": ""pass"", ""apptimise_experiments_current"": {""40850"": ""1038411"", ""41592"": ""1063011"", ""47862"": ""1276101"", ""50147"": ""1352181"", ""50870"": ""1375441"", ""31281"": ""702161"", ""52294"": ""1419871""}, ""score"": 100, ""activity_id"": ""activity_1_1_7_2__es__3"", ""objective_id"": ""objective_es_20131106_5"", ""num_gradable_entities_taken"": 10, ""num_gradable_entities"": 9, ""acquisition_channel"": ""Organic""}"
" activity_finished_iOS","{""unit_id"": ""1_1_1_3__es"", ""activity_result"": ""pass"", ""apptimise_experiments_current"": {""31281"": ""702161"", ""56084"": ""1539321"", ""47862"": ""1276101"", ""50147"": ""1352181"", ""50870"": ""1375441"", ""41592"": ""1063011"", ""40850"": ""1038431"", ""52294"": ""1419871""}, ""score"": 100, ""activity_id"": ""activity_1_1_1_3__es__3"", ""objective_id"": ""objective_es_a1_20171018_1"", ""num_gradable_entities_taken"": 8, ""num_gradable_entities"": 7, ""acquisition_channel"": ""Organic""}"
" activity_finished_iOS","{""unit_id"": ""1_1_8_1__es"", ""activity_result"": ""pass"", ""apptimise_experiments_current"": {""40850"": ""1038411"", ""41592"": ""1063011"", ""47862"": ""1276101"", ""50147"": ""1352181"", ""50870"": ""1375441"", ""31281"": ""702161"", ""52294"": ""1419871""}, ""score"": 100, ""activity_id"": ""activity_1_1_8_1__es__2"", ""objective_id"": ""objective_es_20131106_6"", ""num_gradable_entities_taken"": 8, ""num_gradable_entities"": 8, ""acquisition_channel"": ""Organic""}"
" activity_finished_iOS","{""activity_result"": ""pass"", ""unit_id"": ""18_1_1__enc"", ""activity_type"": ""vocab"", ""score"": 85, ""apptimise_experiments_current"": {""31281"": ""702161"", ""56084"": ""1539331"", ""47862"": ""1276101"", ""50147"": ""1352181"", ""50870"": ""1375441"", ""41592"": ""1063011"", ""40850"": ""1038411"", ""52294"": ""1419871""}, ""activity_id"": ""activity_18_1_1__enc__1"", ""objective_id"": ""objective_enc_20160215_1"", ""num_gradable_entities_taken"": 9, ""num_gradable_entities"": 7, ""acquisition_channel"": ""PRO Website::inditex""}"```

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment