Skip to content

Instantly share code, notes, and snippets.

@absynthe
Created July 21, 2014 13:13
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 4 You must be signed in to fork a gist
  • Save absynthe/b08ae60d567108ac663a to your computer and use it in GitHub Desktop.
Save absynthe/b08ae60d567108ac663a to your computer and use it in GitHub Desktop.
Script for splitting a raw, merged GameAnalytics data file into one CSV file per event category
#!/usr/bin/python
import os.path
import csv
import cjson
import sys
import gzip
import ntpath
def write_event(prefix, event_type, headers, event):
    """Append one event as a CSV row to ``<prefix>_<event_type>.csv``.

    The row is built from ``event`` by ``prepare_data`` using ``headers`` as
    the column list. When the target file is still empty, the header row is
    written before the data row.
    """
    row = prepare_data(headers, event)
    target = "%s_%s.csv" % (prefix, event_type)
    # Binary append mode: the file is created on first use and later events
    # for the same category are added at the end.
    with open(target, 'ab') as out:
        writer = csv.writer(out, lineterminator="\n",
                            quoting=csv.QUOTE_NONNUMERIC)
        # A size of zero means nothing has been written yet, so the column
        # names go first.
        is_empty = os.path.getsize(target) == 0
        if is_empty:
            writer.writerow(headers)
        writer.writerow(row)
def prepare_data(headers, event):
    """Flatten ``event`` into a row holding exactly one value per header.

    Args:
        headers: column names. A plain name (``"game_id"``) is looked up
            directly in ``event``; a dotted name (``"data.user_id"``) is
            looked up as ``event["data"]["user_id"]``.
        event: decoded event dictionary.

    Returns:
        A tuple with the same length as ``headers``. Fields that are absent
        (at either level) are represented by ``""``. A ``revenue`` sub-field
        that holds a mapping is rendered into a single cell as
        ``"<key> <amount>"`` entries joined with ``"; "`` in sorted key order.

    Raises:
        TypeError: if a revenue amount cannot be formatted with ``%d``.
    """
    row = []
    for header in headers:
        if "." not in header:
            # 1st level field.
            row.append(event.get(header, ""))
            continue

        # 2nd level field: only the first two dotted components are used.
        keys = header.split(".")
        parent_key, child_key = keys[0], keys[1]
        parent = event.get(parent_key)
        if not isinstance(parent, dict) or child_key not in parent:
            # Missing parent, non-mapping parent or missing child: keep the
            # column in place with an empty cell.
            row.append("")
            continue

        value = parent[child_key]
        if child_key == "revenue" and isinstance(value, dict):
            # Collapse all entries into ONE cell so the row never grows or
            # shrinks relative to the header; an empty mapping becomes "".
            # Sorting makes the output independent of dict ordering.
            value = "; ".join(
                "%s %d" % (name, value[name]) for name in sorted(value))
        row.append(value)
    return tuple(row)
# TODO: check against the collector's validation logic for missing fields
def get_csv_header(event_type):
    """Return the ordered list of CSV column names for ``event_type``.

    Every known category shares the same skeleton:
    common ``data.*`` fields, category-specific ``data.*`` fields, common
    top-level fields, category-specific trailing fields, and finally the
    common ``user_meta.*`` fields. An unrecognised category yields ``[]``.
    A new list is built on every call.
    """
    shared_data = ["data.session_id", "data.user_id", "data.build"]
    shared_top = ["country_code", "arrival_ts", "game_id"]
    shared_meta = ["user_meta.install_ts", "user_meta.revenue"]
    device_meta = ["user_meta.platform", "user_meta.device",
                   "user_meta.os_major", "user_meta.os_minor",
                   "user_meta.sdk_version"]

    # (category, category-specific data fields, category-specific tail fields)
    layouts = (
        ("quality",
         ["data.value", "data.event_id", "data.area"],
         device_meta),
        ("design",
         ["data.area", "data.event_id", "data.message"],
         device_meta + ["user_meta.gender"]),
        ("error",
         ["data.severity", "data.x", "data.y", "data.z", "data.area",
          "data.message"],
         ["user_meta.gender"]),
        ("user",
         ["data.device", "data.os_major", "data.os_minor",
          "data.platform", "data.sdk_version"],
         device_meta),
        # NOTE(review): "currency" and "amount" are looked up as top-level
        # event fields here, not under "data" - confirm that is intended.
        ("business",
         ["data.event_id", "data.area"],
         ["currency", "amount"]),
    )

    for category, specific_data, specific_tail in layouts:
        if event_type == category:
            return (shared_data + specific_data + shared_top
                    + specific_tail + shared_meta)
    return []
def main():
    """Split a gzipped file of JSON events into per-category CSV files.

    The source path is taken from the first command-line argument. Each
    non-blank line of the file is decoded as one JSON event and appended,
    via ``write_event``, to ``<prefix>_<category>.csv`` where ``<prefix>`` is
    the source file's base name without its last extension and without
    ``.json``. Progress is reported on stdout.

    Raises:
        KeyError: if an event has no ``"category"`` field.
    """
    # print(...) with a single string behaves the same as the print
    # statement under Python 2, which this script targets.
    if len(sys.argv) < 2:
        print("Usage: ./events2csv.py <source_file>")
        return

    source_name = sys.argv[1]
    if not os.path.isfile(source_name):
        print("Supplied source file does not exists!")
        return

    # e.g. "dump.json.gz" -> "dump"
    file_prefix = os.path.splitext(
        ntpath.basename(source_name))[0].replace(".json", "")

    print("Uncompressing file...")
    with gzip.open(source_name) as f:
        print("Reading events from file...")
        written = 0
        for raw_event in f:
            # A blank line (e.g. a trailing newline) is not an event.
            if not raw_event.strip():
                continue
            # Drop non-ASCII bytes. Calling .encode() directly on a byte
            # string first decodes it with the strict ASCII codec and raises
            # on such bytes, so decode with "ignore" and re-encode instead.
            ascii_event = raw_event.decode("ascii", "ignore").encode("ascii")
            event = cjson.decode(ascii_event)
            category = event["category"]
            write_event(file_prefix, category, get_csv_header(category),
                        event)
            written += 1
            # Report progress after the write so the count is accurate.
            sys.stdout.write("\r%d rows written..." % written)
            sys.stdout.flush()
    print("\nDone")
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment