Skip to content

Instantly share code, notes, and snippets.

@handcircus
Created February 14, 2019 07:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save handcircus/184e8d47feec2037cb01429ce760dafd to your computer and use it in GitHub Desktop.
Save handcircus/184e8d47feec2037cb01429ce760dafd to your computer and use it in GitHub Desktop.
#!/usr/bin/python
import os.path
import csv
import cjson
import sys
import gzip
import ntpath
def write_event(prefix, event_type, headers, event):
    """Append a single event as one row of ``<prefix>_<event_type>.csv``.

    The row is produced by ``prepare_data(headers, event)``. When the target
    file is still empty, the ``headers`` row is written first, so each output
    file starts with exactly one header line. Non-numeric cells are quoted and
    rows end with a bare newline.
    """
    row = prepare_data(headers, event)
    target = "%s_%s.csv" % (prefix, event_type)
    with open(target, 'ab') as out:
        writer = csv.writer(out, lineterminator="\n",
                            quoting=csv.QUOTE_NONNUMERIC)
        # Opening in append mode has already created the file, so a size of
        # zero means no earlier call has written to it yet.
        if not os.path.getsize(target):
            writer.writerow(headers)
        writer.writerow(row)
def prepare_data(headers, event):
    """Build one CSV row from ``event``, ordered like ``headers``.

    Each header is either a top-level key of ``event`` (e.g. ``"ip"``) or a
    dotted ``"parent.child"`` name that is looked up as
    ``event[parent][child]``; only the first two dot-separated parts are used.
    Any value that is missing becomes an empty string.

    A ``"<parent>.revenue"`` column holds a mapping of currency -> amount. It
    is rendered into a single cell as ``"<currency> <amount>"`` entries
    (amount formatted with ``%d``), sorted by currency and joined with
    ``", "``. An empty or missing mapping yields ``""``.

    Returns a tuple with exactly one value per header, so the row always lines
    up with the header row.
    """
    row = []
    for header in headers:
        if "." not in header:
            # 1st level field
            row.append(event[header] if header in event else "")
            continue

        # 2nd level field
        parts = header.split(".")
        parent, child = parts[0], parts[1]
        if parent not in event or child not in event[parent]:
            # Covers a missing revenue key too, which used to raise KeyError.
            row.append("")
            continue

        value = event[parent][child]
        if child == "revenue":
            # Emit ONE cell no matter how many currencies are present;
            # appending a cell per currency made rows longer than the header.
            if value:
                value = ", ".join("%s %d" % (currency, value[currency])
                                  for currency in sorted(value))
            else:
                value = ""
        row.append(value)
    return tuple(row)
#TODO: check with collectors validation logic for missing fields
def get_csv_header(event_type):
    """Return the ordered list of CSV column names for ``event_type``.

    Every known layout has the same shape: the shared ``data.*`` columns,
    type-specific ``data.*`` columns, the shared top-level columns, optional
    type-specific trailing columns, and finally the shared ``user_meta.*``
    columns. An unrecognised ``event_type`` yields an empty list. A new list
    is built on every call.

    NOTE(review): some layouts repeat a column ("data.event_id" and
    "data.device" in "quality", "data.platform" in "user", "data.event_id" in
    "business"), and "session_end" uses the same extra column as
    "progression". This is reproduced as-is; confirm whether it is intended.
    """
    shared_data = ["data.event_id", "data.session_id", "data.user_id",
                   "data.build", "data.device", "data.platform",
                   "data.os_version", "data.client_ts"]
    shared_top = ["country_code", "arrival_ts", "game_id", "ip"]
    shared_meta = ["user_meta.install_ts", "user_meta.revenue"]

    # (event type, columns after the shared data block,
    #  columns after the shared top-level block)
    layouts = (
        ("quality",
         ["data.value", "data.event_id"],
         ["user_meta.platform", "data.device",
          "user_meta.os_major", "user_meta.os_minor",
          "user_meta.sdk_version"]),
        ("design", ["data.value"], []),
        ("progression", ["data.attempt_num"], []),
        ("session_end", ["data.attempt_num"], []),
        ("error",
         ["data.severity", "data.x", "data.y", "data.z", "data.message"],
         []),
        ("user",
         ["data.os_major", "data.os_minor",
          "data.platform", "data.sdk_version"],
         []),
        ("business", ["data.event_id"], ["currency", "amount"]),
    )

    for name, after_data, after_top in layouts:
        if event_type == name:
            return (shared_data + after_data + shared_top
                    + after_top + shared_meta)
    return []
def main():
    """Convert a gzipped, line-delimited JSON event dump into CSV files.

    Expects the path of the source file as the first command-line argument.
    Each non-blank line of the decompressed file is decoded as one JSON event;
    its ``data.category`` selects the column layout (``get_csv_header``) and
    the output file (``write_event``), which is named
    ``<prefix>_<category>.csv`` where ``<prefix>`` is the source file's base
    name without its last extension and without ``".json"``.

    Prints a usage line when no argument is given and an error line when the
    path is not an existing file; in both cases nothing is converted.
    """
    # Single-argument print(...) behaves the same as the print statement on
    # Python 2, which this script targets.
    if len(sys.argv) < 2:
        print("Usage: ./events2csv.py <source_file>")
        return

    source_name = sys.argv[1]
    if not os.path.isfile(source_name):
        print("Supplied source file does not exists!")
        return

    print("Uncompressing file...")
    with gzip.open(source_name) as f:
        # ntpath.basename drops the directory part for both "/" and "\" paths.
        file_prefix = os.path.splitext(
            ntpath.basename(source_name))[0].replace(".json", "")
        print("Reading events from file...")
        written = 0
        for raw_event in f:
            if not raw_event.strip():
                # A blank line is not a JSON document; skip it instead of
                # letting the decoder fail.
                continue
            # Drop non-ASCII bytes. Calling .encode() directly on a byte
            # string makes Python 2 do a strict implicit ASCII decode first,
            # so "ignore" never applied and such lines raised
            # UnicodeDecodeError.
            ascii_event = raw_event.decode("ascii", "ignore").encode("ascii")
            event = cjson.decode(ascii_event)
            category = event["data"]["category"]
            headers = get_csv_header(category)
            write_event(file_prefix, category, headers, event)
            written += 1
            # Report progress after the write so the counter is accurate.
            sys.stdout.write("\r%d rows written..." % written)
            sys.stdout.flush()
    print("\nDone")
# Run the conversion only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment