Skip to content

Instantly share code, notes, and snippets.

@felixixen
Forked from absynthe/events2csv
Last active May 16, 2019 07:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save felixixen/c2e881a8c83d6eb8e0e0 to your computer and use it in GitHub Desktop.
Save felixixen/c2e881a8c83d6eb8e0e0 to your computer and use it in GitHub Desktop.
#!/usr/bin/python
# -*- coding: utf-8 -*-
import os.path
import csv
import cjson
import sys
import gzip
import ntpath
def write_event(prefix, event_type, headers, event):
    """Append one event as a row to the CSV file "<prefix>_<event_type>.csv".

    The row is built by prepare_data(headers, event). The header row is
    emitted first whenever the target file is still empty, so each output
    file starts with its column names.
    """
    row = prepare_data(headers, event)
    target = "%s_%s.csv" % (prefix, event_type)
    with open(target, 'ab') as out:
        writer = csv.writer(out, lineterminator="\n",
                            quoting=csv.QUOTE_NONNUMERIC)
        # A zero-byte file has no header row yet.
        if os.stat(target).st_size == 0:
            writer.writerow(headers)
        writer.writerow(row)
def prepare_data(headers, event):
    """Build one CSV row from *event* with exactly one cell per header.

    Args:
        headers: column names. A name containing "." (e.g. "data.user_id")
            addresses a field nested one level inside the event; any other
            name addresses a top-level field.
        event: the decoded event, a dict.

    Returns:
        A tuple of len(headers) cells. A field that is missing from the
        event yields "" so rows always line up with the header row.
    """
    return tuple(_cell_for_header(header, event) for header in headers)


def _cell_for_header(header, event):
    """Return the single cell value for *header*, or "" when it is absent."""
    if "." not in header:
        # 1st level field
        return event.get(header, "")

    # 2nd level field: only the first two dotted parts are used.
    parent_key, child_key = header.split(".")[:2]
    parent = event.get(parent_key)
    # A missing or non-dict parent cannot hold the nested field.
    if not isinstance(parent, dict) or child_key not in parent:
        return ""

    value = parent[child_key]
    if child_key == "revenue":
        return _format_revenue(value)
    return value


def _format_revenue(revenue):
    """Render a revenue mapping as one cell, e.g. "USD 100"."""
    if not isinstance(revenue, dict):
        # Unexpected shape: pass it through rather than abort the export.
        return revenue
    if not revenue:
        return ""
    # Several currencies are joined into ONE cell (in sorted order) so the
    # row keeps the same number of columns as the header row.
    return "; ".join("%s %d" % (currency, revenue[currency])
                     for currency in sorted(revenue))
# TODO: check against the collector's validation logic for missing fields
def get_csv_header(event_type):
    """Return the ordered list of CSV column names for *event_type*.

    Every known category shares the same skeleton: the session columns,
    the category's own data columns, the envelope columns, the category's
    extra columns, and finally the install/revenue columns. An unknown
    category yields an empty list.
    """
    session_columns = ["data.session_id", "data.user_id", "data.build"]
    envelope_columns = ["country_code", "arrival_ts", "game_id"]
    device_columns = ["user_meta.platform", "user_meta.device",
                      "user_meta.os_major", "user_meta.os_minor",
                      "user_meta.sdk_version"]
    closing_columns = ["user_meta.install_ts", "user_meta.revenue"]

    # (category, data columns, columns placed after the envelope columns)
    layouts = (
        ("quality",
         ["data.value", "data.event_id", "data.area"],
         device_columns),
        ("design",
         ["data.area", "data.event_id", "data.message"],
         device_columns + ["user_meta.gender"]),
        ("error",
         ["data.severity", "data.x", "data.y", "data.z", "data.area",
          "data.message"],
         ["user_meta.gender"]),
        ("user",
         ["data.device", "data.os_major", "data.os_minor",
          "data.platform", "data.sdk_version"],
         device_columns),
        ("business",
         ["data.event_id", "data.area"],
         ["currency", "amount"]),
    )

    for category, data_columns, extra_columns in layouts:
        if event_type == category:
            return (session_columns + data_columns + envelope_columns
                    + extra_columns + closing_columns)
    return []
def main():
    """Convert a gzipped file of JSON events into per-category CSV files.

    The source path is taken from sys.argv[1]. Each non-blank line of the
    decompressed file is decoded as one JSON event; the event's "category"
    selects both the CSV columns and the output file, which is named
    "<source base name without extension and '.json'>_<category>.csv".
    Progress is reported on stdout.
    """
    # print(...) with a single argument produces the same output whether
    # print is a statement or a function.
    if len(sys.argv) < 2:
        print("Usage: ./events2csv.py <source_file>")
        return

    source_name = sys.argv[1]
    if not os.path.isfile(source_name):
        print("Supplied source file does not exists!")
        return

    print("Uncompressing file...")
    with gzip.open(source_name) as f:
        # Drop the directory, the last extension and any ".json" part.
        file_prefix = os.path.splitext(
            ntpath.basename(source_name))[0].replace(".json", "")
        print("Reading events from file...")
        rows_written = 0
        for raw_event in f:
            # A blank line (e.g. a trailing newline) is not an event.
            if not raw_event.strip():
                continue
            event = cjson.decode(raw_event.decode("ascii", "ignore"))
            category = event["category"]
            headers = get_csv_header(category)
            write_event(file_prefix, category, headers, event)
            rows_written += 1
            # Report progress after the write so the final figure equals
            # the number of rows actually written.
            sys.stdout.write("\r%d rows written..." % rows_written)
            sys.stdout.flush()
    print("\nDone")


if __name__ == '__main__':
    main()
@felixixen
Copy link
Author

Fixed a problem with the JSON code; the CSV is now created with all fields.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment