Last active
January 9, 2018 17:40
-
-
Save abluescarab/61f6f01e13b15f4faea12b2b05175847 to your computer and use it in GitHub Desktop.
Convert exported Mechanical Turk JSON files to CSV
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import json | |
import csv | |
import argparse | |
# ------------------------------------------------------------------------------
# Output columns, in the order they are written to the CSV.
# Reorder the entries below to change the column order.
headers = [
    "date",
    "title",
    "state",
    "reward",
    "requester_id",
    "requester_name",
    "requester_feedback",
]
# ------------------------------------------------------------------------------

# command-line interface: one required positional argument, the file to convert
parser = argparse.ArgumentParser()
parser.add_argument("file", help="file to convert", type=str)
def _confirm_overwrite(csvfile):
    """Ask whether the existing file ``csvfile`` may be overwritten.

    Prompts repeatedly until the answer (case-insensitive, surrounding
    whitespace ignored) is one of "y", "yes", "n" or "no".

    Returns True for "y"/"yes" and False for "n"/"no".
    """
    prompt = ("File \"" + os.path.basename(csvfile) +
              "\" already exists. Overwrite? (y/n) ")

    while True:
        answer = input(prompt).strip().lower()

        if answer in ("y", "yes"):
            return True

        if answer in ("n", "no"):
            return False


def _format_cell(hit, header):
    """Return the CSV cell text for column ``header`` of one ``hit`` dict.

    A key that is missing from the hit produces an empty string.
    """
    if header not in hit:
        return ""

    value = hit[header]

    # compare with ==, not "is": string identity is an interpreter detail
    if header == "reward":
        # the reward is a nested object; only the dollar amount is exported
        return str(value["amount_in_dollars"])

    if header == "date":
        # NOTE(review): assumes the date is a "YYYYMMDD..." string -- confirm
        # against the export format; it is rewritten as MM/DD/YYYY
        return value[4:6] + "/" + value[6:8] + "/" + value[:4]

    # just convert to a string for all other headers
    return str(value)


def main():
    """Convert the JSON file named on the command line to a CSV file.

    Reads the "hits" list from the JSON file, then writes a CSV next to it
    (same name, ".csv" extension) with one title-cased header row followed by
    one row per hit. Hits whose state is "Abandoned" or "Returned" are
    skipped. If the CSV file already exists the user is asked for
    confirmation first, and the conversion is cancelled on "n"/"no".

    Raises whatever open(), json.load() or the "hits" lookup raise when the
    input file is missing, is not valid JSON, or has no "hits" key.
    """
    # parse arguments
    args = parser.parse_args()

    # deserialize the json (read as UTF-8 rather than the locale encoding),
    # then capture only the "hits" list
    with open(args.file, 'r', encoding='utf-8') as file:
        hits = json.load(file)["hits"]

    # get the csv filename from the json filename
    csvfile = os.path.splitext(args.file)[0] + ".csv"

    # never clobber an existing csv file without asking
    if os.path.isfile(csvfile) and not _confirm_overwrite(csvfile):
        print("Cancelled conversion.")
        return

    # number of rows actually written (skipped hits are not counted)
    converted = 0

    # open the csv file for writing; newline='' lets the csv module control
    # line endings
    with open(csvfile, 'w', newline='') as file:
        writer = csv.writer(file, delimiter=',')

        # header row: title case, underscores replaced with spaces
        writer.writerow([header.replace('_', ' ').title()
                         for header in headers])

        for hit in hits:
            # if the hit is abandoned or returned, skip it; a hit without a
            # "state" key is kept and gets a blank state column
            if hit.get("state") in ("Abandoned", "Returned"):
                continue

            writer.writerow([_format_cell(hit, header) for header in headers])
            converted += 1

    print("Converted " + str(converted) + " hits to file \"" +
          os.path.basename(csvfile) + "\".")
# only start the conversion when executed as a script, not when imported
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment