Skip to content

Instantly share code, notes, and snippets.

@abluescarab
Last active January 9, 2018 17:40
Show Gist options
  • Save abluescarab/61f6f01e13b15f4faea12b2b05175847 to your computer and use it in GitHub Desktop.
Save abluescarab/61f6f01e13b15f4faea12b2b05175847 to your computer and use it in GitHub Desktop.
Convert exported Mechanical Turk JSON files to CSV
import os
import json
import csv
import argparse
# ------------------------------------------------------------------------------
# CSV column order: move entries around in this list to reorder the output
# columns. Each entry is the key that is looked up in every hit.
headers = [
    "date",
    "title",
    "state",
    "reward",
    "requester_id",
    "requester_name",
    "requester_feedback",
]
# ------------------------------------------------------------------------------
# command-line interface: one positional argument naming the file to convert
parser = argparse.ArgumentParser()
parser.add_argument("file", help="file to convert", type=str)
def _confirm_overwrite(path):
    """Ask on stdin whether the existing file at *path* may be overwritten.

    The prompt is repeated until the answer is one of y/yes/n/no
    (case-insensitive, surrounding whitespace ignored).

    Returns:
        True if the user agreed to overwrite, False if they declined.
    """
    prompt = ("File \"" + os.path.basename(path) +
              "\" already exists. Overwrite? (y/n) ")
    while True:
        answer = input(prompt).strip().lower()
        if answer in ("y", "yes"):
            return True
        if answer in ("n", "no"):
            return False


def _hit_to_row(hit):
    """Build one CSV row (a list of strings) from a single hit dict.

    Columns follow the module-level ``headers`` order. Keys missing from the
    hit become empty cells.
    """
    row = []
    for header in headers:
        if header not in hit:
            # keep the column alignment even when the hit lacks this key
            row.append("")
        elif header == "reward":
            # the reward is a nested object; only the dollar amount is exported
            row.append(str(hit[header]["amount_in_dollars"]))
        elif header == "date":
            # assumes the export stores the date as a string starting with
            # YYYYMMDD -- TODO confirm; re-emitted as MM/DD/YYYY
            raw = hit[header]
            row.append(raw[4:6] + "/" + raw[6:8] + "/" + raw[:4])
        else:
            # every other field is written as its plain string form
            row.append(str(hit[header]))
    return row


def main():
    """Convert the JSON file named on the command line to a CSV file.

    Reads the "hits" list from the JSON document, then writes a CSV with the
    same base name next to it (``foo.json`` -> ``foo.csv``). If the CSV
    already exists the user is asked before it is overwritten. Hits whose
    state is "Abandoned" or "Returned" are left out.

    Raises:
        OSError: if the input cannot be read or the output cannot be written.
        json.JSONDecodeError: if the input is not valid JSON.
        KeyError: if the JSON document has no "hits" entry.
    """
    args = parser.parse_args()

    # parse straight from the file handle; JSON text is UTF-8 by specification
    with open(args.file, 'r', encoding="utf-8") as file:
        hits = json.load(file)["hits"]

    # derive the csv filename from the json filename
    csvfile = os.path.splitext(args.file)[0] + ".csv"

    if os.path.isfile(csvfile) and not _confirm_overwrite(csvfile):
        print("Cancelled conversion.")
        return

    written = 0
    # newline='' lets the csv module control line endings itself
    with open(csvfile, 'w', newline='') as file:
        writer = csv.writer(file, delimiter=',')
        # header row: underscores become spaces, words are title-cased
        writer.writerow([header.replace('_', ' ').title()
                         for header in headers])
        for hit in hits:
            # abandoned or returned hits are not exported
            if hit.get("state") in ("Abandoned", "Returned"):
                continue
            writer.writerow(_hit_to_row(hit))
            written += 1

    # report the rows actually written, not the number of hits in the input
    print("Converted " + str(written) + " hits to file \"" +
          os.path.basename(csvfile) + "\".")


# run the conversion only when executed as a script, not when imported
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment