Last active
February 4, 2016 17:27
-
-
Save jribnik/09cd23222b58a46160d3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
import csv
import os
import sys

import dateutil.parser
import pymongo
if len(sys.argv) != 2: | |
print("usage: %s csvfile" % sys.argv[0]) | |
sys.exit(1) | |
# Open the CSV file | |
try: | |
csvfile = open(sys.argv[1], 'r') | |
except IOError as e: | |
print("error: unable to read %s" % sys.argv[1]) | |
sys.exit(2) | |
# Connect to MongoDB | |
try: | |
mongo = pymongo.MongoClient() | |
# NOTE dashes in collection names are problematic in the shell, hence the | |
# conversion to underscores | |
collection = mongo.nfl[sys.argv[1].split('.')[0].replace('-', '_')] | |
except pymongo.errors.PyMongoError as e: | |
print("error: unable to connect to MongoDB") | |
sys.exit(3) | |
try: | |
reader = csv.reader(csvfile, escapechar='\\') | |
# The header line contains field names | |
keys = reader.next() | |
# The CSV contains multiple null fields. We are removing these by | |
# identifying their positions and skipping their positions on each line. | |
nulls = {int(i) for i in range(len(keys)) if keys[i] == ""} | |
for vals in reader: | |
try: | |
assert(len(keys) == len(vals)) | |
except AssertionError as e: | |
print("error: unable to process %s as csv" % sys.argv[1]) | |
sys.exit(4) | |
doc = {} | |
for i in range(len(vals)): | |
# Is this a null position? | |
if i in nulls: | |
continue | |
val = vals[i] | |
# Convert GameDate string to datetime | |
if keys[i] == "GameDate": | |
try: | |
val = dateutil.parser.parse(val) | |
except ValueError as e: | |
pass | |
else: | |
# Save integers as such instead of strings | |
try: | |
val = int(val) | |
except ValueError as e: | |
pass | |
doc[keys[i]] = val | |
collection.insert_one(doc) | |
except csv.Error as e: | |
print("error: " + e) | |
sys.exit(5) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment