Skip to content

Instantly share code, notes, and snippets.

@jribnik
Last active February 4, 2016 17:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jribnik/09cd23222b58a46160d3 to your computer and use it in GitHub Desktop.
Save jribnik/09cd23222b58a46160d3 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import csv
import os
import sys

import dateutil.parser
import pymongo
class MalformedRowError(ValueError):
    """Raised when a data row's field count differs from the header's."""


def collection_name_for(path):
    """Derive the MongoDB collection name from the CSV file path.

    Only the file's base name is considered, so a directory prefix such as
    ``./`` or ``data/`` neither empties out nor leaks into the name. The
    name is everything before the first dot of the base name.
    """
    stem = os.path.basename(path).split('.')[0]
    # NOTE dashes in collection names are problematic in the shell, hence the
    # conversion to underscores
    return stem.replace('-', '_')


def convert_value(key, val):
    """Convert one raw CSV field to the value stored in the document.

    The ``GameDate`` field is parsed into a datetime; every other field is
    stored as an integer when it looks like one. A value that cannot be
    converted is kept as the original string.
    """
    if key == "GameDate":
        try:
            return dateutil.parser.parse(val)
        except (ValueError, OverflowError):
            return val
    # Save integers as such instead of strings
    try:
        return int(val)
    except ValueError:
        return val


def iter_documents(lines):
    """Yield one document (dict) per data row of a CSV source.

    ``lines`` is any iterable of text lines (an open file works). The first
    row is the header and supplies the field names. Columns whose header is
    empty are dropped from every document. An empty source yields nothing.

    Raises:
        MalformedRowError: a data row has a different number of fields than
            the header.
        csv.Error: the csv module could not parse the input.
    """
    reader = csv.reader(lines, escapechar='\\')
    # The header line contains field names; next() with a default works on
    # both Python 2 and 3 and tolerates an empty file.
    keys = next(reader, None)
    if keys is None:
        return
    # The CSV contains multiple null fields. We are removing these by
    # identifying their positions and skipping their positions on each line.
    nulls = {i for i, key in enumerate(keys) if key == ""}
    for vals in reader:
        # Explicit check rather than assert: asserts are stripped under -O.
        if len(vals) != len(keys):
            raise MalformedRowError(
                "expected %d fields, got %d" % (len(keys), len(vals)))
        yield {
            key: convert_value(key, val)
            for i, (key, val) in enumerate(zip(keys, vals))
            if i not in nulls
        }


def main(argv):
    """Load the CSV file named in ``argv[1]`` into the ``nfl`` database.

    Returns the process exit status: 0 on success, 1 for bad usage, 2 when
    the file cannot be opened, 3 for a MongoDB failure, 4 when a row does
    not match the header, 5 for a csv parsing error.
    """
    if len(argv) != 2:
        print("usage: %s csvfile" % argv[0])
        return 1
    path = argv[1]

    # Open the CSV file
    try:
        csvfile = open(path, 'r')
    except IOError:
        print("error: unable to read %s" % path)
        return 2

    # The with-block guarantees the file is closed on every exit path.
    with csvfile:
        # Connect to MongoDB
        try:
            mongo = pymongo.MongoClient()
            collection = mongo.nfl[collection_name_for(path)]
        except pymongo.errors.PyMongoError:
            print("error: unable to connect to MongoDB")
            return 3

        try:
            for doc in iter_documents(csvfile):
                collection.insert_one(doc)
        except MalformedRowError:
            print("error: unable to process %s as csv" % path)
            return 4
        except csv.Error as e:
            # Format with %s: concatenating str + exception raises TypeError.
            print("error: %s" % e)
            return 5
        except pymongo.errors.PyMongoError as e:
            # MongoClient() may connect lazily, so an unreachable server can
            # first show up here, on the insert.
            print("error: MongoDB operation failed: %s" % e)
            return 3
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment