Last active
December 19, 2015 04:29
-
-
Save andymboyle/5897556 to your computer and use it in GitHub Desktop.
Management command for Django tutorial project
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from dateutil import parser | |
from django.core.management.base import BaseCommand | |
from optparse import make_option | |
from datetime import datetime | |
from gdocs_importer import logger | |
from gdocs_importer.models import Homicide | |
from gdocs_importer.lib import get_spreadsheet | |
GOOGLE_KEY = '' # Enter the Google key here | |
GOOGLE_SHEET = 0 # This is the Google spreadsheet sheet number you're looking at, and 0 is the default first one | |
GOOGLE_ACCOUNT = '' # Enter your Google account name here, blahwhatever@gmail.com | |
GOOGLE_PASS = '' # Enter your Google account password here | |
class Command(BaseCommand): | |
""" | |
Downloads and ingests all homicides | |
""" | |
option_list = BaseCommand.option_list + ( | |
make_option( | |
'-c', | |
'--clear', | |
action='store_true', | |
default=False, | |
dest='clear', | |
help='Clear all homicides in the DB.'), | |
make_option( | |
'-f', | |
'--first', | |
action='store_true', | |
default=False, | |
dest='first', | |
help='Run the script for the first time.'), | |
) | |
def get_version(self): | |
return "0.1" | |
def handle(self, *args, **options): | |
first_time = False | |
if options['clear']: | |
logger.info("Clearing all homicides from the DB.") | |
Homicide.objects.all().delete() | |
if options['first']: | |
first_time = True | |
self.init_reader(first_time) | |
def dict_for_row(self, row): | |
""" | |
Get a row of data, return a dict whose keys are Homicide properties | |
and values are instance values for that row. | |
""" | |
kwargs = { | |
'address': row[0], | |
'date': row[1], | |
'time': row[2], | |
'location': row[3], | |
'neighborhood': row[4], | |
'age': row[5], | |
'gender': row[6], | |
'race': row[7], | |
'name': row[8], | |
'cause': row[9], | |
'story_url': row[10], | |
'rd_number': row[11], | |
'charges_url': row[12], | |
} | |
return kwargs | |
def get_google_csv(self, key, sheet): | |
""" | |
Connect to the Google doc and return a dict of the data to be used, | |
including finding the header and the rest of the data | |
""" | |
data = list(get_spreadsheet( | |
GOOGLE_ACCOUNT, GOOGLE_PASS, key, sheet)) | |
return [self.dict_for_row(item) for item in data[1:]] | |
def init_reader(self, first_time): | |
""" | |
Loop through the spreadsheet and load the data | |
""" | |
homicide_csv = self.get_google_csv(GOOGLE_KEY, GOOGLE_SHEET) | |
already_exists = 0 | |
start_time = datetime.now() | |
# Run through the spreadsheet, assigning values to certain fields | |
for i, row in enumerate(homicide_csv): | |
address = row['address'] | |
# Send some fields to functions that clean them | |
cleaned_date_time = self.clean_date_time(row['date'], row['time']) | |
location = row['location'] | |
neighborhood = row['neighborhood'] | |
age = self.clean_age(row['age']) | |
gender = self.clean_gender(row['gender']) | |
race = row['race'] | |
name = row['name'] | |
cause = row['cause'] | |
story_url = self.clean_link(row['story_url']) | |
rd_number = row['rd_number'] | |
charges_url = self.clean_link(row['charges_url']) | |
# If the command doesn't use --first, run it this way. | |
if first_time is not True: | |
# Try and see if this homicide exists. If so, skip it. | |
try: | |
homicide = Homicide.objects.get( | |
address=address, | |
name=name) | |
already_exists = already_exists + 1 | |
logger.info('Already exists, skipping it.') | |
# If the homicide doesn't exist, create it. | |
except Homicide.DoesNotExist: | |
homicide = Homicide( | |
address=address, | |
date_time=cleaned_date_time, | |
location=location, | |
neighborhood=neighborhood, | |
age=age, | |
gender=gender, | |
race=race, | |
name=name, | |
cause=cause, | |
story_url=story_url, | |
rd_number=rd_number, | |
charges_url=charges_url) | |
homicide.save() | |
# If you're running it with --first, save everything. | |
else: | |
homicide = Homicide( | |
address=address, | |
date_time=cleaned_date_time, | |
location=location, | |
neighborhood=neighborhood, | |
age=age, | |
gender=gender, | |
race=race, | |
name=name, | |
cause=cause, | |
story_url=story_url, | |
rd_number=rd_number, | |
charges_url=charges_url) | |
homicide.save() | |
logger.info("Saved homicide at %s" % address) | |
logger.info("Skipped %s so far" % already_exists) | |
finish_time = datetime.now() | |
total_time = finish_time - start_time | |
logger.info( | |
"All done, took %s seconds to complete." % total_time.seconds) | |
logger.info("Skipped %s homicides." % already_exists) | |
def clean_date_time(self, date, time): | |
""" | |
Turn the date and time into one field that Django will understand | |
properly instead of being weird, just in case it was inputted wrong. | |
""" | |
try: | |
cleaned_date_time = parser.parse( | |
"%s %s" % (date, time), ignoretz=True) | |
except ValueError: | |
cleaned_date_time = None | |
return cleaned_date_time | |
def clean_age(self, age): | |
""" | |
Make sure it's an integer that's been entered, otherwise it'll break | |
everything. If not, ignore it. | |
""" | |
try: | |
isinstance(age, int) | |
age = int(age) | |
if age is None: | |
age = None | |
return age | |
else: | |
return age | |
except ValueError: | |
age = None | |
return age | |
def clean_gender(self, gender): | |
""" | |
Make sure it's a string that's been entered, otherwise it'll break | |
everything. If not, save it as an unknown gender | |
""" | |
try: | |
isinstance(gender, basestring) | |
except ValueError: | |
new_gender = "Unkown" | |
return new_gender | |
def clean_link(self, link): | |
""" | |
Django can only store URLs up to 200 characters, so if it's too long, | |
this will allow you to ignore the URL and not break your importer. | |
""" | |
try: | |
isinstance(link, basestring) | |
if len(link) > 200: | |
new_link = None | |
return new_link | |
elif link == "": | |
new_link = None | |
return new_link | |
else: | |
return link | |
except ValueError: | |
new_link = None | |
return new_link |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment