Last active
August 29, 2015 14:06
-
-
Save alexwoolford/410530a345ebe46456a2 to your computer and use it in GitHub Desktop.
Denver Public Schools: find the closest schools
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
from bs4 import BeautifulSoup | |
import urllib2 | |
import re | |
from pygeocoder import Geocoder | |
import time | |
from pymongo import MongoClient | |
# Download the Denver Public Schools list page and parse it. The response is
# closed explicitly because urllib2 responses are not context managers.
# NOTE(review): no parser is named, so BeautifulSoup picks whichever one is
# installed; consider pinning one so the parse is reproducible.
response = urllib2.urlopen('http://www.dpsk12.org/schoollist/default.aspx')
try:
    soup = BeautifulSoup(response.read())
finally:
    response.close()

# Each school has an ID number that's listed in parentheses after the name.
# The regular expression captures everything before that "(<digits>)" suffix.
schoolNamePattern = re.compile(r'(.*)\([0-9]+\)')

# Build a dictionary, keyed by the link's id attribute, holding the name,
# address, and phone number for each school. The address and phone are the
# text of the first and second <td> elements that follow the school link.
schoolDict = dict()
for link in soup.find('table', {'id': 'mytable'}).findAll('a', {'class': 'tooltip'}):
    schoolId = link['id']
    addressCell = link.findNext('td')
    phoneCell = addressCell.findNext('td')
    # Fall back to the raw link text when the name carries no "(<digits>)"
    # suffix instead of failing with an IndexError.
    nameMatch = schoolNamePattern.search(link.text)
    schoolName = (nameMatch.group(1) if nameMatch else link.text).strip()
    schoolDict[schoolId] = {
        'name': schoolName,
        'address': addressCell.text,
        'phone': phoneCell.text,
        'id': schoolId,
    }
# Geocode every school. The lookup string is "<name>, <address>, Denver,
# Colorado, USA"; the attribute dictionary of the geocoder result is stored
# under 'geodata', or None when the lookup fails.
for schoolId, school in schoolDict.items():
    # Pause before every request so the geocoding service is not hammered.
    time.sleep(0.2)
    schoolNameAddress = ', '.join([school['name'], school['address'], 'Denver, Colorado, USA'])
    try:
        geodata = Geocoder.geocode(schoolNameAddress).__dict__
    except Exception as error:
        # Best effort: report the failure and carry on with the next school.
        # Catching Exception (rather than a bare except) lets Ctrl-C and
        # SystemExit still stop the script.
        print("no geodata for %s: %r" % (schoolNameAddress, error))
        geodata = None
    school['geodata'] = geodata
# Store every school record in the 'schools' collection of the 'dots'
# database, using a MongoClient created with its default arguments.
# NOTE(review): Collection.insert is deprecated in PyMongo 3 in favour of
# insert_one/insert_many -- confirm the installed version before switching.
client = MongoClient()
db = client['dots']
collection = db['schools']
for school in schoolDict.values():
    collection.insert(school)
# MongoDB's geoWithin filter expects the location to be listed longitude, then
# latitude. Create a [lng, lat] attribute called 'location' on each record,
# taken from the first geocoding result, for the filter to use.
for record in collection.find():
    geodata = record.get('geodata')
    results = geodata.get('data') if geodata else None
    if not results:
        # 'geodata' is None when geocoding failed for this school (or the
        # result list is empty); there is no coordinate to copy, so skip the
        # record instead of raising.
        print("skipping %s: no geocoding result" % record.get('name'))
        continue
    point = results[0]['geometry']['location']
    collection.update(
        {'_id': record['_id']},
        {'$set': {'location': [point['lng'], point['lat']]}},
        upsert=False,
        multi=False,
    )
def getSchools(searchLocation, radius):
    """Return the schools lying within `radius` of `searchLocation`.

    `searchLocation` is geocoded and the first result is used as the centre
    of a $geoWithin/$centerSphere query against the 'location' attribute of
    the schools collection.

    searchLocation -- free-text place or address passed to the geocoder.
    radius         -- search radius; it is divided by 3959 to express it as
                      an angle for $centerSphere (i.e. the radius is in miles).

    Returns a list of (name, address) tuples, one per matching school.
    """
    origin = Geocoder.geocode(searchLocation).data[0]['geometry']['location']
    radiusInRadians = float(radius) / 3959
    withinRadius = {
        'location': {
            '$geoWithin': {
                '$centerSphere': [[origin['lng'], origin['lat']], radiusInRadians],
            },
        },
    }
    return [(school['name'], school['address']) for school in collection.find(withinRadius)]
# Example: look up every school within 1 mile of the Denver Museum of Nature
# and Science.
getSchools(searchLocation='Denver Museum of Nature and Science', radius=1)
# When the gist was written, that call returned:
# [(u'Teller', u'1150 Garfield St. 80206-3513'),
# (u'Park Hill', u'5050 E. 19th Ave. 80220-1229'),
# (u'Stedman', u'2940 Dexter St. 80207-2643'),
# (u'P.R.E.P. Academy HS', u'2727 Columbine St. 80205-3709'),
# (u'East', u'1600 City Park Esplanade 80206-1508'),
# (u'Barrett', u'2900 Richard Allen Court 80205-4969'),
# (u'P.R.E.P. Academy MS', u'2727 Columbine St. 80205-3709')]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment