Skip to content

Instantly share code, notes, and snippets.

@alexwoolford
Last active August 29, 2015 14:06
Show Gist options
  • Save alexwoolford/410530a345ebe46456a2 to your computer and use it in GitHub Desktop.
Save alexwoolford/410530a345ebe46456a2 to your computer and use it in GitHub Desktop.
Denver Public Schools: find the closest schools
#!/usr/bin/python
from bs4 import BeautifulSoup
import urllib2
import re
from pygeocoder import Geocoder
import time
from pymongo import MongoClient
# Get the HTML for the school list and create BeautifulSoup object.
# The HTTP response is closed explicitly once its body has been parsed, even if reading fails.
response = urllib2.urlopen('http://www.dpsk12.org/schoollist/default.aspx')
try:
    soup = BeautifulSoup(response.read())
finally:
    response.close()
# Each school has an ID number that's listed in parenthesis after the name. The regular expression will
# be used to strip off the ID number from the name.
schoolNamePattern = re.compile(r'(.*)\([0-9]+\)')
# Get the name, address, and phone number for each school. Every school is an <a class="tooltip"> link
# inside the table with id "mytable"; the two <td> cells that follow the link hold the address and phone.
# The resulting schoolDict maps the link's id attribute to a dict with 'name', 'address', 'phone', 'id'.
schoolDict = dict()
for link in soup.find('table', {'id': 'mytable'}).findAll('a', {'class': 'tooltip'}):
    schoolId = link['id']
    addressCell = link.findNext('td')
    phoneCell = addressCell.findNext('td')
    # A name without a trailing "(digits)" ID does not match the pattern; keep the full text in that
    # case instead of failing with an IndexError.
    nameMatch = schoolNamePattern.search(link.text)
    schoolName = nameMatch.group(1) if nameMatch else link.text
    schoolDict[schoolId] = {
        'name': schoolName.strip(),
        'address': addressCell.text,
        'phone': phoneCell.text,
        'id': schoolId,
    }
# Geocode the addresses. Each school dict gains a 'geodata' entry: the attribute dict of the geocoder
# result on success, or None when the lookup failed.
for school in schoolDict.values():
    # Pause before every request (presumably to stay under the geocoding service's rate limit).
    time.sleep(0.2)
    schoolNameAddress = ', '.join([school['name'], school['address'], 'Denver, Colorado, USA'])
    try:
        geodata = Geocoder.geocode(schoolNameAddress).__dict__
    except Exception:
        # Best effort: report the failed lookup and carry on with the next school. Catching Exception
        # rather than using a bare except lets Ctrl-C (KeyboardInterrupt) and SystemExit stop the loop.
        print('no geodata for %s' % schoolNameAddress)
        geodata = None
    school['geodata'] = geodata
# Store every school document in the 'schools' collection of the 'dots' database.
# MongoClient() is called without arguments, so pymongo's default connection settings apply.
client = MongoClient()
db = client['dots']
collection = db['schools']
# Only the documents are needed here; the dictionary keys are already stored in each document's 'id'.
for schoolDocument in schoolDict.values():
    collection.insert(schoolDocument)
# MongoDB's geoWithin filter expects the location to be listed longitude, then latitude. Create a long/lat attribute called
# 'location' which will be used by the filter.
for record in collection.find():
    geodata = record.get('geodata')
    # Schools whose geocoding failed were stored with geodata set to None. They have no coordinates, so
    # skip them instead of crashing on the subscript and aborting the update of all remaining records.
    if not geodata or not geodata.get('data'):
        continue
    # Take the coordinates of the first geocoding result.
    geocodedPoint = geodata['data'][0]['geometry']['location']
    collection.update(
        {'_id': record['_id']},
        {'$set': {'location': [geocodedPoint['lng'], geocodedPoint['lat']]}},
        upsert=False,
        multi=False
    )
def getSchools(searchLocation, radius):
    """Return the schools located within a given radius of a place.

    searchLocation -- free-text place description; it is geocoded and the first result is the centre.
    radius         -- search radius; it is converted with float() and divided by 3959
                      (presumably the Earth's radius in miles, i.e. the radius is in miles -- confirm).

    Returns a list of (name, address) tuples, one per matching document in the schools collection.
    Any exception raised by the geocoder or by the database query propagates to the caller.
    """
    center = Geocoder.geocode(searchLocation).data[0]['geometry']['location']
    # The stored 'location' field is [longitude, latitude], so the centre uses the same order.
    centerPoint = [center['lng'], center['lat']]
    angularRadius = float(radius) / 3959
    withinRadius = {'location': {'$geoWithin': {'$centerSphere': [centerPoint, angularRadius]}}}
    return [(match['name'], match['address']) for match in collection.find(withinRadius)]
# For example, searching around a named place with a radius of 1:
# NOTE: the call below is a bare expression statement, so its return value is discarded when this file
# runs as a script; the list shown underneath is the value the call returned to the author.
getSchools('Denver Museum of Nature and Science', 1)
# returns all the schools within 1 mile of the Denver Museum of Nature and Science:
# [(u'Teller', u'1150 Garfield St. 80206-3513'),
# (u'Park Hill', u'5050 E. 19th Ave. 80220-1229'),
# (u'Stedman', u'2940 Dexter St. 80207-2643'),
# (u'P.R.E.P. Academy HS', u'2727 Columbine St. 80205-3709'),
# (u'East', u'1600 City Park Esplanade 80206-1508'),
# (u'Barrett', u'2900 Richard Allen Court 80205-4969'),
# (u'P.R.E.P. Academy MS', u'2727 Columbine St. 80205-3709')]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment