Skip to content

Instantly share code, notes, and snippets.

@scott2b
Last active October 10, 2017 15:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save scott2b/3022add5dbaf162b6415a8a03a857c41 to your computer and use it in GitHub Desktop.
Save scott2b/3022add5dbaf162b6415a8a03a857c41 to your computer and use it in GitHub Desktop.
Get geographic coordinates for US cities with a population of 100,000 or more.
#!/usr/bin/env python
import json
import requests
from bs4 import BeautifulSoup
import re
# Captures the first "<lat>; <lon>" pair of signed decimal numbers found
# anywhere in a text blob; DOTALL lets the surrounding ``.`` span newlines.
LATLNG = re.compile(
    r'^.*?(-?\d+\.\d+); (-?\d+\.\d+).*$',
    re.DOTALL,
)

# Captures the text in group 1, leaving off one trailing bracketed numeric
# marker such as "[6]" when the text ends with one.
CITY = re.compile(
    r'^(.*?)(?:\[\d+\])?$',
    re.DOTALL,
)
def main():
    """Scrape Wikipedia's US-cities-by-population table and print coordinates.

    Downloads the article, takes the first ``table.wikitable.sortable``
    element, and for every data row extracts the city name, the state and
    the decimal latitude/longitude found in the row's last cell.  The result
    is printed to stdout as JSON of the form
    ``{"cities": [{"city": ..., "state": ..., "lat": ..., "lon": ...}]}``;
    ``lat`` and ``lon`` are emitted as the strings captured from the page.

    Rows that do not have the expected eleven ``<td>`` cells, or whose
    location cell contains no ``<lat>; <lon>`` pair, are skipped with a
    warning on stderr instead of aborting the whole run.

    Raises:
        requests.RequestException: if the page cannot be fetched or the
            server answers with an error status.
        RuntimeError: if the page contains no matching table.
    """
    import sys  # local import: only needed for the skipped-row warnings

    # Number of <td> cells a data row had when this scraper was written:
    # rank, city, state, 2016 estimate, 2010 census, change, land area
    # (mi/km), population density (mi/km), location.
    expected_cells = 11

    url = 'https://en.wikipedia.org/wiki/List_of_United_States_cities_by_population'
    # A timeout keeps the script from hanging forever on a stalled
    # connection; raise_for_status stops us from parsing an error page.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.select_one('table.wikitable.sortable')
    if table is None:
        raise RuntimeError(
            'no table.wikitable.sortable element found at %s' % url)

    cities = []
    # The first <tr> is the header row, so start from the second.
    for index, row in enumerate(table.find_all('tr')[1:], start=1):
        cells = row.find_all('td')
        if not cells:
            # Extra header/separator rows contain only <th> cells.
            continue
        if len(cells) != expected_cells:
            sys.stderr.write(
                'skipping row %d: expected %d cells, found %d\n'
                % (index, expected_cells, len(cells)))
            continue

        city_cell, state_cell, location_cell = cells[1], cells[2], cells[-1]
        coords = LATLNG.match(location_cell.text)
        if coords is None:
            sys.stderr.write(
                'skipping row %d: no coordinates in location cell\n' % index)
            continue

        lat, lon = coords.groups()
        cities.append({
            # CITY drops a trailing footnote marker such as "[6]".
            'city': CITY.match(city_cell.text).group(1),
            # The first character of the state cell text is discarded
            # (presumably a spacer left by the flag icon -- confirm).
            'state': state_cell.text[1:],
            'lat': lat,
            'lon': lon
        })

    print(json.dumps({'cities': cities }, indent=4))
# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment