Skip to content

Instantly share code, notes, and snippets.

@danicarrion
Last active January 12, 2016 15:08
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save danicarrion/b4fa049c4e4b7cee5289 to your computer and use it in GitHub Desktop.
Save danicarrion/b4fa049c4e4b7cee5289 to your computer and use it in GitHub Desktop.
Script that reads from a CartoDB table, geocode each row with Google's geocoder and put them back into CartoDB

This script reads a table fom CartoDB using the SQL API, geocodes one of its fields using Google's geocoder and updates the data on CartoDB.

If no valid credentials are provided for Google's geocoder, the free limits will apply.

TODO: multithread support

[cartodb]
account=test
api_key=27a4f4e9243fL4531caf8225aMN19632ce80ff7a
table=cities
field=name
buffer_size=200
[google]
api_key=
client_id=
private_key=
import ConfigParser
import sys
from cartodb import CartoDBAPIKey, CartoDBException
from pygeocoder import Geocoder
config = ConfigParser.RawConfigParser()
config.read("geocoder.conf")
def chunks(l, n):
"""
Yield successive n-sized chunks from l.
"""
for i in xrange(0, len(l), n):
yield l[i:i+n]
if __name__ == '__main__':
api_key = config.get('cartodb', 'api_key')
account = config.get('cartodb', 'account')
table = config.get('cartodb', 'table')
field = config.get('cartodb', 'field')
buffer_size = int(config.get('cartodb', 'buffer_size'))
api_key_google = config.get('google', 'api_key') or None
client_id_google = config.get('google', 'client_id') or None
private_key_google = config.get('google', 'private_key') or None
google_geocoder = Geocoder(api_key=api_key_google, client_id=client_id_google, private_key=private_key_google)
client = CartoDBAPIKey(api_key, account)
try:
response = client.sql('select cartodb_id, {field} from {table}'.format(field=field, table=table))
except CartoDBException as e:
sys.stderr.write("Error: {error}".format(error=e))
sys.exit(1)
for rows in chunks(response["rows"], buffer_size):
values = ""
for row in rows:
results = google_geocoder.geocode(row[field])
values += "({cartodb_id}, {lat}, {lng}),".format(cartodb_id=row["cartodb_id"], lat=results[0].coordinates[0],
lng=results[0].coordinates[1])
if values:
values = values[:-1]
query = "UPDATE {table} set the_geom=CDB_LatLng(data.lat, data.lng) from (values {values}) data(cartodb_id, lat, lng) " \
"where {table}.cartodb_id=data.cartodb_id;".format(table=table, values=values)
client.sql(query)
sys.stdout.write("Done.\n")
cartodb==0.8.1
pygeocoder==1.2.5
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment