Skip to content

Instantly share code, notes, and snippets.

@danicarrion
Last active Jan 12, 2016
Embed
What would you like to do?
Script that reads from a CartoDB table, geocode each row with Google's geocoder and put them back into CartoDB

This script reads a table fom CartoDB using the SQL API, geocodes one of its fields using Google's geocoder and updates the data on CartoDB.

If no valid credentials are provided for Google's geocoder, the free limits will apply.

TODO: multithread support

[cartodb]
account=test
api_key=27a4f4e9243fL4531caf8225aMN19632ce80ff7a
table=cities
field=name
buffer_size=200
[google]
api_key=
client_id=
private_key=
import ConfigParser
import sys
from cartodb import CartoDBAPIKey, CartoDBException
from pygeocoder import Geocoder
config = ConfigParser.RawConfigParser()
config.read("geocoder.conf")
def chunks(l, n):
"""
Yield successive n-sized chunks from l.
"""
for i in xrange(0, len(l), n):
yield l[i:i+n]
if __name__ == '__main__':
api_key = config.get('cartodb', 'api_key')
account = config.get('cartodb', 'account')
table = config.get('cartodb', 'table')
field = config.get('cartodb', 'field')
buffer_size = int(config.get('cartodb', 'buffer_size'))
api_key_google = config.get('google', 'api_key') or None
client_id_google = config.get('google', 'client_id') or None
private_key_google = config.get('google', 'private_key') or None
google_geocoder = Geocoder(api_key=api_key_google, client_id=client_id_google, private_key=private_key_google)
client = CartoDBAPIKey(api_key, account)
try:
response = client.sql('select cartodb_id, {field} from {table}'.format(field=field, table=table))
except CartoDBException as e:
sys.stderr.write("Error: {error}".format(error=e))
sys.exit(1)
for rows in chunks(response["rows"], buffer_size):
values = ""
for row in rows:
results = google_geocoder.geocode(row[field])
values += "({cartodb_id}, {lat}, {lng}),".format(cartodb_id=row["cartodb_id"], lat=results[0].coordinates[0],
lng=results[0].coordinates[1])
if values:
values = values[:-1]
query = "UPDATE {table} set the_geom=CDB_LatLng(data.lat, data.lng) from (values {values}) data(cartodb_id, lat, lng) " \
"where {table}.cartodb_id=data.cartodb_id;".format(table=table, values=values)
client.sql(query)
sys.stdout.write("Done.\n")
cartodb==0.8.1
pygeocoder==1.2.5
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment