-
-
Save mhkeller/3834498 to your computer and use it in GitHub Desktop.
google geocoding API, R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Forked from @BrianAbelson | |
library('rjson') # For parsing json in R | |
library('RCurl') # For sending an http request | |
library('plyr') # For ddply | |
# Takes a data frame with, at the very least, a unique id column (`uid`) and an address column (`loc`).
# It returns a data frame with the uid, lat, lng and specificity of geocoding.
# Use the uid to join these results back to your data if you want.
# You can also accomplish roughly the same thing with a for loop instead of ddply;
# in that case, your original data frame can have empty columns for lat, lng, and specificity,
# which are filled as the loop cycles.
# ddply gives you a fancy progress bar, though.
# But you can print the index of the for loop, so that's pretty much a progress bar as well.
# Geocode one address through the Google Geocoding JSON endpoint.
#
# Args:
#   uid_query: the per-uid piece handed over by ddply -- a data frame with a
#     `uid` column and a `loc` column holding the address text.
#
# Returns:
#   A data frame with columns uid, lat, lng and type (the location_type of
#   the first result) when the response status is "OK".
#   NULL when the request could not be read, the response could not be
#   parsed, or the status is anything other than "OK"; ddply simply drops
#   such pieces from its output.
#
# Raises:
#   Stops with "Hit rate limit at: <uid>" when the status is
#   "OVER_QUERY_LIMIT", so the whole run halts instead of burning more quota.
#
# NOTE(review): the endpoint below is plain http with no API key; current
# Google documentation appears to require https plus a key -- confirm before
# relying on this in production.
geocode.addr <- function(uid_query) {
  # Avoid rate limits by pausing from 1 to 3 seconds
  Sys.sleep(sample(seq(1, 3, by = 0.001), 1))

  # The important columns we want from our passed row
  uid <- uid_query$uid
  # Coerce so that a factor address column is handled as text
  query <- as.character(uid_query$loc)

  # You can also return the addresses as a simplified csv with this
  # http://maps.google.com/maps/geo?output=csv&q=
  # or change that to output=xml which has more detailed results.
  # This JSON request has a lot of good detail if you need it and JSON is nicer
  geo.url <- "http://maps.googleapis.com/maps/api/geocode/json?address="
  # reserved = TRUE also escapes characters such as & and #, which would
  # otherwise truncate the address or inject extra query parameters
  request <- paste0(geo.url, URLencode(query, reserved = TRUE), "&sensor=false")

  geo.text <- try(getURL(request))
  # If it didn't work with getURL, give it a go with readLines
  if (inherits(geo.text, "try-error")) {
    geo.text <- try(readLines(request))
  }
  # Give up on this row, but let the remaining rows proceed
  if (inherits(geo.text, "try-error")) {
    print(paste("having trouble reading this query:", uid))
    return(NULL)
  }

  # readLines() yields one element per line; fromJSON() needs a single string
  geo.json <- try(fromJSON(paste(geo.text, collapse = "\n")))
  if (inherits(geo.json, "try-error")) {
    print(paste("having trouble reading this query:", uid))
    return(NULL)
  }

  # identical() stays a clean TRUE/FALSE even if `status` is missing
  status <- geo.json$status
  if (identical(status, "OVER_QUERY_LIMIT")) {
    stop(paste("Hit rate limit at:", uid))
  }
  if (!identical(status, "OK")) {
    return(NULL)
  }

  # There are other data points you can grab but I'm most interested in these.
  print(uid)
  best <- geo.json$results[[1]]$geometry
  data.frame(
    uid = uid,
    lat = best$location$lat,
    lng = best$location$lng,
    type = best$location_type,
    stringsAsFactors = FALSE
  )
}
# Geocode every uid group of `uid_address`, showing a text progress bar,
# and collect the per-row results into one data frame.
output <- ddply(
  uid_address,
  .(uid),
  geocode.addr,
  .progress = "text"
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment