Skip to content

Instantly share code, notes, and snippets.

@Dulani
Last active August 29, 2015 14:09
Show Gist options
  • Save Dulani/985c32f5b14e64e3c792 to your computer and use it in GitHub Desktop.
Save Dulani/985c32f5b14e64e3c792 to your computer and use it in GitHub Desktop.
Geocoding in R using Google's API
# Geocoding in R using Google's API
require(RCurl)
require(XML)
require(data.table)
require(dplyr)
# Load the tab-separated RIDB location list, convert it to a data.table and
# give its seven columns short working names.
ridbLocs <- read.delim(file = "Data/ridbLocations.tsv")
ridbLocs <- data.table(ridbLocs)
setnames(
  ridbLocs,
  c(
    "original", "locationType", "simplified1", "gCode",
    "simplified", "lat", "lon"
  )
)
#' Model formula from my Google Sheet:
#' =IMPORTXML("https://maps.googleapis.com/maps/api/geocode/xml?address="&E2&"&key={insert api key here}","//result[1]/geometry[1]/location[1]/lng[1]")
# Google Geocoding API key. It is read from the GOOGLE_GEOCODING_API_KEY
# environment variable so the key never has to be pasted into (and shared
# with) this script; the placeholder is used when the variable is not set.
apiKey <- Sys.getenv("GOOGLE_GEOCODING_API_KEY", unset = "{insert api key here}")
# Endpoint prefix; the response format ("xml"/"json") and the query parameters
# are appended directly after it.
baseURL <- "https://maps.googleapis.com/maps/api/geocode/"
# Example query:
# queryString <- "ECHTERDINGEN ARMY AIR FIELD, GERMANY"
#' Send one free-text address to Google's Geocoding API.
#'
#' @param baseURL Endpoint prefix. The response format and the query
#'   parameters are appended directly, so it should end in "/".
#' @param apiKey API key sent as the `key` query parameter.
#' @param queryString Free-text address to geocode (character or factor).
#' @param queryType Response format, "XML" (default) or "JSON". Only the first
#'   element is used; any other value is an error.
#' @return The raw response body as returned by `getURL()`. It is not parsed.
geoQuery <- function(baseURL, apiKey, queryString, queryType = c("XML", "JSON")) {
  # Model URL:
  # https://maps.googleapis.com/maps/api/geocode/xml?address=ECHTERDINGEN+ARMY+AIR+FIELD,+GERMANY&key={api key}

  # Fail loudly on an unsupported format instead of silently building a URL
  # with the format missing.
  queryType <- tolower(match.arg(queryType[1], c("XML", "JSON")))

  # Percent-encode the address so characters such as "&", "#" or non-ASCII
  # letters cannot break or truncate the query string. `repeated = TRUE`
  # forces encoding even if the text already contains something like "%20".
  address <- utils::URLencode(
    as.character(queryString),
    reserved = TRUE,
    repeated = TRUE
  )
  # Use pluses for spaces (Google's API doesn't seem to care, but that's what
  # their own app does).
  address <- gsub("%20", "+", address, fixed = TRUE)

  # Build URL
  url <- paste0(baseURL, queryType, "?address=", address, "&key=", apiKey)
  # Any literal space left in the prefix or key is still turned into a plus,
  # as before.
  url <- gsub(" ", "+", url, fixed = TRUE)

  # Should do some parsing here of XML or JSON and return something more
  # useful in R.
  getURL(url)
}
# Geocode every row of ridbLocs: query Google for the `simplified` address and
# store the raw XML response plus the matched address (or the API status when
# there was no match) in the XML and locationFound columns.

# Re-applies the working column names; harmless if they are already set.
setnames(ridbLocs, c("original", "locationType", "simplified1", "gCode", "simplified", "lat", "lon"))

# seq_len() so that an empty table runs zero iterations (1:0 would run two).
for (curRow in seq_len(nrow(ridbLocs))) {
  # curRow <- 1
  query <- as.character(ridbLocs[curRow, simplified])
  cat(paste("Searching for:", query, "\t\t"))

  fullXML <- geoQuery(baseURL, apiKey, queryString = query)
  parsedXML <- xmlParse(fullXML)

  # xpathSApply() gives a zero-length result when the node is absent, which
  # would make the `if` below fail; fall back to a sentinel status instead.
  status <- xpathSApply(parsedXML, "//GeocodeResponse[1]/status[1]", xmlValue)
  status <- if (length(status) == 0) "NO_STATUS" else as.character(status[[1]])

  if (status == "OK") {
    loc <- xpathSApply(parsedXML, "//result[1]/formatted_address[1]", xmlValue)
    cat(paste("Found: ", loc, "\n"))
  } else {
    # ZERO_RESULTS and every other non-OK status: record the status itself so
    # the row never inherits the previous row's address.
    loc <- status
    cat(paste(status, "\n"))
  }

  ridbLocs <- ridbLocs[curRow, c("XML", "locationFound") := list(fullXML, loc)]

  Sys.sleep(time = 0.25) # Google's "free" limit is 5 per second and 2,500 per day.
}
#For some reason, the lat/lon don't appear to be updated:
#' Extract the latitude or longitude of the first result from the XML output
#' of Google's Geocoding API.
#'
#' @param xml XML response text, passed to `xmlParse()`.
#' @param coord Which coordinate to extract: "lat" or "lon". Only the first
#'   element is used.
#' @return A single numeric value, or `NA_real_` when the XML text is missing
#'   or empty, or the response contains no matching coordinate node.
getGeoCoord <- function(xml, coord = c("lat", "lon")) {
  # xml <- ridbLocs[3,XML]
  coord <- match.arg(coord[1], c("lat", "lon"))
  # Google uses the abbreviation 'lng' for longitude rather than 'lon'.
  nodeName <- switch(coord,
    lat = "lat",
    lon = "lng"
  )

  # Rows that were never fetched have no XML text to parse.
  if (is.character(xml) && (length(xml) == 0 || is.na(xml[1]) || !nzchar(xml[1]))) {
    return(NA_real_)
  }

  parsed <- xmlParse(xml)
  geoCoord <- xpathSApply(
    parsed,
    sprintf("//result[1]/geometry[1]/location[1]/%s[1]", nodeName),
    xmlValue
  )

  # With no matching node xpathSApply() returns an empty list, not NULL, so
  # test the length; otherwise the caller would receive numeric(0).
  if (length(geoCoord) == 0) {
    return(NA_real_)
  }
  as.numeric(geoCoord[[1]])
}
# Derive coordinates, a row id and a review flag from the stored XML
# responses, then inspect and save the result.

# getGeoCoord() may yield a zero-length value for a response without results;
# force exactly one numeric per row so lat/lon become plain numeric columns
# rather than lists.
coordOrNA <- function(xmlText, coord) {
  value <- getGeoCoord(xmlText, coord)
  if (length(value) == 0) NA_real_ else as.numeric(value[[1]])
}

ridbLocs <- ridbLocs %>%
  mutate(
    lon = vapply(as.character(XML), coordOrNA, numeric(1), coord = "lon", USE.NAMES = FALSE),
    lat = vapply(as.character(XML), coordOrNA, numeric(1), coord = "lat", USE.NAMES = FALSE),
    id = seq_len(nrow(ridbLocs)),
    # Flag for the rows that need a manual review: FALSE when Google reported
    # ZERO_RESULTS or when no coordinates could be extracted.
    goodMatch = !is.na(lat) & !(locationFound %in% "ZERO_RESULTS")
  ) %>%
  select(-simplified1) # Drop a column.

# View() needs an interactive session; skip it when run as a batch script.
if (interactive()) {
  View(select(ridbLocs, -XML))
}
save(ridbLocs, file = "Data/Geocoded RIDB Locations.RData")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment