Last active
August 29, 2015 14:09
-
-
Save Dulani/985c32f5b14e64e3c792 to your computer and use it in GitHub Desktop.
Geocoding in R using Google's API
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Geocoding in R using Google's API
require(RCurl) | |
require(XML) | |
require(data.table) | |
require(dplyr) | |
# Load the RIDB location list (tab-separated) and give its seven columns
# working names. The `simplified` column is the text later sent to the geocoder.
ridbLocs <- data.table(read.delim(file = "Data/ridbLocations.tsv"))
setnames(
  ridbLocs,
  c("original", "locationType", "simplified1", "gCode", "simplified", "lat", "lon")
)

#' Model formula from my Google Sheet:
#' =IMPORTXML("https://maps.googleapis.com/maps/api/geocode/xml?address="&E2&"&key={insert api key here}","//result[1]/geometry[1]/location[1]/lng[1]")

# Google Geocoding API key.
# Read from the GOOGLE_GEOCODING_API_KEY environment variable so the key never
# has to be committed with the script; when the variable is not set, the
# original placeholder string is used, exactly as before.
apiKey <- Sys.getenv("GOOGLE_GEOCODING_API_KEY", unset = "{insert api key here}")
baseURL <- "https://maps.googleapis.com/maps/api/geocode/"
# queryString <- "ECHTERDINGEN ARMY AIR FIELD, GERMANY"
#' Send one address to Google's Geocoding API and return the raw response.
#'
#' @param baseURL Endpoint prefix ending in a slash, e.g.
#'   "https://maps.googleapis.com/maps/api/geocode/".
#' @param apiKey API key appended as the `key` query parameter.
#' @param queryString Free-text address to look up. It is coerced to character
#'   and percent-encoded, so characters such as `&`, `#` or `,` cannot break
#'   the query string.
#' @param queryType Response format: "XML" (default) or "JSON". Any other value
#'   is an error instead of silently producing a malformed URL.
#' @return Whatever `getURL()` returns for the request (the response body as
#'   text); no parsing is done here.
geoQuery <- function(baseURL, apiKey, queryString, queryType = c("XML", "JSON")) {
  # Model URL:
  # https://maps.googleapis.com/maps/api/geocode/xml?address=ECHTERDINGEN ARMY AIR FIELD, GERMANY&key={insert api key here}
  queryType <- tolower(match.arg(queryType))

  # Percent-encode the address. `repeated = TRUE` forces encoding even when the
  # text already contains something that looks like a %XX escape.
  address <- utils::URLencode(
    as.character(queryString),
    reserved = TRUE,
    repeated = TRUE
  )
  # Spaces as pluses (Google's API accepts either, but that is what their own
  # app does, so mimic it).
  address <- gsub("%20", "+", address, fixed = TRUE)

  # Build URL
  url <- paste0(baseURL, queryType, "?address=", address, "&key=", apiKey)
  # Any space left in the other URL parts is replaced too, as before.
  url <- gsub(" ", "+", url, fixed = TRUE)

  # Should do some parsing here of XML or JSON and return something more
  # useful in R.
  getURL(url)
}
# Geocode every row of ridbLocs: send the `simplified` text to the API, keep
# the raw XML response in column `XML`, and record either the formatted address
# or the API status in column `locationFound`.
# (Column names are already assigned right after the file is loaded, so the
# identical second setnames() call that used to sit here was dropped.)
for (curRow in seq_len(nrow(ridbLocs))) {
  # curRow <- 1
  searchTerm <- as.character(ridbLocs[curRow, simplified])
  cat(paste("Searching for:", searchTerm, "\t\t"))

  fullXML <- geoQuery(baseURL, apiKey, queryString = searchTerm)
  parsedXML <- xmlParse(fullXML)

  status <- xpathSApply(parsedXML, "//GeocodeResponse[1]/status[1]", xmlValue)
  # An unexpected document may have no <status> node; use a sentinel rather
  # than failing on a zero-length `if` condition.
  status <- if (length(status) > 0) as.character(status[[1]]) else "NO_STATUS"

  if (status == "OK") {
    loc <- xpathSApply(parsedXML, "//result[1]/formatted_address[1]", xmlValue)
    loc <- if (length(loc) > 0) as.character(loc[[1]]) else NA_character_
    cat(paste("Found: ", loc, "\n"))
  } else {
    # ZERO_RESULTS and every other non-OK status: store the status itself so a
    # failed row never inherits the previous row's address.
    loc <- status
    cat(paste(status, "\n"))
  }

  # `:=` updates the data.table by reference; no reassignment needed.
  ridbLocs[curRow, XML := fullXML]
  ridbLocs[curRow, locationFound := loc]

  Sys.sleep(time = 0.25) # Google's "free" limit is 5 per second and 2,500 per day.
}
#' Extract the latitude or longitude from the XML output of Google's
#' Geocoding API.
#'
#' The original author noted that lat/lon did not appear to be updated. One
#' cause visible here: the "nothing found" case was tested with is.null(), which
#' does not catch an empty XPath result, so such rows produced a zero-length
#' value instead of NA. The length check below covers both NULL and empty.
#'
#' @param xml XML response text for one geocoding request.
#' @param coord Which coordinate to extract: "lat" (default) or "lon".
#' @return A single numeric value, or `NA_real_` when `xml` is missing or the
#'   response contains no coordinate.
getGeoCoord <- function(xml, coord = c("lat", "lon")) {
  # browser()
  # xml <- ridbLocs[3,XML]
  coord <- match.arg(coord)
  # Google uses the abbreviation 'lng' for longitude rather than the more
  # common(?) 'lon'.
  tag <- switch(coord,
    lat = "lat",
    lon = "lng"
  )

  # Nothing to parse for a missing response.
  if (length(xml) == 0 || is.na(xml[[1]])) {
    return(NA_real_)
  }

  doc <- xmlParse(xml)
  hits <- xpathSApply(
    doc,
    sprintf("//result[1]/geometry[1]/location[1]/%s[1]", tag),
    xmlValue
  )
  if (length(hits) == 0) {
    return(NA_real_)
  }
  as.numeric(hits[[1]])
}
# Derive numeric lat/lon from the stored XML responses, add a row id and a
# review flag, drop the unused column, then show and save the result.

# Wraps getGeoCoord() so each row yields exactly one numeric value: a
# zero-length result becomes NA_real_, which lets vapply() build a plain
# numeric column instead of sapply() silently falling back to a list.
coordOrNA <- function(xmlText, coord) {
  value <- getGeoCoord(xmlText, coord)
  if (length(value) == 0) NA_real_ else as.numeric(value[[1]])
}

ridbLocs <- ridbLocs %>%
  mutate(
    lon = vapply(as.character(XML), coordOrNA, numeric(1),
                 coord = "lon", USE.NAMES = FALSE),
    lat = vapply(as.character(XML), coordOrNA, numeric(1),
                 coord = "lat", USE.NAMES = FALSE),
    id = seq_len(n()),
    # Add a flag for the rows that need a manual review
    goodMatch = locationFound != "ZERO_RESULTS"
  ) %>%
  select(-simplified1) # Drop a column.

# View() needs an interactive session; skip it when run as a batch script.
if (interactive()) {
  View(select(ridbLocs, -XML))
}
save(ridbLocs, file = "Data/Geocoded RIDB Locations.RData")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment