Skip to content

Instantly share code, notes, and snippets.

@valentinitnelav
Last active November 8, 2016 16:29
Show Gist options
  • Save valentinitnelav/0f2ce01aadd624d7d2772ac6a5c69780 to your computer and use it in GitHub Desktop.
Save valentinitnelav/0f2ce01aadd624d7d2772ac6a5c69780 to your computer and use it in GitHub Desktop.
# Call library data.table
library(data.table)
# ============================================ #
# Prepare data #
# ============================================ #
# Read your data; the table must contain the columns LAT and LON.
DT <- fread("Hawaii_sites.csv")
# Keep only the unique lat-long combinations.
# It is better to query each coordinate pair only once because of the
# Google Maps Geocoding API usage limits:
# https://developers.google.com/maps/documentation/geocoding/usage-limits
# The results are merged back onto the full data later.
XY.unq <- unique(DT[, .(LAT, LON)])
# Give your Google API key
# A key can look something like xIzaAyFWWziisk2V-IwkHLoQ3flu2vN6xp7Tms0
API_key <- "your key"
# Build the Google Maps API XML link/query for each record.
# Check out more at https://developers.google.com/maps/documentation/geocoding/intro#reverse-example
# Their examples are based on json and not xml, but the logic is the same.
XY.unq[, link := paste0("https://maps.googleapis.com/maps/api/geocode/xml?latlng=", LAT, ",", LON, "&key=", API_key)]
# Get the XML response for each link with readLines().
# lapply is used because readLines is not vectorized over connections.
# - A failed request (network error, HTTP error status) must not abort the
#   whole run and throw away the responses already downloaded, so each call
#   is wrapped in tryCatch(); a failure is reported as a warning and stored
#   as NA_character_.
# - The API key is stripped from the warning text so it does not end up in logs.
# - The outer list() is required by data.table: a length-1 list on the right
#   hand side of `:=` is unwrapped, so without it a table with a single row
#   would try to assign the character vector itself instead of a list cell.
# The new column XY_XML is a list column holding one character vector
# (the lines of the XML response) per row.
# Note that the response from the reverse geocoder can be a bit slow,
# e.g. 17-20 sec for 250 queries (links).
XY.unq[, XY_XML := list(lapply(link, function(url) {
  tryCatch(
    readLines(url, warn = FALSE),
    error = function(e) {
      warning("Reverse geocoding request failed for ",
              sub("&key=.*$", "", url), ": ", conditionMessage(e),
              call. = FALSE)
      NA_character_
    }
  )
}))]
# Save/Load object
# save(XY.unq, file = "XY.unq.rda")
# load(file = "XY.unq.rda")
# ============================================ #
# Function to get desired data from XML format #
# ============================================ #
# Extract one piece of information from a Google reverse-geocoding XML response.
#
# Arguments:
#   xml  - character vector, one element per line of the XML response
#          (as returned by readLines()).
#   type - single string naming what to extract. Supported values:
#          "status", "formatted_address", "country" and
#          "administrative_area_level_1" ... "administrative_area_level_5".
#          The "type" meanings can be found here:
#          https://developers.google.com/maps/documentation/geocoding/intro#Types
#
# Returns:
#   A length-1 character vector with the text of the first matching element,
#   or NA_character_ when the response does not contain it.
#
# Errors:
#   Stops with "Provide a type!" when type is missing, is not a single
#   non-NA string, or is not one of the supported values.
Get_from_XML <- function(xml, type){
  # Validate the request before touching the data, so a bad call fails with
  # the intended message.
  if (missing(type) || !is.character(type) || length(type) != 1L || is.na(type)) {
    stop("Provide a type!")
  }
  component_types <- c("country", paste0("administrative_area_level_", 1:5))
  if (type %in% c("status", "formatted_address")) {
    # These values sit on the same line as their own tag.
    tag <- paste0("<", type, ">")
    offset <- 0L
  } else if (type %in% component_types) {
    # Address components are identified by a <type>...</type> line; the
    # long name of the component is located 2 lines above that line.
    tag <- paste0("<type>", type, "</type>")
    offset <- 2L
  } else {
    stop("Provide a type!")
  }
  # Search for the complete tag rather than the bare word: a plain substring
  # search could hit free text first (e.g. a formatted address containing
  # the word "country") and then pick up an unrelated line.
  idx <- grep(pattern = tag, x = xml, fixed = TRUE)[1] - offset
  # No match, or not enough lines above the match: report a missing value
  # instead of indexing with NA, zero or a negative number.
  if (is.na(idx) || idx < 1L) {
    return(NA_character_)
  }
  # Replace everything between <> (plus surrounding spaces) with an empty
  # string, leaving only the element's text.
  gsub(pattern = ' *<.*?> *', replacement = '', x = xml[idx])
}
# ============================================ #
# Extract desired address data in new columns #
# ============================================ #
# Apply Get_from_XML() to every stored XML response and add one new column
# per requested piece of information.
# vapply() is used instead of sapply() so that every new column is guaranteed
# to be a plain character vector: if any response yields something other than
# a single string, the call fails loudly instead of silently creating a list
# column.
# Status codes possible in a reverse geocoding response are listed at:
# https://developers.google.com/maps/documentation/geocoding/intro#reverse-response
# Address Types and Address Component Types are listed at:
# https://developers.google.com/maps/documentation/geocoding/intro#Types
XY.unq[, ":="(
  Status  = vapply(XY_XML, Get_from_XML, character(1), type = "status"),
  Country = vapply(XY_XML, Get_from_XML, character(1), type = "country"),
  Admin1  = vapply(XY_XML, Get_from_XML, character(1), type = "administrative_area_level_1"),
  Admin2  = vapply(XY_XML, Get_from_XML, character(1), type = "administrative_area_level_2"),
  Admin3  = vapply(XY_XML, Get_from_XML, character(1), type = "administrative_area_level_3"),
  Admin4  = vapply(XY_XML, Get_from_XML, character(1), type = "administrative_area_level_4"),
  Admin5  = vapply(XY_XML, Get_from_XML, character(1), type = "administrative_area_level_5"),
  Address = vapply(XY_XML, Get_from_XML, character(1), type = "formatted_address")
)]
# Show the current column names, then drop the helper columns
# (query link and raw XML) by reference; they are no longer needed.
names(XY.unq)
set(XY.unq, j = c("link", "XY_XML"), value = NULL)
# Left outer join: keep every row of DT and attach the geocoding results
# of its coordinate pair (merge() method for data.table).
RevGeo <- merge(DT, XY.unq, all.x = TRUE, by = c("LAT", "LON"))
# The same join can be written in data.table's own syntax:
# RevGeo <- XY.unq[DT, on = c("LAT", "LON")]
# Save/Load object
# save(RevGeo, file = "RevGeo.rda")
# load(file = "RevGeo.rda")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment