Last active
November 8, 2016 16:29
-
-
Save valentinitnelav/0f2ce01aadd624d7d2772ac6a5c69780 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Call library data.table
library(data.table)

# ============================================ #
# Prepare data                                 #
# ============================================ #

# Read your data.
DT <- fread("Hawaii_sites.csv")

# Get only unique lat-long records.
# It is better to work only on unique lat-long combinations
# because of the Google Maps Geocoding API usage limits.
# You can merge back the results later.
XY.unq <- unique(DT[, .(LAT, LON)])

# Give your Google API key
# A key can look something like xIzaAyFWWziisk2V-IwkHLoQ3flu2vN6xp7Tms0
API_key <- "your key"

# Build google maps API XML link/query for each record
XY.unq[, link := paste0(
  "https://maps.googleapis.com/maps/api/geocode/xml?latlng=", LAT, ",", LON,
  "&key=", API_key
)]
# Check out more at https://developers.google.com/maps/documentation/geocoding/intro#reverse-example
# Their examples are based on json and not xml, but the logic is the same.

# Get XML return for each link with readLines()
# Use lapply because readLines is not vectorized.
# Each request is wrapped in tryCatch() so that a single failed query
# (network error, HTTP error status) does not throw away the responses
# already downloaded: the failure is reported as a warning and the record
# gets NA_character_, which later yields NA in every extracted column.
XY.unq[, XY_XML := lapply(link, function(query_url) {
  tryCatch(
    readLines(query_url, warn = FALSE),
    error = function(e) {
      warning(
        "Reverse geocoding request failed: ", conditionMessage(e),
        call. = FALSE
      )
      NA_character_
    }
  )
})]
# The new column XY_XML is actually a list of character vectors
# Note that the response from the reverse geocoder can be a bit slow
# e.g. 17-20 sec for 250 queries (links);
# there are also limitations: https://developers.google.com/maps/documentation/geocoding/usage-limits

# Save/Load object
# save(XY.unq, file = "XY.unq.rda")
# load(file = "XY.unq.rda")
# ============================================ #
# Function to get desired data from XML format #
# ============================================ #

# Extract one piece of address information from a reverse-geocoding XML
# response.
#
# Arguments:
#   xml  - character vector holding the XML response, one element per line
#          (as returned by readLines()).
#   type - single string naming the item to extract: "status",
#          "formatted_address", "country" or
#          "administrative_area_level_1" ... "administrative_area_level_5".
#          The "type" meanings can be found here:
#          https://developers.google.com/maps/documentation/geocoding/intro#Types
#
# Returns:
#   A length-one character vector with all <...> tags and surrounding spaces
#   stripped, or NA_character_ when `type` is not found in `xml` or the line
#   expected to hold the value does not exist.
#
# Errors:
#   Stops with "Provide a type!" when `type` is not one of the supported
#   values.
Get_from_XML <- function(xml, type) {
  # How many lines above the matched line the wanted value is located.
  # "status" and "formatted_address" carry the value on the matched line.
  # For the address component types, the name is 2 lines up in the xml
  # structure from where the searched word (e.g. "country") is located.
  lines_up <- switch(type,
    status = ,
    formatted_address = 0L,
    administrative_area_level_1 = ,
    administrative_area_level_2 = ,
    administrative_area_level_3 = ,
    administrative_area_level_4 = ,
    administrative_area_level_5 = ,
    country = 2L,
    stop("Provide a type!")
  )

  # Get index of first match in the xml string (NA when there is no match)
  idx <- grep(pattern = type, x = xml, fixed = TRUE)[1]
  target <- idx - lines_up

  # Without this guard a match on line 1 or 2 would index xml[-1] / xml[0]
  # and return a vector whose length is not one, which breaks the
  # column-building code that expects exactly one value per record.
  if (is.na(target) || target < 1L) {
    return(NA_character_)
  }

  # Replace everything between <> (and surrounding spaces) with ''
  gsub(pattern = ' *<.*?> *', replacement = '', x = xml[target])
}
# ============================================ #
# Extract desired address data in new columns  #
# ============================================ #

# Map each new column name to the address type handed to Get_from_XML().
# Status codes possible in a reverse geocoding response at:
# https://developers.google.com/maps/documentation/geocoding/intro#reverse-response
# Check Address Types and Address Component Types at:
# https://developers.google.com/maps/documentation/geocoding/intro#Types
address_types <- c(
  Status  = "status",
  Country = "country",
  Admin1  = "administrative_area_level_1",
  Admin2  = "administrative_area_level_2",
  Admin3  = "administrative_area_level_3",
  Admin4  = "administrative_area_level_4",
  Admin5  = "administrative_area_level_5",
  Address = "formatted_address"
)

# Apply Get_from_XML() to every stored XML response, once per address type.
# vapply() with character(1) guarantees that each new column is a plain
# character vector; it fails loudly if any record yields anything other than
# exactly one string, instead of silently producing a list column.
XY.unq[, (names(address_types)) := lapply(address_types, function(type_name) {
  vapply(
    XY_XML, Get_from_XML, character(1),
    type = type_name, USE.NAMES = FALSE
  )
})]

# Remove unwanted columns
colnames(XY.unq)
XY.unq[, c("link", "XY_XML") := NULL]

# Left Outer Join DT with XY.unq (using data.table::merge() syntax)
RevGeo <- merge(x = DT, y = XY.unq, by = c("LAT", "LON"), all.x = TRUE)
# or using data.table syntax
# RevGeo <- XY.unq[DT, on = c("LAT", "LON")]

# Save/Load object
# save(RevGeo, file = "RevGeo.rda")
# load(file = "RevGeo.rda")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment