Skip to content

Instantly share code, notes, and snippets.

@framingeinstein
Last active December 3, 2015 04:20
Show Gist options
  • Save framingeinstein/c51981d439873f77e0e7 to your computer and use it in GitHub Desktop.
Save framingeinstein/c51981d439873f77e0e7 to your computer and use it in GitHub Desktop.
Geocodes ip address using R
#!RScript
#
# USAGE: RScript geocode.R --in=input.xlsx --out=output.xslx --ip=column_name_of_ip_address_field
#
initial.options <- commandArgs(trailingOnly = FALSE)
file.arg.name <- "--file="
script.name <- sub(file.arg.name, "", initial.options[grep(file.arg.name, initial.options)])
script.basename <- dirname(script.name)
script.in <- sub("--in=", "", initial.options[grep("--in=", initial.options)])
script.out <- sub("--out=", "", initial.options[grep("--out=", initial.options)])
script.ip <- sub("--ip=", "", initial.options[grep("--ip=", initial.options)])
setwd("/Users/jason/projects/geolocate")
list.of.packages <- c("xlsx", "rjson")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
print(length(new.packages))
if(length(new.packages) > 0) {install.packages(new.packages)}
lapply(list.of.packages, require, character.only = TRUE)
# returns string w/o leading whitespace
trim.leading <- function (x) sub("^\\s+", "", x)
# returns string w/o trailing whitespace
trim.trailing <- function (x) sub("\\s+$", "", x)
# returns string w/o leading or trailing whitespace
trim <- function (x) gsub("^\\s+|\\s+$", "", x)
freegeoip <- function(ip, format = ifelse(length(ip)==1,'list','dataframe'))
{
if (1 == length(ip))
{
print(ip)
# a single IP address
if(is.na(ip))
return(data.frame())
url <- paste(c("http://freegeoip.net/json/", trim(ip)), collapse='')
#print(url)
ret <- tryCatch(fromJSON(file = url),
error=function(cond) {
message(paste("URL does not seem to exist:", url))
message("Here's the original error message:")
message(cond)
# Choose a return value in case of error
return(data.frame())
},
warning=function(cond) {
message(paste("URL caused a warning:", url))
message("Here's the original warning message:")
message(cond)
# Choose a return value in case of warning
return(data.frame())
})
print(ret)
if (format == 'dataframe')
ret <- data.frame(t(unlist(ret)))
return(ret)
} else {
ret <- data.frame()
for (i in 1:length(ip))
{
if(i==1 || !(ip[i] %in% ret$ip)){
r <- freegeoip(ip[i], format="dataframe")
if(nrow(r) > 0){
ret <- rbind(ret, r)
}
} else {
#print(ret)
}
}
return(ret)
}
}
data <- read.xlsx(script.in, sheetName = "Sheet1")
data$ip = gsub(" .*$", "", data[,script.ip])
geo <- freegeoip(as.character(data$ip))
merged <- merge(data, geo, by.x = "ip", by.y = "ip", all.x = TRUE)
write.xlsx(merged, file=script.out, sheetName='1',
col.names=TRUE, row.names=FALSE, append=TRUE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment