Last active
December 3, 2015 04:20
-
-
Save framingeinstein/c51981d439873f77e0e7 to your computer and use it in GitHub Desktop.
Geocodes ip address using R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!RScript | |
# | |
# USAGE: RScript geocode.R --in=input.xlsx --out=output.xslx --ip=column_name_of_ip_address_field | |
# | |
initial.options <- commandArgs(trailingOnly = FALSE) | |
file.arg.name <- "--file=" | |
script.name <- sub(file.arg.name, "", initial.options[grep(file.arg.name, initial.options)]) | |
script.basename <- dirname(script.name) | |
script.in <- sub("--in=", "", initial.options[grep("--in=", initial.options)]) | |
script.out <- sub("--out=", "", initial.options[grep("--out=", initial.options)]) | |
script.ip <- sub("--ip=", "", initial.options[grep("--ip=", initial.options)]) | |
setwd("/Users/jason/projects/geolocate") | |
list.of.packages <- c("xlsx", "rjson") | |
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])] | |
print(length(new.packages)) | |
if(length(new.packages) > 0) {install.packages(new.packages)} | |
lapply(list.of.packages, require, character.only = TRUE) | |
# returns string w/o leading whitespace | |
trim.leading <- function (x) sub("^\\s+", "", x) | |
# returns string w/o trailing whitespace | |
trim.trailing <- function (x) sub("\\s+$", "", x) | |
# returns string w/o leading or trailing whitespace | |
trim <- function (x) gsub("^\\s+|\\s+$", "", x) | |
freegeoip <- function(ip, format = ifelse(length(ip)==1,'list','dataframe')) | |
{ | |
if (1 == length(ip)) | |
{ | |
print(ip) | |
# a single IP address | |
if(is.na(ip)) | |
return(data.frame()) | |
url <- paste(c("http://freegeoip.net/json/", trim(ip)), collapse='') | |
#print(url) | |
ret <- tryCatch(fromJSON(file = url), | |
error=function(cond) { | |
message(paste("URL does not seem to exist:", url)) | |
message("Here's the original error message:") | |
message(cond) | |
# Choose a return value in case of error | |
return(data.frame()) | |
}, | |
warning=function(cond) { | |
message(paste("URL caused a warning:", url)) | |
message("Here's the original warning message:") | |
message(cond) | |
# Choose a return value in case of warning | |
return(data.frame()) | |
}) | |
print(ret) | |
if (format == 'dataframe') | |
ret <- data.frame(t(unlist(ret))) | |
return(ret) | |
} else { | |
ret <- data.frame() | |
for (i in 1:length(ip)) | |
{ | |
if(i==1 || !(ip[i] %in% ret$ip)){ | |
r <- freegeoip(ip[i], format="dataframe") | |
if(nrow(r) > 0){ | |
ret <- rbind(ret, r) | |
} | |
} else { | |
#print(ret) | |
} | |
} | |
return(ret) | |
} | |
} | |
data <- read.xlsx(script.in, sheetName = "Sheet1") | |
data$ip = gsub(" .*$", "", data[,script.ip]) | |
geo <- freegeoip(as.character(data$ip)) | |
merged <- merge(data, geo, by.x = "ip", by.y = "ip", all.x = TRUE) | |
write.xlsx(merged, file=script.out, sheetName='1', | |
col.names=TRUE, row.names=FALSE, append=TRUE) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment