Skip to content

Instantly share code, notes, and snippets.

@prise6
Last active December 16, 2016 09:21
Show Gist options
  • Save prise6/b873f4934083dd9e70e4977da711be0d to your computer and use it in GitHub Desktop.
Save prise6/b873f4934083dd9e70e4977da711be0d to your computer and use it in GitHub Desktop.
IPV4, IPV6, ASN from RIRs by date
# ------------------------------------------------
#
# GET IP RANGE BY DATE
# with RIR information
#
# ------------------------------------------------
library(data.table)
library(iptools)
library(plyr)
# Read one pipe-separated RIR "delegated" statistics file into a data.table.
#
# Args:
#   url:         Location of the statistics file. Passed to download.file()
#                when `compression` is set, otherwise directly to fread().
#   output.tmp:  Path where a compressed file is stored before being read.
#                The file is removed when the function exits, even on error.
#   compression: NULL to read `url` directly, or "bz" / "gz" to download the
#                file first and read it through bzfile() / gzfile().
#   drop:        Column(s) handed to fread(drop = ). When not NULL, one extra
#                "character" class is appended to the column classes.
#   skip:        Number of leading lines to skip.
#
# Returns:
#   A data.table with columns REGISTRE, CC, TYPE, START, VALUE, DATE, STATUS,
#   with duplicated rows removed.
#
# Raises:
#   An error ("Compression non valide") when `compression` is neither NULL,
#   "bz" nor "gz"; this is checked before anything is downloaded.
read.dirs <- function(url, output.tmp = "Datas/tmp", compression = NULL,
                      drop = NULL, skip = 0) {
  colClasses <- c("character", "character", "character", "character",
                  "integer", "integer", "character")
  col.names <- c("REGISTRE", "CC", "TYPE", "START", "VALUE", "DATE", "STATUS")
  if (!is.null(drop)) {
    colClasses <- c(colClasses, "character")
  }

  if (!is.null(compression)) {
    # Fail fast: do not download anything for an unsupported compression.
    if (!compression %in% c("bz", "gz")) {
      stop("Compression non valide")
    }

    # Make sure the download target can be written, and always clean it up,
    # including when the download or the parsing fails.
    dir.create(dirname(output.tmp), recursive = TRUE, showWarnings = FALSE)
    on.exit(unlink(output.tmp), add = TRUE)

    download.file(
      url = url,
      destfile = output.tmp,
      mode = "wb",
      quiet = TRUE
    )

    zz <- if (compression == "bz") bzfile(output.tmp) else gzfile(output.tmp)

    # NOTE(review): `drop` is not applied in this branch, yet it still
    # lengthens `colClasses` above; confirm that compressed files never
    # need a dropped column.
    d <- data.table(read.table(
      file = zz,
      sep = "|",
      skip = skip,
      encoding = "UTF-8",
      stringsAsFactors = FALSE,
      colClasses = colClasses,
      col.names = col.names,
      na.strings = c("", "NA", "NULL"),
      header = FALSE
    ))
  } else {
    d <- data.table::fread(
      input = url,
      sep = "|",
      header = FALSE,
      skip = skip,
      encoding = "UTF-8",
      stringsAsFactors = FALSE,
      drop = drop,
      colClasses = colClasses,
      col.names = col.names,
      na.strings = c("", "NA", "NULL"),
      showProgress = FALSE
    )
  }

  unique(d)
}
# Split `count` consecutive IPv4 addresses starting at `start` (dotted quad)
# into aligned CIDR blocks that cover exactly that span.
.ipv4_cidr_blocks <- function(start, count) {
  # Unparseable start address: fall back to the single-range formula.
  if (!grepl("^[0-9]{1,3}(\\.[0-9]{1,3}){3}$", start)) {
    return(paste0(start, "/", 32 - log2(count)))
  }

  octets <- as.numeric(strsplit(start, ".", fixed = TRUE)[[1L]])
  address <- sum(octets * c(16777216, 65536, 256, 1))
  remaining <- count
  blocks <- character(0)

  while (remaining > 0) {
    # Largest power of two that fits in what is left ...
    bits <- floor(log2(remaining))
    # ... shrunk until the block is aligned on the current address.
    while (address %% 2^bits != 0) {
      bits <- bits - 1
    }
    blocks[length(blocks) + 1L] <- sprintf(
      "%d.%d.%d.%d/%d",
      address %/% 16777216,
      (address %/% 65536) %% 256,
      (address %/% 256) %% 256,
      address %% 256,
      32 - bits
    )
    address <- address + 2^bits
    remaining <- remaining - 2^bits
  }

  blocks
}

# Fetch the IPv4 delegations published by one RIR for a given date.
#
# Args:
#   rir:  One of "ARIN", "RIPE", "AFRINIC", "APNIC", "LACNIC".
#   date: Date of the statistics file, forwarded to getUrlFromRIR().
#
# Returns:
#   A data.table with columns REGISTRE, CC, DATE (IDate) and RANGE (CIDR
#   notation). A record whose address count is not a power of two cannot be
#   written as a single CIDR range, so it is expanded into several rows (one
#   per aligned CIDR block) that share the same REGISTRE, CC and DATE.
get.ranges <- function(rir, date = NULL) {
  url <- getUrlFromRIR(rir, date = date)

  # Per-registry reader settings: compression, dropped column, header lines.
  reader_args <- switch(
    rir,
    ARIN = list(compression = NULL, drop = 8, skip = 4),
    RIPE = list(compression = "bz", drop = NULL, skip = 4),
    AFRINIC = list(compression = NULL, drop = NULL, skip = 4),
    APNIC = list(compression = "gz", drop = NULL, skip = 31),
    LACNIC = list(compression = NULL, drop = NULL, skip = 4)
  )

  d <- read.dirs(
    url,
    output.tmp = "Datas/tmp",
    compression = reader_args$compression,
    drop = reader_args$drop,
    skip = reader_args$skip
  )
  d <- d[TYPE == "ipv4"]

  # Fast vectorised path: exact for every power-of-two address count.
  d[, RANGE := paste0(START, "/", 32 - log2(VALUE))]

  # Counts that are not a power of two would give a fractional prefix
  # (e.g. "/22.415"); expand those records into valid CIDR blocks instead.
  counts <- d[["VALUE"]]
  needs_split <- !is.na(counts) & counts > 0L &
    bitwAnd(counts, counts - 1L) != 0L
  if (any(needs_split)) {
    idx <- which(needs_split)
    blocks <- Map(.ipv4_cidr_blocks, d[["START"]][idx], counts[idx])
    times <- rep(1L, nrow(d))
    times[idx] <- lengths(blocks)
    d <- d[rep(seq_len(nrow(d)), times)]
    d[rep(needs_split, times), RANGE := unlist(blocks, use.names = FALSE)]
  }

  d[, c("TYPE", "START", "VALUE", "STATUS") := NULL]
  d[, DATE := as.IDate(as.character(DATE), format = "%Y%m%d")]
  d[]
}
# Build the FTP URL of the "delegated" statistics file of one RIR for a date.
#
# Args:
#   rir:  One of "ARIN", "RIPE", "AFRINIC", "APNIC", "LACNIC".
#   date: NULL or "today" for the current date; otherwise a Date or anything
#         as.Date() can parse (e.g. "2016-11-01").
#
# Returns:
#   A length-one character vector holding the URL.
#
# Raises:
#   An error when `rir` is not one of the supported registries.
getUrlFromRIR <- function(rir, date = NULL) {
  # Test for NULL first: comparing NULL (or a Date) with "today" does not
  # yield a usable TRUE/FALSE, and a Date operand even raises an error.
  if (is.null(date) || identical(date, "today")) {
    date <- Sys.Date()
  }
  # The date is only formatted below, so a base Date is sufficient.
  date <- as.Date(date)

  year <- format(date, "%Y")
  stamp <- format(date, "%Y%m%d")
  # ARIN uses a different path for files of the current year.
  is_current_year <- year == format(Sys.Date(), "%Y")

  switch(
    rir,
    ARIN = if (is_current_year) {
      paste0("ftp://ftp.arin.net/pub/stats/arin/",
             "delegated-arin-extended-", stamp)
    } else {
      paste0("ftp://ftp.arin.net/pub/stats/arin/archive/", year,
             "/delegated-arin-extended-", stamp)
    },
    RIPE = paste0("ftp://ftp.ripe.net/pub/stats/ripencc/", year,
                  "/delegated-ripencc-", stamp, ".bz2"),
    AFRINIC = paste0("ftp://ftp.afrinic.net/pub/stats/afrinic/", year,
                     "/delegated-afrinic-", stamp),
    APNIC = paste0("ftp://ftp.apnic.net/pub/stats/apnic/", year,
                   "/delegated-apnic-", stamp, ".gz"),
    LACNIC = paste0("ftp://ftp.lacnic.net/pub/stats/lacnic/",
                    "delegated-lacnic-", stamp),
    stop(sprintf("%s RIR doesn't exist", rir))
  )
}
# Add numeric range boundaries to an IPv4 range table and prefix its columns.
#
# Args:
#   ipv4: data.table with at least the columns RANGE, CC, DATE and DATE_ALIM.
#         It is modified by reference.
#
# Returns:
#   The same data.table, with integer64 columns for the lower and upper bound
#   of each RANGE, every column name prefixed with "IPV4_", and rows sorted by
#   IPV4_RANGE (ascending) then IPV4_DATE (descending).
formatIPV4 <- function(ipv4) {
  # Columns 3 and 4 of range_boundaries() are taken as the numeric min / max.
  bounds <- ipv4[, as.list(iptools::range_boundaries(RANGE)[, 3:4])]
  ipv4[, MIN_NUMERIC := bit64::as.integer64(bounds[[1L]])]
  ipv4[, MAX_NUMERIC := bit64::as.integer64(bounds[[2L]])]

  setnames(ipv4, paste0("IPV4_", names(ipv4)))

  # Key first, then reorder: the keyed sort decides the order of ties.
  setkeyv(ipv4, c("IPV4_DATE_ALIM", "IPV4_RANGE", "IPV4_CC", "IPV4_DATE"))
  setorder(ipv4, IPV4_RANGE, -IPV4_DATE)

  ipv4
}
# Attach the country code of the matching IPv4 range to each row of `x`.
#
# Args:
#   x:       data.table holding IP addresses in the column named by `x.field`.
#   y:       data.table of ranges with IPV4_* columns (see formatIPV4()).
#            It is re-keyed by reference on `y.start` / `y.end`.
#   x.field: Name of the column of `x` that holds the IP addresses.
#   y.start: Name of the column of `y` holding the numeric range start.
#   y.end:   Name of the column of `y` holding the numeric range end.
#
# Returns:
#   `x` left-joined on `x.field` with the matched ranges, of which only the
#   IPV4_CC column (as a factor) is kept besides the join column.
findIPCountyCode <- function(x, y, x.field, y.start, y.end) {
  # One row per distinct, non-missing address.
  lookup <- x[, list(IP = unique(get(x.field)))]
  lookup <- lookup[!is.na(IP)]

  # Represent every address as the degenerate interval [X, X].
  lookup[, X := bit64::as.integer64(iptools::ip_to_numeric(IP))]
  lookup[, dummy := X]
  setnames(lookup, old = "IP", new = x.field)
  setkeyv(lookup, c("X", "dummy"))
  setkeyv(y, c(y.start, y.end))

  # Keep matching addresses only, and only the first matching range of each.
  matched <- data.table::foverlaps(lookup, y, nomatch = 0L, mult = "first")
  matched[, c("dummy", "X", "IPV4_REGISTRE",
              "IPV4_DATE", "IPV4_RANGE",
              "IPV4_DATE_ALIM", "IPV4_MIN_NUMERIC",
              "IPV4_MAX_NUMERIC") := NULL]
  matched[, IPV4_CC := factor(IPV4_CC)]

  merge(x, matched, all.x = TRUE, by = x.field)
}
# Usage

# Download one snapshot per registry and time the whole operation.
system.time({
  arin <- get.ranges(rir = "ARIN", date = "2016-11-01")
  ripe <- get.ranges(rir = "RIPE", date = "2016-11-01")
  afrinic <- get.ranges(rir = "AFRINIC", date = "2015-01-02")
  apnic <- get.ranges(rir = "APNIC", date = "2015-01-02")
  lacnic <- get.ranges(rir = "LACNIC", date = "2015-01-02")
})

# Stack the five registries for several dates, tagging each row with the
# requested date (DATE_ALIM), then compute the numeric boundaries.
system.time({
  dates <- c("2014-01-10", "2015-01-10", "2016-01-10")
  ipv4 <- formatIPV4(rbindlist(lapply(dates, function(snapshot_date) {
    per_rir <- lapply(
      c("ARIN", "RIPE", "AFRINIC", "APNIC", "LACNIC"),
      get.ranges,
      date = snapshot_date
    )
    rbindlist(per_rir)[, DATE_ALIM := snapshot_date]
  })))
})

# `connexions` is not defined in this file; it is expected to be an existing
# data.table with an ADR_IP column.
connexions <- findIPCountyCode(
  connexions,
  ipv4,
  x.field = "ADR_IP",
  y.start = "IPV4_MIN_NUMERIC",
  y.end = "IPV4_MAX_NUMERIC"
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment