Last active
August 29, 2015 14:02
-
-
Save davidski/1c675266945a61db7ef5 to your computer and use it in GitHub Desktop.
ports ~ country + time visualizaton code as stolen from @jayjacobs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Inbound port movie maker | |
# Adopted nearly wholesale from @jayjacobs at DataDrivenSecurity.info | |
# mangled by @davidski | |
# which weeks should we look at? | |
#whichweek <- c(27, 28, 29, 30) | |
# how many countries to show? | |
numcountry <- 25 # 20 at first | |
tcp.protocol <- 6 | |
udp.protocol <- 17 | |
# read in event_data from your friendly neighborhood data store | |
event_data <- read.csv("results_scan.csv", header=T, sep="\t", comment.char="", | |
colClasses=c("character", "factor", "integer", "integer")) | |
# convert datetime to POSIX date/time object | |
event_data$datetime <- strptime(event_data$X.timestamp, | |
format='%Y-%m-%dT%H:%M:%S', tz='GMT') | |
names(event_data) <- c("timestamp", "country", "proto", "dpt", "datetime") #normalize names to what @jayjacobs expects | |
# drop any weird date formats | |
# and filter just the columns we want | |
#event_data <- event_data[complete.cases(event_data[ ,"datetime"]), | |
# c("datetime", "proto", "dpt", "country")] | |
event_data <- event_data[, c("datetime", "proto", "dpt", "country")] | |
# filter out the weeks. | |
#week <- factor(format(event_data$datetime, "%V")) | |
#mx <- event_data[week %in% whichweek, ] # mx is subset of event_data data | |
# finally, filter for only those entries where we have a port | |
mx <- event_data[!is.na(event_data$dpt), ] | |
# divide into 5 minute chunks | |
mx$frame <- as.numeric(paste0(format(mx$datetime, "%m%d%H"), | |
sprintf("%02d", trunc(as.numeric(format(mx$datetime, "%M"))/5)))) | |
# create the timeline to tick through | |
allframes.src <- seq(min(mx$datetime), max(mx$datetime), by=300) | |
allframes <- as.numeric(paste0(format(allframes.src, "%m%d%H"), | |
sprintf("%02d", trunc(as.numeric(format(allframes.src, "%M"))/5)))) | |
# allframes now has every possible "frame" we want to show in it. | |
# set up source country names in "cnames" | |
cname.tbl <- table(mx$country) | |
cname.tbl <- sort(cname.tbl[nchar(names(cname.tbl)) > 0], decreasing=T) | |
cnames <- sort(head(names(cname.tbl), numcountry), decreasing=T) | |
cnames <- head(cnames[nchar(cnames) > 0], numcountry) | |
# setting a seed to ensure colors are consistent | |
library(RColorBrewer) | |
#set.seed(2) | |
#ccolor <- as.character(rainbow(length(cnames))) | |
ccolor <- colorRampPalette(brewer.pal(12, 'Set3'))(length(cnames)) | |
names(ccolor) <- cnames | |
# now filter out where country is known. | |
mx <- mx[mx$country %in% cnames,] | |
# add in a frequency counter for later agregation | |
mx$freq <- 1 | |
# set up dimensions of image, set for widescreen laptop | |
ht <- 900 | |
wt <- 1600 | |
# set up label positions, space them evenly | |
cht <- ht / length(cnames) | |
# cpos has the country positions for y value | |
cpos <- seq(cht / 2, ht, by=cht) | |
names(cpos) <- cnames | |
# look at ports | |
# set number of ports to view | |
numport <- 100 | |
#summarize the frequency of the ports (number of hits) | |
tcp <- summary(factor(mx$dpt[mx$proto==tcp.protocol]), maxsum=numport) | |
udp <- summary(factor(mx$dpt[mx$proto==udp.protocol]), maxsum=numport) | |
#set the last element to other as the catch all | |
tcp.name <- rev(c(sort(as.numeric(head(names(tcp), numport - 1))), "Other")) | |
udp.name <- rev(c(sort(as.numeric(head(names(udp), numport - 1))), "Other")) | |
#determine the spacing for our port names across the height of the image | |
ht.tcp <- ht / length(tcp.name) | |
ht.udp <- ht / length(udp.name) | |
# hpos has the host positions for y value | |
ypos.tcp <- seq(ht.tcp/2, ht, by=ht.tcp) | |
ypos.udp <- seq(ht.udp/2, ht, by=ht.udp) | |
names(ypos.tcp) <- tcp.name | |
names(ypos.udp) <- udp.name | |
# need to know how to scale the bar plots on the side | |
max.tcp <- max(tcp) | |
max.udp <- max(udp) | |
# set x position for hosts and countries | |
tcpx <- wt - 150 #width minus constant for right offset | |
udpx <- 150 #left ofset | |
hostx <- 50 #offset for the credits blurb | |
countryx <- wt / 2 #country is centered | |
gap <- 1 # gap between bars in barplots | |
################ | |
# okay, this is where we set things up to loop on | |
start <- Sys.time() # timing it | |
steps <- 60 # how many frames for moving a ball across the screen | |
outdf <- data.frame() # data.frame of all balls | |
# data for barplot on the host side | |
tcp.box <- data.frame(name=tcp.name, count=0, | |
xleft=tcpx, ybottom=ypos.tcp - (ht.tcp/2) + gap, | |
xright=tcpx, ytop=ypos.tcp + (ht.tcp/2) - gap, row.names = NULL) | |
udp.box <- data.frame(name=udp.name, count=0, | |
xleft=udpx-8, ybottom=ypos.udp-(ht.udp/2)+gap, | |
xright=udpx-8, ytop=ypos.udp+(ht.udp/2)-gap, row.names=NULL) | |
date.label <- NULL | |
#size of all labels is going to be 0.5 by default | |
tcp.size <- rep(0.5, length(tcp.name)) | |
names(tcp.size) <- tcp.name | |
udp.size <- rep(0.5, length(udp.name)) | |
names(udp.size) <- udp.name | |
# allframes <- allframes[1:40] # for testing, can cut down to a handful of frames | |
# loop through all the image frames, plus time for the balls to "land" and a | |
# smidge of padding to "make the experience more pleasureable" | |
# this is a beast of a loop to run through. Is it possible to parallelize this operation? | |
for(image in seq(length(allframes)+steps+20)) { | |
# image is the frame number we are showing. | |
# set "it" to be the frame ID, or zero if we are done reading in new data | |
it <- ifelse(image <= length(allframes), allframes[image], 0) | |
# test if we have any data to read for this frame | |
if (sum(mx$frame==it) > 0) { | |
# update the date label (to be shown at the top) - weekday, month, day, HH:MM TZ | |
date.label <- format(min(mx$datetime[mx$frame==it]), "%A, %B %e, %l%p %Z") | |
# prep the data | |
# aggregate, per port and country combination | |
tmx <- aggregate(freq ~ dpt + country + proto, data=mx[mx$frame==it, ], FUN=sum) | |
# foreach host+country combination, create a row in data.frame | |
newdf <- do.call(rbind, lapply(seq(nrow(tmx)), function(i) { | |
country <- as.character(tmx$country[i]) | |
fromy <- cpos[country] | |
if (tmx$proto[i]==tcp.protocol) { | |
dport <- as.character(ifelse(any(names(ypos.tcp) %in% tmx$dpt[i]), tmx$dpt[i], "Other")) | |
toy <- ifelse(any(names(ypos.tcp) %in% dport), ypos.tcp[dport], ypos.tcp["Other"]) | |
tox <- tcpx | |
mult <- 1 | |
fromx <- countryx + (nchar(country) * 7 * mult) | |
dpt <- tmx$dpt[i] | |
} else { | |
dport <- as.character(ifelse(any(names(ypos.udp) %in% tmx$dpt[i]), tmx$dpt[i], "Other")) | |
toy <- ifelse(any(names(ypos.udp) %in% dport), ypos.udp[dport], ypos.udp["Other"]) | |
tox <- udpx | |
mult <- -1 | |
fromx <- countryx + (nchar(country) * 7 * mult) | |
dpt <- tmx$dpt[i] | |
} | |
data.frame(fromx=fromx, fromy=fromy, | |
tox= tox - (nchar(dport) * 5 * mult), | |
toy= toy + rnorm(1, mean=0, sd=ht.tcp / 2), | |
curx=fromx, cury=fromy, col=ccolor[country], | |
size=tmx$freq[i], time=1, mult=mult, | |
dport=dport, country=country, dpt=dpt, row.names=NULL) | |
})) | |
} else { | |
# else we have no new data, just make empty data.frame | |
newdf <- data.frame() | |
} | |
if(nrow(outdf)>0) { # we have balls in the air | |
# update the current value based on which step the ball is in. | |
outdf$curx <- ((outdf$tox - outdf$fromx) * (outdf$time/steps)) + outdf$fromx | |
outdf$cury <- ((outdf$toy - outdf$fromy) * (outdf$time/steps)) + outdf$fromy | |
outdf$time <- outdf$time + 1 | |
# rbind the old data with new data | |
if (nrow(newdf)) { | |
outdf <- rbind(outdf, newdf) | |
} | |
} else { # fresh df | |
if (nrow(newdf)) { | |
outdf <- newdf | |
} | |
} | |
# set up plot | |
png(filename=sprintf("ports/base%04d.png", image), width=wt, height=ht) | |
# set small margin in inches | |
par(mai=c(0,0.2,0,0.2)) | |
# open up an empty plot | |
plot(c(0,0), type="n", col="white", xlim=c(-1, wt), ylim=c(-1, ht + 100), | |
yaxt="n", ann=FALSE, xaxt="n", bty="n", xaxs="i", yaxs="i") | |
offset <- 30 | |
# add country labels | |
text(countryx+5, cpos+offset, labels=cnames, cex=2, adj=0.5) | |
# add tcp labels | |
bigport <- outdf$dport[outdf$time==steps & outdf$mult==1] | |
tcp.size <- ifelse(tcp.size>0.5, tcp.size * 0.9, 0.5) | |
tcp.size[tcp.name %in% bigport] <- 2 | |
text(tcpx-5, ypos.tcp+offset, labels=tcp.name, cex=tcp.size, adj=1) | |
# add tcp labels | |
bigport <- outdf$dport[outdf$time==steps & outdf$mult==-1] | |
# udp.size <- sapply(udp.size, function(x) mean(c(x, 1))) | |
udp.size <- ifelse(udp.size>0.5, udp.size * 0.9, 0.5) | |
udp.size[udp.name %in% bigport] <- 2 | |
text(udpx-5, ypos.udp+offset, labels=udp.name, cex=udp.size, adj=0) | |
# add the date labels | |
text(wt/2, ht+offset+5, labels=date.label, cex=3, adj=c(0.5, 0)) | |
# add tcp/udp header | |
text(tcpx, ht+offset+5, labels="TCP", cex=2, adj=c(0.5, 0)) | |
text(udpx, ht+offset+5, labels="UDP", cex=2, adj=c(0.5, 0)) | |
# stick a little URL in the corner | |
text(hostx, 8, labels="Thanks to http://datadrivensecurity.info", cex=2, | |
col="slateblue", adj=c(0.5, 0), font=3) | |
# now include all the points (balls) in the plot | |
with(outdf, points(curx, cury + offset, type="p", pch=16, | |
col=as.character(col), cex=log10(size))) #cex was sqrt(size) | |
# test to see if we should increase the country barplot (look for time==1) | |
if (sum(outdf$time==steps) > 0) { | |
cbase <- aggregate(size ~ dport + mult, data=outdf[outdf$time==steps, ], FUN=sum) | |
for(x in seq(nrow(cbase))) { | |
if (cbase$mult[x]==1) { | |
thisone <- which(tcp.box$name==as.character(cbase$dport)[x]) | |
tcp.box$xright[thisone] <- tcp.box$xright[thisone] + | |
(150 * (as.numeric(cbase$size[x]) / max.tcp) * cbase$mult[x]) | |
} else { | |
thisone <- which(udp.box$name==as.character(cbase$dport)[x]) | |
udp.box$xright[thisone] <- udp.box$xright[thisone] + | |
(150 * (as.numeric(cbase$size[x]) / max.udp) * cbase$mult[x]) | |
} | |
} | |
outdf <- outdf[outdf$time<steps, ] #prune the data to be processed | |
} | |
# now add the two bar plots with a "rect" | |
with(tcp.box, rect(xleft, ybottom + offset, xright, ytop + offset, col="steelblue")) | |
with(udp.box, rect(xleft, ybottom + offset, xright, ytop + offset, col="steelblue")) | |
# close off this image | |
dev.off() | |
# include something to watch while this is running... | |
if (image %% 10 == 0) { | |
#print(outdf) | |
cat(image, "of", length(allframes) + steps + 20, "\n") | |
} | |
# update: maybe want to modify this to use txtProgressBar() | |
} | |
end <- Sys.time() | |
print(end - start) | |
#run with avconc -i 'base%04d.png' -q 4 -o out.mp4 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment