Created
May 19, 2014 13:29
-
-
Save jayjacobs/0667ffa52ea34dd80042 to your computer and use it in GitHub Desktop.
This creates the slides used in https://www.youtube.com/watch?v=GEjzCR1fN8Y
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ---- configuration -------------------------------------------------------
# Which weeks of the year (as reported by format "%V") should we look at?
whichweek <- c(27, 28, 29, 30)
# How many countries to show?
numcountry <- 25 # 20 at first

# ---- load and filter -----------------------------------------------------
# Read in the marx data; the columns used below are datetime, proto, dpt
# and country.
marx <- read.csv("marx-geo.csv")

# Parse the timestamp. strptime() returns a POSIXlt object, which is a
# list-based class; convert it to POSIXct so it behaves as an ordinary
# column inside the data frame (row subsetting, is.na(), min/max).
marx$datetime <- as.POSIXct(
  strptime(marx$datetime, format = "%Y-%m-%d %H:%M:%S")
)

# Drop rows whose timestamp did not parse (they come back as NA) and keep
# only the columns we want.
marx <- marx[!is.na(marx$datetime), c("datetime", "proto", "dpt", "country")]

# Keep only the requested weeks. "%V" yields a zero-padded string ("05"),
# so compare as integers; matching the strings against numbers directly
# would never select weeks 1-9.
week <- factor(format(marx$datetime, "%V"))
mx <- marx[as.integer(as.character(week)) %in% whichweek, ] # subset of marx

# Finally, keep only the rows for which we have a destination port.
mx <- mx[!is.na(mx$dpt), ]
# Map timestamps to a numeric frame id made of month, day, hour and the
# index of the 5-minute chunk within the hour (00-11), e.g. "07" "01" "13"
# "04" -> 7011304.
frame_id <- function(stamp) {
  chunk <- trunc(as.numeric(format(stamp, "%M")) / 5)
  as.numeric(paste0(format(stamp, "%m%d%H"), sprintf("%02d", chunk)))
}

# tag every record with the 5-minute frame it falls into
mx$frame <- frame_id(mx$datetime)

# Build the timeline to tick through: one timestamp every 300 seconds
# between the first and the last record, converted to frame ids.
allframes.src <- seq(min(mx$datetime), max(mx$datetime), by = 300)
allframes <- frame_id(allframes.src)
# allframes now holds every possible "frame" we want to show.
# Pick the source countries to display ("cnames"): count records per
# country, ignore empty country names, take the numcountry most frequent
# ones and order those names in reverse alphabetical order.
cname.tbl <- table(mx$country)
cname.tbl <- sort(cname.tbl[nzchar(names(cname.tbl))], decreasing = TRUE)
cnames <- rev(sort(head(names(cname.tbl), numcountry)))

# fix the seed so that repeated runs produce the same output
set.seed(2)
# one rainbow colour per country, looked up by country name
ccolor <- setNames(as.character(rainbow(length(cnames))), cnames)

# keep only the records that belong to one of the displayed countries
mx <- mx[is.element(mx$country, cnames), ]

# frequency counter, summed up during later aggregation
mx$freq <- 1
# image dimensions in pixels
ht <- 1152
wt <- 1920

# Country labels are spaced evenly over the image height: cht is the
# height of one band, cpos the y value of each band centre by country name.
cht <- ht / length(cnames)
cpos <- setNames(seq(cht / 2, ht, by = cht), cnames)

# ---- ports ---------------------------------------------------------------
# number of port rows to show per protocol
numport <- 200

# Per-port record counts for one protocol, limited to numport entries by
# summary()'s maxsum argument.
count_ports <- function(protocol) {
  summary(factor(mx$dpt[mx$proto == protocol]), maxsum = numport)
}

# Row labels for one protocol: the first numport-1 port numbers sorted
# numerically, followed by "Other", the whole vector then reversed.
label_ports <- function(counts) {
  shown <- head(names(counts), numport - 1)
  rev(c(sort(as.numeric(shown)), "Other"))
}

tcp <- count_ports("TCP")
tcp.name <- label_ports(tcp)
udp <- count_ports("UDP")
udp.name <- label_ports(udp)

# height of one port row per protocol
ht.tcp <- ht / length(tcp.name)
ht.udp <- ht / length(udp.name)

# y value of each port row centre, looked up by port label
ypos.tcp <- setNames(seq(ht.tcp / 2, ht, by = ht.tcp), tcp.name)
ypos.udp <- setNames(seq(ht.udp / 2, ht, by = ht.udp), udp.name)

# largest count per protocol, used to scale the bar plots on the sides
max.tcp <- max(tcp)
max.udp <- max(udp)

# x positions: UDP ports on the left, countries in the middle, TCP ports
# on the right
udpx <- 150
countryx <- wt / 2
tcpx <- wt - 150

# gap between bars in the bar plots
gap <- 1
################
# State that the rendering loop below reads and updates.
start <- Sys.time() # timing it.
steps <- 60 # how many frames a ball needs to cross the screen
outdf <- data.frame() # data.frame of all balls currently in the air

# One rectangle per port label for the bar plot next to the port column.
# Every bar starts with zero width (xleft == xright) and spans its row
# minus the gap at the top and bottom.
port_bars <- function(labels, x, centres, band) {
  data.frame(name = labels, count = 0,
             xleft = x, ybottom = centres - (band / 2) + gap,
             xright = x, ytop = centres + (band / 2) - gap,
             row.names = NULL)
}
tcp.box <- port_bars(tcp.name, tcpx, ypos.tcp, ht.tcp)
udp.box <- port_bars(udp.name, udpx - 8, ypos.udp, ht.udp)

# text of the date header; NULL until the first frame with data is read
date.label <- NULL

# current text size (cex) of every port label, looked up by label
tcp.size <- setNames(rep(0.5, length(tcp.name)), tcp.name)
udp.size <- setNames(rep(0.5, length(udp.name)), udp.name)

# uncomment to render only the first 40 frames
#allframes <- allframes[1:40]
# Render one PNG per image into ports/. Every image reads the records of
# one 5-minute frame, launches a "ball" per port/country/protocol
# combination, moves all balls already in the air one step towards their
# port, and grows the bar of a port when a ball arrives there.

# the PNG device does not create missing directories
dir.create("ports", showWarnings = FALSE)

# all data frames, plus steps images for the last balls to land, plus 20
# trailing images
nimages <- length(allframes) + steps + 20

for (image in seq_len(nimages)) {
  # image is the number of the picture we are drawing.
  # "it" is the frame id to read, or zero once we ran out of new data.
  it <- if (image <= length(allframes)) allframes[image] else 0
  # select the records of this frame once; they are used several times
  framedata <- mx[mx$frame == it, ]

  if (nrow(framedata) > 0) {
    # update the date label (to be shown at the top)
    date.label <- format(min(framedata$datetime), "%A, %B %e, %l%p")
    # aggregate per port, country and protocol combination
    tmx <- aggregate(freq ~ dpt + country + proto, data = framedata, FUN = sum)
    # for each combination, create one ball (a one-row data.frame)
    newdf <- do.call(rbind, lapply(seq_len(nrow(tmx)), function(i) {
      country <- as.character(tmx$country[i])
      # TCP balls fly to the right (mult 1); everything else is sent to
      # the UDP column on the left (mult -1)
      if (tmx$proto[i] == "TCP") {
        ypos <- ypos.tcp
        band <- ht.tcp
        tox <- tcpx
        mult <- 1
      } else {
        ypos <- ypos.udp
        band <- ht.udp
        tox <- udpx
        mult <- -1
      }
      # ports without a row of their own are collected in "Other"
      dport <- as.character(tmx$dpt[i])
      if (!dport %in% names(ypos)) {
        dport <- "Other"
      }
      # start at the edge of the country label, on the side of the target
      fromx <- countryx + (nchar(country) * 7 * mult)
      fromy <- cpos[country]
      data.frame(fromx = fromx, fromy = fromy,
                 tox = tox - (nchar(dport) * 5 * mult),
                 # jitter the landing height within the row of the
                 # ball's own protocol
                 toy = ypos[dport] + rnorm(1, mean = 0, sd = band / 2),
                 curx = fromx, cury = fromy, col = ccolor[country],
                 size = tmx$freq[i], time = 1, mult = mult,
                 dport = dport, country = country, row.names = NULL,
                 stringsAsFactors = FALSE)
    }))
  } else {
    # no new data for this image
    newdf <- data.frame()
  }

  if (nrow(outdf) > 0) { # we have balls in the air
    # interpolate the position from the step each ball is in, then advance
    progress <- outdf$time / steps
    outdf$curx <- ((outdf$tox - outdf$fromx) * progress) + outdf$fromx
    outdf$cury <- ((outdf$toy - outdf$fromy) * progress) + outdf$fromy
    outdf$time <- outdf$time + 1
  }
  if (nrow(newdf) > 0) {
    # append the new balls, or start fresh if nothing was in the air
    outdf <- if (nrow(outdf) > 0) rbind(outdf, newdf) else newdf
  }

  # outdf has no columns at all until the first balls were created, so
  # everything that reads its columns is guarded by this flag
  has.balls <- nrow(outdf) > 0
  # balls that reach their port in this image
  landed <- if (has.balls) outdf[outdf$time == steps, , drop = FALSE] else NULL

  # set up plot
  png(filename = sprintf("ports/base%04d.png", image), width = wt, height = ht)
  # set small margin in inches
  par(mai = c(0, 0.2, 0, 0.2))
  # open up an empty plot in pixel coordinates with room for a header
  plot(c(0, 0), type = "n", col = "white", xlim = c(-1, wt),
       ylim = c(-1, ht + 100), yaxt = "n", ann = FALSE, xaxt = "n",
       bty = "n", xaxs = "i", yaxs = "i")
  # everything is shifted up by this much to leave room at the bottom
  offset <- 30

  # add country labels
  text(countryx + 5, cpos + offset, labels = cnames, cex = 2, adj = 0.5)

  # add tcp labels: enlarged labels shrink by 10% per image towards 0.5,
  # and a label is enlarged again when a ball arrives at its port
  bigport <- landed$dport[landed$mult == 1]
  tcp.size <- ifelse(tcp.size > 0.5, tcp.size * 0.9, 0.5)
  tcp.size[tcp.name %in% bigport] <- 2
  text(tcpx - 5, ypos.tcp + offset, labels = tcp.name, cex = tcp.size, adj = 1)

  # add udp labels, same scheme
  bigport <- landed$dport[landed$mult == -1]
  udp.size <- ifelse(udp.size > 0.5, udp.size * 0.9, 0.5)
  udp.size[udp.name %in% bigport] <- 2
  text(udpx - 5, ypos.udp + offset, labels = udp.name, cex = udp.size, adj = 0)

  # add the date label; it is still NULL if no data has been read yet and
  # text() does not accept zero-length labels
  if (!is.null(date.label)) {
    text(wt / 2, ht + offset + 5, labels = date.label, cex = 3, adj = c(0.5, 0))
  }
  # add tcp/udp header
  text(tcpx, ht + offset + 5, labels = "TCP", cex = 2, adj = c(0.5, 0))
  text(udpx, ht + offset + 5, labels = "UDP", cex = 2, adj = c(0.5, 0))
  # Stick a little URL in the corner. The x position was an undefined
  # variable before; right-align the text just inside the bottom-right
  # corner so that it is fully visible.
  text(wt - 10, 8, labels = "http://datadrivensecurity.info", cex = 2,
       col = "slateblue", adj = c(1, 0), font = 3)

  if (has.balls) {
    # now include all the points (balls) in the plot
    points(outdf$curx, outdf$cury + offset, type = "p", pch = 16,
           col = as.character(outdf$col), cex = sqrt(outdf$size))

    # grow the port bars for the balls that arrived (time == steps) and
    # take those balls out of the air
    if (nrow(landed) > 0) {
      cbase <- aggregate(size ~ dport + mult, data = landed, FUN = sum)
      for (x in seq_len(nrow(cbase))) {
        port <- as.character(cbase$dport)[x]
        # bar length is scaled so the busiest port ends up 150 wide;
        # mult makes TCP bars grow to the right and UDP bars to the left
        if (cbase$mult[x] == 1) {
          thisone <- which(tcp.box$name == port)
          tcp.box$xright[thisone] <- tcp.box$xright[thisone] +
            (150 * (as.numeric(cbase$size[x]) / max.tcp) * cbase$mult[x])
        } else {
          thisone <- which(udp.box$name == port)
          udp.box$xright[thisone] <- udp.box$xright[thisone] +
            (150 * (as.numeric(cbase$size[x]) / max.udp) * cbase$mult[x])
        }
      }
      outdf <- outdf[outdf$time < steps, ]
    }
  }

  # now add the two bar plots with a "rect"
  rect(tcp.box$xleft, tcp.box$ybottom + offset,
       tcp.box$xright, tcp.box$ytop + offset, col = "steelblue")
  rect(udp.box$xleft, udp.box$ybottom + offset,
       udp.box$xright, udp.box$ytop + offset, col = "steelblue")
  # close off this image
  dev.off()

  # include something to watch while this is running...
  if (image %% 10 == 0) {
    #print(outdf)
    cat(image, "of", nimages, "\n")
  }
  #if (image==70) { stop() }
}
# report how long the rendering took
end <- Sys.time()
print(difftime(end, start))
# For reference: 8144 frames with the top 200 ports took 2.228126 hours.
# The images can then be turned into an HD movie (hopefully) with:
# avconv -f image2 -i ports/base%04d.png -r 25 -b 50000000 -s 1920x1080 -an output.mp4
# Searching for "stop motion" should yield more options.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment