Skip to content

Instantly share code, notes, and snippets.

@davidski
Last active August 29, 2015 14:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save davidski/1c675266945a61db7ef5 to your computer and use it in GitHub Desktop.
Save davidski/1c675266945a61db7ef5 to your computer and use it in GitHub Desktop.
ports ~ country + time visualizaton code as stolen from @jayjacobs
# Inbound port movie maker
# Adopted nearly wholesale from @jayjacobs at DataDrivenSecurity.info
# mangled by @davidski
# which weeks should we look at?
#whichweek <- c(27, 28, 29, 30)
# how many countries to show?
numcountry <- 25 # 20 at first
tcp.protocol <- 6
udp.protocol <- 17
# read in event_data from your friendly neighborhood data store
event_data <- read.csv("results_scan.csv", header=T, sep="\t", comment.char="",
colClasses=c("character", "factor", "integer", "integer"))
# convert datetime to POSIX date/time object
event_data$datetime <- strptime(event_data$X.timestamp,
format='%Y-%m-%dT%H:%M:%S', tz='GMT')
names(event_data) <- c("timestamp", "country", "proto", "dpt", "datetime") #normalize names to what @jayjacobs expects
# drop any weird date formats
# and filter just the columns we want
#event_data <- event_data[complete.cases(event_data[ ,"datetime"]),
# c("datetime", "proto", "dpt", "country")]
event_data <- event_data[, c("datetime", "proto", "dpt", "country")]
# filter out the weeks.
#week <- factor(format(event_data$datetime, "%V"))
#mx <- event_data[week %in% whichweek, ] # mx is subset of event_data data
# finally, filter for only those entries where we have a port
mx <- event_data[!is.na(event_data$dpt), ]
# divide into 5 minute chunks
mx$frame <- as.numeric(paste0(format(mx$datetime, "%m%d%H"),
sprintf("%02d", trunc(as.numeric(format(mx$datetime, "%M"))/5))))
# create the timeline to tick through
allframes.src <- seq(min(mx$datetime), max(mx$datetime), by=300)
allframes <- as.numeric(paste0(format(allframes.src, "%m%d%H"),
sprintf("%02d", trunc(as.numeric(format(allframes.src, "%M"))/5))))
# allframes now has every possible "frame" we want to show in it.
# set up source country names in "cnames"
cname.tbl <- table(mx$country)
cname.tbl <- sort(cname.tbl[nchar(names(cname.tbl)) > 0], decreasing=T)
cnames <- sort(head(names(cname.tbl), numcountry), decreasing=T)
cnames <- head(cnames[nchar(cnames) > 0], numcountry)
# setting a seed to ensure colors are consistent
library(RColorBrewer)
#set.seed(2)
#ccolor <- as.character(rainbow(length(cnames)))
ccolor <- colorRampPalette(brewer.pal(12, 'Set3'))(length(cnames))
names(ccolor) <- cnames
# now filter out where country is known.
mx <- mx[mx$country %in% cnames,]
# add in a frequency counter for later agregation
mx$freq <- 1
# set up dimensions of image, set for widescreen laptop
ht <- 900
wt <- 1600
# set up label positions, space them evenly
cht <- ht / length(cnames)
# cpos has the country positions for y value
cpos <- seq(cht / 2, ht, by=cht)
names(cpos) <- cnames
# look at ports
# set number of ports to view
numport <- 100
#summarize the frequency of the ports (number of hits)
tcp <- summary(factor(mx$dpt[mx$proto==tcp.protocol]), maxsum=numport)
udp <- summary(factor(mx$dpt[mx$proto==udp.protocol]), maxsum=numport)
#set the last element to other as the catch all
tcp.name <- rev(c(sort(as.numeric(head(names(tcp), numport - 1))), "Other"))
udp.name <- rev(c(sort(as.numeric(head(names(udp), numport - 1))), "Other"))
#determine the spacing for our port names across the height of the image
ht.tcp <- ht / length(tcp.name)
ht.udp <- ht / length(udp.name)
# hpos has the host positions for y value
ypos.tcp <- seq(ht.tcp/2, ht, by=ht.tcp)
ypos.udp <- seq(ht.udp/2, ht, by=ht.udp)
names(ypos.tcp) <- tcp.name
names(ypos.udp) <- udp.name
# need to know how to scale the bar plots on the side
max.tcp <- max(tcp)
max.udp <- max(udp)
# set x position for hosts and countries
tcpx <- wt - 150 #width minus constant for right offset
udpx <- 150 #left ofset
hostx <- 50 #offset for the credits blurb
countryx <- wt / 2 #country is centered
gap <- 1 # gap between bars in barplots
################
# okay, this is where we set things up to loop on
start <- Sys.time() # timing it
steps <- 60 # how many frames for moving a ball across the screen
outdf <- data.frame() # data.frame of all balls
# data for barplot on the host side
tcp.box <- data.frame(name=tcp.name, count=0,
xleft=tcpx, ybottom=ypos.tcp - (ht.tcp/2) + gap,
xright=tcpx, ytop=ypos.tcp + (ht.tcp/2) - gap, row.names = NULL)
udp.box <- data.frame(name=udp.name, count=0,
xleft=udpx-8, ybottom=ypos.udp-(ht.udp/2)+gap,
xright=udpx-8, ytop=ypos.udp+(ht.udp/2)-gap, row.names=NULL)
date.label <- NULL
#size of all labels is going to be 0.5 by default
tcp.size <- rep(0.5, length(tcp.name))
names(tcp.size) <- tcp.name
udp.size <- rep(0.5, length(udp.name))
names(udp.size) <- udp.name
# allframes <- allframes[1:40] # for testing, can cut down to a handful of frames
# loop through all the image frames, plus time for the balls to "land" and a
# smidge of padding to "make the experience more pleasureable"
# this is a beast of a loop to run through. Is it possible to parallelize this operation?
for(image in seq(length(allframes)+steps+20)) {
# image is the frame number we are showing.
# set "it" to be the frame ID, or zero if we are done reading in new data
it <- ifelse(image <= length(allframes), allframes[image], 0)
# test if we have any data to read for this frame
if (sum(mx$frame==it) > 0) {
# update the date label (to be shown at the top) - weekday, month, day, HH:MM TZ
date.label <- format(min(mx$datetime[mx$frame==it]), "%A, %B %e, %l%p %Z")
# prep the data
# aggregate, per port and country combination
tmx <- aggregate(freq ~ dpt + country + proto, data=mx[mx$frame==it, ], FUN=sum)
# foreach host+country combination, create a row in data.frame
newdf <- do.call(rbind, lapply(seq(nrow(tmx)), function(i) {
country <- as.character(tmx$country[i])
fromy <- cpos[country]
if (tmx$proto[i]==tcp.protocol) {
dport <- as.character(ifelse(any(names(ypos.tcp) %in% tmx$dpt[i]), tmx$dpt[i], "Other"))
toy <- ifelse(any(names(ypos.tcp) %in% dport), ypos.tcp[dport], ypos.tcp["Other"])
tox <- tcpx
mult <- 1
fromx <- countryx + (nchar(country) * 7 * mult)
dpt <- tmx$dpt[i]
} else {
dport <- as.character(ifelse(any(names(ypos.udp) %in% tmx$dpt[i]), tmx$dpt[i], "Other"))
toy <- ifelse(any(names(ypos.udp) %in% dport), ypos.udp[dport], ypos.udp["Other"])
tox <- udpx
mult <- -1
fromx <- countryx + (nchar(country) * 7 * mult)
dpt <- tmx$dpt[i]
}
data.frame(fromx=fromx, fromy=fromy,
tox= tox - (nchar(dport) * 5 * mult),
toy= toy + rnorm(1, mean=0, sd=ht.tcp / 2),
curx=fromx, cury=fromy, col=ccolor[country],
size=tmx$freq[i], time=1, mult=mult,
dport=dport, country=country, dpt=dpt, row.names=NULL)
}))
} else {
# else we have no new data, just make empty data.frame
newdf <- data.frame()
}
if(nrow(outdf)>0) { # we have balls in the air
# update the current value based on which step the ball is in.
outdf$curx <- ((outdf$tox - outdf$fromx) * (outdf$time/steps)) + outdf$fromx
outdf$cury <- ((outdf$toy - outdf$fromy) * (outdf$time/steps)) + outdf$fromy
outdf$time <- outdf$time + 1
# rbind the old data with new data
if (nrow(newdf)) {
outdf <- rbind(outdf, newdf)
}
} else { # fresh df
if (nrow(newdf)) {
outdf <- newdf
}
}
# set up plot
png(filename=sprintf("ports/base%04d.png", image), width=wt, height=ht)
# set small margin in inches
par(mai=c(0,0.2,0,0.2))
# open up an empty plot
plot(c(0,0), type="n", col="white", xlim=c(-1, wt), ylim=c(-1, ht + 100),
yaxt="n", ann=FALSE, xaxt="n", bty="n", xaxs="i", yaxs="i")
offset <- 30
# add country labels
text(countryx+5, cpos+offset, labels=cnames, cex=2, adj=0.5)
# add tcp labels
bigport <- outdf$dport[outdf$time==steps & outdf$mult==1]
tcp.size <- ifelse(tcp.size>0.5, tcp.size * 0.9, 0.5)
tcp.size[tcp.name %in% bigport] <- 2
text(tcpx-5, ypos.tcp+offset, labels=tcp.name, cex=tcp.size, adj=1)
# add tcp labels
bigport <- outdf$dport[outdf$time==steps & outdf$mult==-1]
# udp.size <- sapply(udp.size, function(x) mean(c(x, 1)))
udp.size <- ifelse(udp.size>0.5, udp.size * 0.9, 0.5)
udp.size[udp.name %in% bigport] <- 2
text(udpx-5, ypos.udp+offset, labels=udp.name, cex=udp.size, adj=0)
# add the date labels
text(wt/2, ht+offset+5, labels=date.label, cex=3, adj=c(0.5, 0))
# add tcp/udp header
text(tcpx, ht+offset+5, labels="TCP", cex=2, adj=c(0.5, 0))
text(udpx, ht+offset+5, labels="UDP", cex=2, adj=c(0.5, 0))
# stick a little URL in the corner
text(hostx, 8, labels="Thanks to http://datadrivensecurity.info", cex=2,
col="slateblue", adj=c(0.5, 0), font=3)
# now include all the points (balls) in the plot
with(outdf, points(curx, cury + offset, type="p", pch=16,
col=as.character(col), cex=log10(size))) #cex was sqrt(size)
# test to see if we should increase the country barplot (look for time==1)
if (sum(outdf$time==steps) > 0) {
cbase <- aggregate(size ~ dport + mult, data=outdf[outdf$time==steps, ], FUN=sum)
for(x in seq(nrow(cbase))) {
if (cbase$mult[x]==1) {
thisone <- which(tcp.box$name==as.character(cbase$dport)[x])
tcp.box$xright[thisone] <- tcp.box$xright[thisone] +
(150 * (as.numeric(cbase$size[x]) / max.tcp) * cbase$mult[x])
} else {
thisone <- which(udp.box$name==as.character(cbase$dport)[x])
udp.box$xright[thisone] <- udp.box$xright[thisone] +
(150 * (as.numeric(cbase$size[x]) / max.udp) * cbase$mult[x])
}
}
outdf <- outdf[outdf$time<steps, ] #prune the data to be processed
}
# now add the two bar plots with a "rect"
with(tcp.box, rect(xleft, ybottom + offset, xright, ytop + offset, col="steelblue"))
with(udp.box, rect(xleft, ybottom + offset, xright, ytop + offset, col="steelblue"))
# close off this image
dev.off()
# include something to watch while this is running...
if (image %% 10 == 0) {
#print(outdf)
cat(image, "of", length(allframes) + steps + 20, "\n")
}
# update: maybe want to modify this to use txtProgressBar()
}
end <- Sys.time()
print(end - start)
#run with avconc -i 'base%04d.png' -q 4 -o out.mp4
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment