Skip to content

Instantly share code, notes, and snippets.

@jayjacobs
Created June 15, 2014 17:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jayjacobs/b42ac3661d38f2b83350 to your computer and use it in GitHub Desktop.
Save jayjacobs/b42ac3661d38f2b83350 to your computer and use it in GitHub Desktop.
Creating a Video on the TCP/UDP ports in Marx data
# ---- Configuration ----
# ISO 8601 week numbers (strftime "%V") to include in the video.
whichweek <- c(27, 28, 29, 30)
# how many countries to show?
numcountry <- 25 # 20 at first

# ---- Load and filter the Marx data ----
# read in marx data
marx <- read.csv("marx-geo.csv") # get from dds.ec
# Parse the timestamps. strptime() returns POSIXlt (a list-based class);
# convert to POSIXct, the vector-based class that is safe to keep as a
# data frame column and to subset row-wise.
marx$datetime <- as.POSIXct(
  strptime(marx$datetime, format = "%Y-%m-%d %H:%M:%S")
)
# Drop rows whose timestamp failed to parse (strptime() yields NA for those)
# and keep only the columns used below.
marx <- marx[!is.na(marx$datetime),
             c("datetime", "proto", "dpt", "country")]
# Keep only the requested weeks. "%V" is zero-padded text ("07"), so compare
# as integers; a text comparison would never match single-digit weeks.
week <- as.integer(format(marx$datetime, "%V"))
mx <- marx[week %in% whichweek, ] # mx is subset of marx data
# finally, keep only the rows that have a destination port
mx <- mx[!is.na(mx$dpt), ]
# Encode a timestamp as a numeric frame ID of the form MMDDHHss, where ss is
# the zero-padded index of the 5-minute slot within the hour (minute %/% 5).
frame.id <- function(when) {
  slot <- as.numeric(format(when, "%M")) %/% 5
  as.numeric(paste0(format(when, "%m%d%H"), sprintf("%02d", slot)))
}
# Tag every record with the 5-minute frame it falls into.
mx$frame <- frame.id(mx$datetime)
# Build the timeline to tick through: one timestamp every 300 seconds from
# the first to the last record, converted to the same frame-ID encoding.
allframes.src <- seq(min(mx$datetime), max(mx$datetime), by = 300)
allframes <- frame.id(allframes.src)
# allframes now holds every frame ID we want to render, in time order.
# ---- Source countries ----
# Count records per country, ignoring blank country names and (when country
# is a factor) levels that have no records left after the filters above.
cname.tbl <- table(mx$country)
keep <- nchar(names(cname.tbl)) > 0 & as.vector(cname.tbl) > 0
cname.tbl <- sort(cname.tbl[keep], decreasing = TRUE)
# "cnames" holds the numcountry most frequent countries, ordered by name
# (descending) so they read top-to-bottom alphabetically on the plot.
cnames <- sort(head(names(cname.tbl), numcountry), decreasing = TRUE)
# Fix the RNG seed. rainbow() itself is deterministic; the seed is what makes
# the rnorm() jitter applied to the balls in the animation loop repeatable.
set.seed(2)
# one colour per country, looked up by country name
ccolor <- as.character(rainbow(length(cnames)))
names(ccolor) <- cnames
# keep only the records coming from one of the selected countries
mx <- mx[mx$country %in% cnames, ]
# add a frequency counter for later aggregation
mx$freq <- 1
# Canvas size of each rendered image, in pixels.
wt <- 1920
ht <- 1152
# Country labels are spaced evenly: each country gets a band of height cht
# and its label sits at the vertical centre of that band.
cht <- ht / length(cnames)
# cpos maps a country name to the y value of its label.
cpos <- setNames(seq(cht / 2, ht, by = cht), cnames)
# ---- Destination ports ----
# Number of label rows per protocol (the last one is the "Other" bucket).
numport <- 200

# Turn a per-port count vector into the label vector shown on screen: the
# first numport-1 port numbers in ascending order followed by "Other", then
# reversed so that "Other" comes first.
port.labels <- function(counts) {
  ports <- as.numeric(head(names(counts), numport - 1))
  rev(c(sort(ports), "Other"))
}

# Per-port record counts; summary() on a factor lumps everything beyond
# maxsum levels into a single trailing entry.
tcp <- summary(factor(mx$dpt[mx$proto == "TCP"]), maxsum = numport)
udp <- summary(factor(mx$dpt[mx$proto == "UDP"]), maxsum = numport)
tcp.name <- port.labels(tcp)
udp.name <- port.labels(udp)

# Height of one label row for each protocol.
ht.tcp <- ht / length(tcp.name)
ht.udp <- ht / length(udp.name)
# ypos.* map a port label to the y value of the centre of its row.
ypos.tcp <- setNames(seq(ht.tcp / 2, ht, by = ht.tcp), tcp.name)
ypos.udp <- setNames(seq(ht.udp / 2, ht, by = ht.udp), udp.name)

# Largest count per protocol, used to scale the bar plots on the sides.
max.tcp <- max(tcp)
max.udp <- max(udp)

# x positions: TCP ports on the right, UDP ports on the left, countries in
# the middle.
tcpx <- wt - 150
udpx <- 150
countryx <- wt / 2
# gap between bars in barplots
gap <- 1
################
# State that the rendering loop below reads and updates.
start <- Sys.time() # timing it.
steps <- 60 # number of frames a ball takes to cross the screen
outdf <- data.frame() # one row per ball currently in flight

# Build the rectangle table for one side bar plot: one zero-width bar per
# port label, anchored at x0 and vertically centred on the label's row.
make.box <- function(labels, ycentre, rowht, x0) {
  data.frame(name = labels, count = 0,
             xleft = x0, ybottom = ycentre - (rowht / 2) + gap,
             xright = x0, ytop = ycentre + (rowht / 2) - gap,
             row.names = NULL)
}
tcp.box <- make.box(tcp.name, ypos.tcp, ht.tcp, tcpx)
udp.box <- make.box(udp.name, ypos.udp, ht.udp, udpx - 8)

# Text shown at the top of the image; set once the first data frame is read.
date.label <- NULL
# Current character expansion of every port label (0.5 is the resting size).
tcp.size <- setNames(rep(0.5, length(tcp.name)), tcp.name)
udp.size <- setNames(rep(0.5, length(udp.name)), udp.name)
# for testing, can cut down to a handful of frames
# allframes <- allframes[1:40]
# ---- Render one PNG per animation frame ----
# Every data frame spawns "balls" (one per port/country/protocol combination)
# that travel from the country label to the port label over `steps` images.
# When a ball arrives, its port label is enlarged and the bar next to the
# port grows. The loop runs steps + 20 images past the last data frame so the
# balls still in flight can land.

# png() cannot create directories, so make sure the output folder exists.
dir.create("ports", showWarnings = FALSE)
nimages <- length(allframes) + steps + 20
for (image in seq_len(nimages)) {
  # image is the number of the picture being drawn.
  # "it" is the frame ID to read, or zero once we are past the last frame.
  it <- if (image <= length(allframes)) allframes[image] else 0
  inframe <- mx$frame == it
  # test if we have any data to read for this frame
  if (any(inframe)) {
    # update the date label (to be shown at the top)
    date.label <- format(min(mx$datetime[inframe]), "%A, %B %e, %l%p")
    # aggregate, per port, country and protocol combination
    tmx <- aggregate(freq ~ dpt + country + proto, data = mx[inframe, ],
                     FUN = sum)
    # for each combination, create one ball (one row)
    newdf <- do.call(rbind, lapply(seq_len(nrow(tmx)), function(i) {
      country <- as.character(tmx$country[i])
      port <- as.character(tmx$dpt[i])
      # TCP balls fly right (mult = 1); everything else flies left to the
      # UDP column (mult = -1).
      if (tmx$proto[i] == "TCP") {
        ypos <- ypos.tcp
        rowht <- ht.tcp
        tox <- tcpx
        mult <- 1
      } else {
        ypos <- ypos.udp
        rowht <- ht.udp
        tox <- udpx
        mult <- -1
      }
      # ports without their own label row go to the "Other" row
      dport <- if (port %in% names(ypos)) port else "Other"
      # start at the edge of the country label, on the side facing the target
      fromx <- countryx + nchar(country) * 7 * mult
      fromy <- cpos[[country]]
      data.frame(fromx = fromx, fromy = fromy,
                 # stop short of the port label text
                 tox = tox - nchar(dport) * 5 * mult,
                 # jitter the landing height within the row of this
                 # protocol (the UDP side previously used the TCP row height)
                 toy = ypos[[dport]] + rnorm(1, mean = 0, sd = rowht / 2),
                 curx = fromx, cury = fromy, col = ccolor[[country]],
                 size = tmx$freq[i], time = 1, mult = mult,
                 dport = dport, country = country, row.names = NULL,
                 stringsAsFactors = FALSE)
    }))
  } else {
    # else we have no new data, just make empty data.frame
    newdf <- data.frame()
  }
  if (nrow(outdf) > 0) { # we have balls in the air
    # move each ball along its straight path according to its step counter
    progress <- outdf$time / steps
    outdf$curx <- (outdf$tox - outdf$fromx) * progress + outdf$fromx
    outdf$cury <- (outdf$toy - outdf$fromy) * progress + outdf$fromy
    outdf$time <- outdf$time + 1
    # append the balls created for this frame
    if (nrow(newdf) > 0) {
      outdf <- rbind(outdf, newdf)
    }
  } else if (nrow(newdf) > 0) { # fresh df
    outdf <- newdf
  }
  # balls that have used up all their steps "land" in this image
  landed <- outdf$time == steps
  # set up plot
  png(filename = sprintf("ports/base%04d.png", image), width = wt, height = ht)
  # set small margin in inches
  par(mai = c(0, 0.2, 0, 0.2))
  # open up an empty plot
  plot(c(0, 0), type = "n", col = "white", xlim = c(-1, wt),
       ylim = c(-1, ht + 100), yaxt = "n", ann = FALSE, xaxt = "n",
       bty = "n", xaxs = "i", yaxs = "i")
  offset <- 30
  # add country labels
  text(countryx + 5, cpos + offset, labels = cnames, cex = 2, adj = 0.5)
  # add tcp labels: labels shrink by 10% per image back to the resting size
  # (never below it), and jump to size 2 when a ball lands on them
  bigport <- outdf$dport[landed & outdf$mult == 1]
  tcp.size <- pmax(tcp.size * 0.9, 0.5)
  tcp.size[tcp.name %in% bigport] <- 2
  text(tcpx - 5, ypos.tcp + offset, labels = tcp.name, cex = tcp.size, adj = 1)
  # add udp labels, same rule
  bigport <- outdf$dport[landed & outdf$mult == -1]
  udp.size <- pmax(udp.size * 0.9, 0.5)
  udp.size[udp.name %in% bigport] <- 2
  text(udpx - 5, ypos.udp + offset, labels = udp.name, cex = udp.size, adj = 0)
  # add the date labels
  text(wt / 2, ht + offset + 5, labels = date.label, cex = 3, adj = c(0.5, 0))
  # add tcp/udp header
  text(tcpx, ht + offset + 5, labels = "TCP", cex = 2, adj = c(0.5, 0))
  text(udpx, ht + offset + 5, labels = "UDP", cex = 2, adj = c(0.5, 0))
  # stick a little URL in the bottom-right corner, right-aligned so it stays
  # on the canvas (the previous x coordinate, hostx, is not defined anywhere
  # in this script)
  text(wt - 10, 8, labels = "http://datadrivensecurity.info", cex = 2,
       col = "slateblue", adj = c(1, 0), font = 3)
  # now include all the points (balls) in the plot; skip while there are no
  # balls, because the initial empty data.frame has no curx/cury columns
  if (nrow(outdf) > 0) {
    with(outdf, points(curx, cury + offset, type = "p", pch = 16,
                       col = as.character(col), cex = sqrt(size)))
  }
  # grow the port bars for the balls that landed (time == steps)
  if (any(landed)) {
    cbase <- aggregate(size ~ dport + mult, data = outdf[landed, ], FUN = sum)
    for (x in seq_len(nrow(cbase))) {
      # bars extend away from the centre: right for TCP (mult = 1), left for
      # UDP (mult = -1), by 150 units per max.tcp / max.udp records
      if (cbase$mult[x] == 1) {
        thisone <- which(tcp.box$name == as.character(cbase$dport)[x])
        tcp.box$xright[thisone] <- tcp.box$xright[thisone] +
          (150 * (as.numeric(cbase$size[x]) / max.tcp) * cbase$mult[x])
      } else {
        thisone <- which(udp.box$name == as.character(cbase$dport)[x])
        udp.box$xright[thisone] <- udp.box$xright[thisone] +
          (150 * (as.numeric(cbase$size[x]) / max.udp) * cbase$mult[x])
      }
    }
    # landed balls are done; keep only those still in flight
    outdf <- outdf[outdf$time < steps, ]
  }
  # now add the two bar plots with a "rect"
  with(tcp.box, rect(xleft, ybottom + offset, xright, ytop + offset,
                     col = "steelblue"))
  with(udp.box, rect(xleft, ybottom + offset, xright, ytop + offset,
                     col = "steelblue"))
  # close off this image
  dev.off()
  # include something to watch while this is running...
  if (image %% 10 == 0) {
    cat(image, "of", nimages, "\n")
  }
  # update: maybe want to modify this to use txtProgressBar()
}
# Report how long the whole rendering run took.
end <- Sys.time()
print(difftime(end, start))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment