Skip to content

Instantly share code, notes, and snippets.

@nfisher
Created April 19, 2018 00:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nfisher/0bf9bdc3b69e55ba5bac75f1039a075f to your computer and use it in GitHub Desktop.
Save nfisher/0bf9bdc3b69e55ba5bac75f1039a075f to your computer and use it in GitHub Desktop.
R - histogram and scatter plot
library (ggplot2)
library (gridExtra)
# Summarise one load-test result CSV as a single PNG report containing two
# summary tables, a latency histogram and a latency-over-time scatter plot.
#
# arg1 - online csv (columns read: timeStamp, Latency, responseCode, bytes,
#        success)
# arg2 - base filename; the report is written to results/<arg2>.png
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 2) {
  stop("usage: Rscript <script> <online csv> <base filename>", call. = FALSE)
}
csv_path <- args[1]
base_name <- args[2]

csv_online <- read.csv(csv_path)
if (nrow(csv_online) == 0) {
  # min()/max()/quantile() below are meaningless on an empty sample set.
  stop("no samples found in ", csv_path, call. = FALSE)
}
print("Data loaded")

# Rebase timestamps to seconds elapsed since the first sample (the raw value
# is divided by 1000 and later labelled as seconds).
csv_online$timeStamp <-
  (csv_online$timeStamp - min(csv_online$timeStamp)) / 1000
print("Data modified")

latency <- csv_online$Latency
min_resp <- min(latency)
max_resp <- max(latency)
pctl50_resp <- quantile(latency, probs = 0.50, names = FALSE)
pctl90_resp <- quantile(latency, probs = 0.90, names = FALSE)
pctl99_resp <- quantile(latency, probs = 0.99, names = FALSE)
stddev <- sd(latency)

total_resp <- length(latency)
is_ok <- csv_online$responseCode == "200"
ok_resp <- sum(is_ok)
failed_resp <- sum(!is_ok)
failed_perc <- failed_resp / total_resp * 100
# Responses slower than 300 ms are reported as "long".
long_resp <- sum(latency > 300)
long_perc <- long_resp / total_resp * 100
# Counts 200 responses whose body size differs from 4269 bytes.
# NOTE(review): 4269 is presumably the size of the expected page, which makes
# this the "default content" count shown in the table - confirm.
default_resp <- sum(csv_online$bytes != 4269 & is_ok)
duration <- max(csv_online$timeStamp)
print("Data extracted")

bounds_df <- data.frame(
  Duration = duration,
  Min = min_resp,
  Max = max_resp,
  Median = pctl50_resp,
  PCTL90 = pctl90_resp,
  PCTL99 = pctl99_resp,
  StdDev = round(stddev, digits = 2)
)
req_df <- data.frame(
  Total = total_resp,
  Failed = failed_resp,
  FailedPercentage = round(failed_perc, digits = 2),
  Long = long_resp,
  LongPercentage = round(long_perc, digits = 2),
  OkResp = ok_resp,
  DefaultResp = default_resp
)

scatter <- ggplot(csv_online, aes(x = timeStamp, y = Latency)) +
  geom_point(alpha = 1 / 6, size = 1.5, aes(colour = success)) +
  labs(
    x = "timestamp (s)",
    y = "response time (ms)",
    title = "Response time over time"
  )

bin_width <- 25
# Round the upper x limit up to a bin edge: a limit that cuts through the
# last bin makes ggplot2 drop that bar as out of bounds.
histo_upper <- max(bin_width, ceiling(max_resp / bin_width) * bin_width)
# `boundary` replaces the deprecated `origin` argument of geom_histogram().
histo <- ggplot(csv_online, aes(x = Latency)) +
  geom_histogram(binwidth = bin_width, boundary = 0) +
  scale_y_sqrt() +
  scale_x_continuous(limits = c(0, histo_upper)) +
  labs(
    x = "response time (ms)",
    y = "number of responses",
    title = "Response Time Distribution (25ms bins)"
  )

# gridExtra >= 2.0: row names are suppressed with `rows = NULL`
# (the old `show.rownames` argument no longer exists).
bounds_table <- tableGrob(
  bounds_df,
  rows = NULL,
  cols = c(
    "Dur (s)",
    "Min (ms)",
    "Max (ms)",
    "Median (ms)",
    "90th PCTL (ms)",
    "99th PCTL (ms)",
    "Std Dev"
  )
)
req_table <- tableGrob(
  req_df,
  rows = NULL,
  cols = c(
    "Total # Req.",
    "# Failed Req.",
    "% Failed Req.",
    "# Req. >300ms",
    "% Req. >300ms",
    "# 200 OK",
    "# Def. Cont."
  )
)

dpi <- 72
# png() does not create missing directories, so make sure results/ exists.
dir.create("results", showWarnings = FALSE)
png(
  file.path("results", paste0(base_name, ".png")),
  width = dpi * 9,
  height = dpi * 14,
  res = dpi
)
# gridExtra >= 2.0: the page title argument is `top` (formerly `main`).
grid.arrange(
  arrangeGrob(bounds_table, req_table, ncol = 1),
  arrangeGrob(histo, scatter, ncol = 1),
  top = paste(base_name, csv_path, sep = " "),
  ncol = 1,
  heights = c(1, 4)
)
dev.off()
library (ggplot2)
library (gridExtra)
# Compare the latency of two load-test runs ("online" vs "offline") in one
# A4 PDF: one histogram per run plus a combined scatter plot over time.
#
# arg1 - online csv (columns read: timeStamp, Latency)
# arg2 - offline csv (columns read: timeStamp, Latency)
# arg3 - output name; the report is written to results/<arg3>.pdf
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 3) {
  stop(
    "usage: Rscript <script> <online csv> <offline csv> <output name>",
    call. = FALSE
  )
}
base_name <- args[3]

# Read one result CSV and rebase its timestamps to seconds elapsed since the
# first sample (the raw value is divided by 1000).
read_samples <- function(path) {
  samples <- read.csv(path)
  if (nrow(samples) == 0) {
    stop("no samples found in ", path, call. = FALSE)
  }
  samples$timeStamp <- (samples$timeStamp - min(samples$timeStamp)) / 1000
  samples
}

csv_online <- read_samples(args[1])
csv_offline <- read_samples(args[2])

bin_width <- 100
# Both histograms share one x range so they can be compared directly. The
# upper limit is rounded up to a bin edge: a limit that cuts through the last
# bin makes ggplot2 drop that bar as out of bounds.
histo_max <- max(csv_online$Latency, csv_offline$Latency)
histo_max <- max(2 * bin_width, ceiling(histo_max / bin_width) * bin_width)

# The lower limit of 100 deliberately hides the first bin, as the axis label
# states. `boundary` replaces the deprecated `origin` argument.
histo_online <- ggplot(csv_online, aes(x = Latency)) +
  geom_histogram(binwidth = bin_width, boundary = 0, fill = "green") +
  scale_y_sqrt() +
  scale_x_continuous(limits = c(100, histo_max)) +
  labs(
    x = "response time (ms, excludes first 100ms bin)",
    y = "number of responses",
    title = "Origin Online Response Time Distribution (100ms bins)"
  )
histo_offline <- ggplot(csv_offline, aes(x = Latency)) +
  geom_histogram(binwidth = bin_width, boundary = 0, fill = "red") +
  scale_y_sqrt() +
  scale_x_continuous(limits = c(100, histo_max)) +
  labs(
    x = "response time (ms, excludes first 100ms bin)",
    y = "number of responses",
    title = "Origin Offline Response Time Distribution (100ms bins)"
  )

# `label` is not a geom_point() parameter and was silently ignored, so the two
# series had no legend. Mapping colour to the run name produces one.
scatter <- ggplot(csv_online, aes(x = timeStamp, y = Latency)) +
  geom_point(
    data = csv_online, aes(colour = "Online"), size = 2, alpha = 1 / 4
  ) +
  geom_point(
    data = csv_offline, aes(colour = "Offline"), size = 2, alpha = 1 / 4
  ) +
  scale_colour_manual(
    name = NULL,
    breaks = c("Online", "Offline"),
    values = c(Online = "green", Offline = "red")
  ) +
  # Draw the legend keys fully opaque so the colours stay readable.
  guides(colour = guide_legend(override.aes = list(alpha = 1))) +
  labs(x = "sample point (s)", y = "response time (ms)", title = "Scatter")

# pdf() does not create missing directories, so make sure results/ exists.
dir.create("results", showWarnings = FALSE)
pdf(file.path("results", paste0(base_name, ".pdf")), paper = "a4")
# gridExtra >= 2.0: the page title argument is `top` (formerly `main`).
grid.arrange(histo_online, histo_offline, scatter, top = base_name, ncol = 1)
dev.off()
@nfisher
Copy link
Author

nfisher commented Apr 19, 2018

histo.R generates this:

20130730-20h12_origin

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment