Skip to content

Instantly share code, notes, and snippets.

@jiapengjp
Last active December 17, 2015 09:32
Show Gist options
  • Save jiapengjp/37ea79f51963f2405b77 to your computer and use it in GitHub Desktop.
Save jiapengjp/37ea79f51963f2405b77 to your computer and use it in GitHub Desktop.
library(functional)
# Read the comma-separated upload log (with header row) and derive, per
# configuration and round, the elapsed time and the resulting throughput.
input <- "upload_enriched.log"
upload <- read.table(input, header = TRUE, sep = ",")

# Smallest `time` per (configuration, round) and largest `time` per round.
grp <- aggregate(
  time ~ bulksize + bulkline + shard + concurrency + essize + esdoc + round,
  data = upload, FUN = min
)
time2 <- aggregate(time ~ round, data = upload, FUN = max)

# Join the per-round maximum onto grp by the `round` key. Assigning
# time2$time positionally is only right when both aggregates happen to
# return the same number of rows in the same order.
grp$time2 <- time2$time[match(grp$round, time2$round)]
grp$durationSec <- grp$time2 - grp$time + 10 # post began 10 seconds before (after buffer created)
grp$bytePerSec <- grp$essize / grp$durationSec
grp$linePerSec <- grp$esdoc / grp$durationSec
# Scatter plot of lines/s against shard count. Concurrency levels 1-4 each get
# their own colour; any other non-missing level is drawn in "darkviolet".
concurrency_palette <- c("firebrick", "gold", "chartreuse", "deepskyblue")
point_col <- concurrency_palette[
  match(grp$concurrency, seq_along(concurrency_palette))
]
# Only non-missing levels outside the palette fall back to the catch-all
# colour; a missing concurrency keeps an NA colour.
point_col[is.na(point_col) & !is.na(grp$concurrency)] <- "darkviolet"
plot(grp$shard, grp$linePerSec, xlab = "shard", ylab = "line/s",
     col = point_col)
#legend("topleft", legend=c("c1","c2","c3","c4","c5"),
# col=c("firebrick","gold","chartreuse","deepskyblue","darkviolet"),
# pch=rep(16,5))

# Straight-line fit of throughput on shard count, reported and overlaid.
lm.shard <- lm(linePerSec ~ shard, data = grp)
print(summary(lm.shard))
abline(lm.shard)
# Transformed shard terms, stored as columns of grp so the model formulas
# below (and the combined models later in the script) can refer to them.
grp[c("shard2", "shard3", "shardR", "shardL")] <- list(
  grp$shard^2, grp$shard^3, grp$shard^0.5, log(grp$shard)
)

# Four alternative single-variable fits of throughput on shard count.
lm.shard2 <- lm(linePerSec ~ shard + shard2, data = grp)
lm.shard3 <- lm(linePerSec ~ shard + shard2 + shard3, data = grp)
lm.shardR <- lm(linePerSec ~ shard + shardR, data = grp)
lm.shardL <- lm(linePerSec ~ shard + shardL, data = grp)
for (fit in list(lm.shard2, lm.shard3, lm.shardR, lm.shardL)) {
  print(summary(fit))
}

# Evaluate every fit on shard = 1..10; one grid carries all the terms and
# each model picks out the columns named in its own formula.
shards <- 1:10
shard_grid <- data.frame(
  shard = shards,
  shard2 = shards^2,
  shard3 = shards^3,
  shardR = shards^0.5,
  shardL = log(shards)
)
pred.shard2 <- predict(lm.shard2, shard_grid)
pred.shard3 <- predict(lm.shard3, shard_grid)
pred.shardR <- predict(lm.shardR, shard_grid)
pred.shardL <- predict(lm.shardL, shard_grid)

# Overlay the fitted curves on the current plot (drawing order matters for
# overplotting, so it is kept as cube, log, root, quadratic).
lines(shards, pred.shard3, col = "green", lwd = 2)
lines(shards, pred.shardL, col = "darkgreen", lwd = 2)
lines(shards, pred.shardR, col = "red", lwd = 2)
lines(shards, pred.shard2, col = "blue", lwd = 2)

# NOTE(review): the percentages in the legend labels are hard-coded text, not
# computed from the fits above -- confirm they still match the data in use.
legend(
  "bottomright",
  legend = c("Cube (52%)", "Logarithm (52%)", "Square Root (51%)", "Quadratic (47%)"),
  col = c("green", "darkgreen", "red", "blue"),
  lty = rep(1, 4),
  lwd = rep(2, 4)
)
# Transformed concurrency terms, mirroring the shard terms already on grp.
grp[c("concurrency2", "concurrency3", "concurrencyR", "concurrencyL")] <- list(
  grp$concurrency^2, grp$concurrency^3, grp$concurrency^0.5, log(grp$concurrency)
)

# Combined models: bulkline plus shard and concurrency, each paired with the
# same family of transform (quadratic, cubic, square root, logarithm).
lm.bs2c2 <- lm(
  linePerSec ~ bulkline + shard + shard2 + concurrency + concurrency2,
  data = grp
)
lm.bs3c3 <- lm(
  linePerSec ~ bulkline + shard + shard2 + shard3 +
    concurrency + concurrency2 + concurrency3,
  data = grp
)
lm.bsRcR <- lm(
  linePerSec ~ bulkline + shard + shardR + concurrency + concurrencyR,
  data = grp
)
lm.bsLcL <- lm(
  linePerSec ~ bulkline + shard + shardL + concurrency + concurrencyL,
  data = grp
)
for (fit in list(lm.bs2c2, lm.bs3c3, lm.bsRcR, lm.bsLcL)) {
  print(summary(fit))
}
# Draw a perspective plot of a predicted surface over the shard and
# concurrency ranges found in the global `grp`, and mark one point on it.
#
# Args:
#   model:    fitted model; only summary(model)$adj.r.squared is read, for
#             the margin text.
#   predfunc: function(shard, concurrency) returning predictions; it is
#             called through outer(), so it must accept vectors.
#   title:    main title of the plot.
#   maxpt:    object with `$shard` and `$concurrency` giving the point to
#             highlight.
#   phi, theta: viewing angles handed to persp().
#
# Returns the value of the final text() call.
drawModel <- function(model, predfunc, title, maxpt, phi, theta) {
  shard_rng <- range(grp$shard)
  conc_rng <- range(grp$concurrency)
  shard_axis <- seq(shard_rng[1], shard_rng[2], length.out = 30)
  conc_axis <- seq(conc_rng[1], conc_rng[2], length.out = 30)

  surface <- outer(shard_axis, conc_axis, predfunc)
  z_rng <- range(surface)
  z_floor <- z_rng[1]
  z_top <- z_rng[2]

  pmat <- persp(
    shard_axis, conc_axis, surface,
    phi = phi, theta = theta, col = "palegreen",
    xlab = "shard", ylab = "concurrency", zlab = "lines/s",
    ticktype = "detailed"
  )
  title(main = title)
  adj_r2_pct <- round(summary(model)$adj.r.squared * 100.0, 2)
  mtext(paste0("Adjusted R-Squared:", format(adj_r2_pct), "%"))

  # Project the highlighted point and its guide-line endpoints to 2-D.
  peak_s <- maxpt$shard
  peak_c <- maxpt$concurrency
  peak_z <- predfunc(peak_s, peak_c)
  # The guide towards the concurrency axis ends on the upper edge of the
  # concurrency range when theta > 90, otherwise on the lower edge.
  conc_edge <- conc_rng[if (theta > 90) 2 else 1]

  on_surface <- trans3d(peak_s, peak_c, peak_z, pmat = pmat)
  on_floor <- trans3d(peak_s, peak_c, z_floor, pmat = pmat)
  toward_conc_edge <- trans3d(peak_s, conc_edge, z_floor, pmat = pmat)
  toward_shard_edge <- trans3d(shard_rng[2], peak_c, z_floor, pmat = pmat)
  # Label sits one sixth of the z-range above the top of the surface.
  label_at <- trans3d(peak_s, peak_c, z_top + (z_top - z_floor) / 6, pmat = pmat)

  points(on_surface, pch = 1, col = "red")
  segments(on_surface$x, on_surface$y, on_floor$x, on_floor$y,
           col = "red", lty = "dashed")
  segments(on_floor$x, on_floor$y, toward_conc_edge$x, toward_conc_edge$y,
           col = "red", lty = "dashed")
  segments(on_floor$x, on_floor$y, toward_shard_edge$x, toward_shard_edge$y,
           col = "red", lty = "dashed")
  text(
    label_at$x, label_at$y,
    paste0("s=", format(peak_s), ", c=", format(peak_c), " => ",
           round(peak_z), " lines/s")
  )
}
# Predict lines/s from the cubic model `lm.bs3c3` (looked up in the enclosing
# environment at call time).
#
# Args:
#   shard:  shard values.
#   concur: concurrency values.
#   bl:     bulkline values.
# The arguments are combined with data.frame(), so they may be vectors.
#
# Returns the result of predict() on the assembled model terms.
predict_bs3c3 <- function(shard, concur, bl) {
  shard_sq <- shard^2
  shard_cu <- shard^3
  concur_sq <- concur^2
  concur_cu <- concur^3
  cubic_terms <- data.frame(
    bulkline = bl,
    shard = shard,
    shard2 = shard_sq,
    shard3 = shard_cu,
    concurrency = concur,
    concurrency2 = concur_sq,
    concurrency3 = concur_cu
  )
  predict(lm.bs3c3, newdata = cubic_terms)
}
# Predict lines/s from the logarithmic model `lm.bsLcL` (looked up in the
# enclosing environment at call time).
#
# Args:
#   shard:  shard values.
#   concur: concurrency values.
#   bl:     bulkline values.
# The arguments are combined with data.frame(), so they may be vectors.
#
# Returns the result of predict() on the assembled model terms.
predict_bsLcL <- function(shard, concur, bl) {
  log_shard <- log(shard)
  log_concur <- log(concur)
  log_terms <- data.frame(
    bulkline = bl,
    shard = shard,
    shardL = log_shard,
    concurrency = concur,
    concurrencyL = log_concur
  )
  predict(lm.bsLcL, newdata = log_terms)
}
# Among the configurations run at the smallest bulkline, find the one each
# combined model predicts to be fastest, then draw that model's surface from
# three viewing angles with the predicted peak highlighted.
min_bulkline <- min(grp$bulkline)
best <- grp[grp$bulkline == min_bulkline, c("bulkline", "shard", "concurrency")]

# Both predictors accept vectors, so score every candidate row in one call.
# Row-wise apply() would first coerce the data frame to a matrix and then
# refit a one-row data frame per candidate.
best$pbs3c3 <- unname(predict_bs3c3(best$shard, best$concurrency, best$bulkline))
best$pbsLcL <- unname(predict_bsLcL(best$shard, best$concurrency, best$bulkline))

# (phi, theta) pairs for the three panels of each figure.
view_angles <- list(
  c(phi = 35, theta = 35),
  c(phi = 25, theta = 90),
  c(phi = 25, theta = 180)
)

# Cubic model: surface at the smallest bulkline and its best candidate row.
surface_bs3c3 <- Curry(predict_bs3c3, bl = min_bulkline)
peak_bs3c3 <- best[which.max(best$pbs3c3), ]
dev.new()
par(mfrow = c(1, length(view_angles)))
for (view in view_angles) {
  drawModel(lm.bs3c3, surface_bs3c3, "Cube", peak_bs3c3,
            view[["phi"]], view[["theta"]])
}

# Logarithmic model: same layout in a second device.
surface_bsLcL <- Curry(predict_bsLcL, bl = min_bulkline)
peak_bsLcL <- best[which.max(best$pbsLcL), ]
dev.new()
par(mfrow = c(1, length(view_angles)))
for (view in view_angles) {
  drawModel(lm.bsLcL, surface_bsLcL, "Logarithm", peak_bsLcL,
            view[["phi"]], view[["theta"]])
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment