Created
March 9, 2016 16:10
-
-
Save canimus/febd72c4f981d40a1819 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- | |
title: "ECU - Performance Test / Notifications" | |
author: "Herminio Vazquez" | |
date: "9 March 2016" | |
output: html_document | |
--- | |
## Introduction | |
The following statistical analysis identifies discrepancies in the statistics obtained during the performance-test executions of business scenarios in ECU for the Blackboard Notifications Performance Testing Cycle, conducted by Planit Software Testing to reproduce the behaviour observed in the production environment.
Response times in this report are presented in __seconds (sec)__ | |
## Data | |
```{r echo=FALSE, warning=FALSE, message=FALSE}
# Environment setup: packages used throughout the report.
library(dplyr)     # Data frame manipulation (filter/select/group_by)
library(tidyr)     # Column splitting (separate)
library(xtable)    # Table formatting
library(knitr)     # Markdown helpers (kable)
library(lubridate) # Date/interval arithmetic (interval, duration)
library(ggplot2)   # Plotting
library(grid)      # Viewport layout for arranging plots side by side

# Limit printed numbers to 2 significant digits report-wide.
options(digits = 2)

# Helper: viewport at row x, column y of the current grid layout,
# used with pushViewport() to place two ggplots side by side.
vplayout <- function(x, y) {
  viewport(layout.pos.row = x, layout.pos.col = y)
}
```
```{r}
# Load one JMeter result file (.jtl) and normalise its units:
#  - timeStamp arrives as Unix epoch milliseconds -> POSIXct date-time
#  - elapsed arrives in milliseconds -> seconds
#  - threadName ("<machine>_<bp>_<action> <thread>") is split into its
#    components; the machine prefix is dropped after the split.
load_cycle <- function(path) {
  df <- read.csv(path, header = TRUE)
  df$timeStamp <- as.POSIXct(df$timeStamp / 1000, origin = "1970-01-01")
  df$elapsed <- df$elapsed / 1000
  df %>%
    separate(threadName, c("bp", "thread"), sep = " ") %>%
    separate(bp, c("machine", "bp_number", "action"), sep = "_") %>%
    select(-machine)
}

data1 <- load_cycle("/Volumes/usbdata/ecu_not_1.jtl")
data2 <- load_cycle("/Volumes/usbdata/ecu_not_2.jtl")
data3 <- load_cycle("/Volumes/usbdata/ecu_not_3.jtl")

# Wall-clock duration of a test cycle: first to last recorded sample.
cycle_duration <- function(df) {
  duration(interval(min(df$timeStamp), max(df$timeStamp)))
}
dur1 <- cycle_duration(data1)
dur2 <- cycle_duration(data2)
dur3 <- cycle_duration(data3)
```
### Exploring Data Sets / Macro Statistics | |
This section provides a summary taking in consideration an aggregation of the metrics for the entire scenario. | |
It serves as a single point of reference to spot macro differences in the execution of the performance test scenarios. | |
It includes macro stats for the following factors: | |
* Total number of transactions | |
* Pass ratio | |
* Duration | |
* 90th Percentile for all response times | |
```{r comment=NA}
# Macro statistics per cycle: transaction count, pass ratio,
# 90th-percentile response time (seconds), and total duration.

# Percentage of samples flagged successful. NA success values (if any)
# count as failures, matching the original filter(success == "true").
pass_pct <- function(df) {
  100 * sum(df$success == "true", na.rm = TRUE) / nrow(df)
}

# Build the summary with its final, report-ready column names directly
# (check.names = FALSE preserves "Pass Pctg." and "90_Percentile").
transactions_analysis <- data.frame(
  Scenario = c("cycle1", "cycle2", "cycle3"),
  Transactions = c(nrow(data1), nrow(data2), nrow(data3)),
  `Pass Pctg.` = c(pass_pct(data1), pass_pct(data2), pass_pct(data3)),
  `90_Percentile` = c(
    quantile(data1$elapsed, .9),
    quantile(data2$elapsed, .9),
    quantile(data3$elapsed, .9)
  ),
  Duration = c(dur1, dur2, dur3),
  check.names = FALSE
)
```
#### Cycle Summary | |
```{r}
# Render the cycle-level summary as a markdown table, formatting the
# numbers with a comma as the thousands separator.
transactions_analysis %>%
  format(big.mark = ",") %>%
  kable()
```
```{r echo=FALSE, warning=FALSE, message=FALSE}
# Response-time summary for one cycle, grouped by the given column:
# sample count, min, max, mean and 90th percentile of elapsed (seconds),
# tagged with the cycle number so cycles can be stacked with rbind().
summarise_elapsed <- function(df, group_col, cycle_id) {
  df %>%
    group_by(.data[[group_col]]) %>%
    summarise(
      count = n(),
      min = min(elapsed),
      max = max(elapsed),
      avg = mean(elapsed),
      pct90 = quantile(elapsed, .9),
      cycle = cycle_id
    )
}

# By transaction label
summary1 <- summarise_elapsed(data1, "label", 1)
summary2 <- summarise_elapsed(data2, "label", 2)
summary3 <- summarise_elapsed(data3, "label", 3)

# By load generator host
host1 <- summarise_elapsed(data1, "Hostname", 1)
host2 <- summarise_elapsed(data2, "Hostname", 2)
host3 <- summarise_elapsed(data3, "Hostname", 3)

# By business process number
bp1 <- summarise_elapsed(data1, "bp_number", 1)
bp2 <- summarise_elapsed(data2, "bp_number", 2)
bp3 <- summarise_elapsed(data3, "bp_number", 3)
```
One of the first steps in the analysis is to verify that executions were not influenced by the test infrastructure. | |
__Are the aggregated response times influenced by the load generators or by individual business processes?__
```{r echo=FALSE, message=FALSE}
# 90th-percentile trend across the three cycles, broken down by load
# generator host (left panel) and by business process (right panel).
host_stats <- rbind(host1, host2, host3)
bp_stats <- rbind(bp1, bp2, bp3)

host_trend <- ggplot(host_stats, aes(x = factor(cycle), y = pct90, colour = Hostname)) +
  geom_line(aes(group = Hostname)) +
  geom_point(size = 3)
bp_trend <- ggplot(bp_stats, aes(x = factor(cycle), y = pct90, colour = bp_number)) +
  geom_line(aes(group = bp_number)) +
  geom_point(size = 3)

# Arrange both panels side by side on a single 1x2 grid page.
pushViewport(viewport(layout = grid.layout(1, 2)))
print(host_trend, vp = vplayout(1, 1))
print(bp_trend, vp = vplayout(1, 2))
```
__Answer:__ No, the response times across all the load generators are consistent. The largest observed gap is 2 seconds, seen on load generator HD1403472 in cycle 2.
```{r include=F}
# Per-host summary tables for each cycle. Rendered with include=F so
# they are computed but not shown in the report; kept here so the raw
# numbers are available when knitting with include flipped to TRUE.
kable(host1)
kable(host2)
kable(host3)
```
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment