my_packages <- c("tidyverse", "lubridate", "survival", "survminer",
                 "ProbBayes", "Bolstad", "ggplot2", "devtools",
                 "mc2d", "bayestestR", "scales", "knitr", "ggridges",
                 "simstudy", "betareg", "data.table", "boot", "broom",
                 "stats4", "jsonlite", "tidybayes", "bayesplot", "BayesFactor",
                 "rstanarm")
not_installed <-
  my_packages[!(my_packages %in% installed.packages()[ , "Package"])]
if (length(not_installed)){
  msgVal <- paste("Install packages: ", toString(not_installed))
  msgRes <- askYesNo(msgVal)
  if( msgRes != TRUE){
    stopMsg <- paste("Quitting because you don't want to install:", not_installed)
    stop(stopMsg)
  }
}
if(length(not_installed)) install.packages(not_installed)
# Deal with ggdistribute - which is on GitHub
my_packages <- c("ggdistribute")
not_installed <- my_packages[!(my_packages %in% installed.packages()[ , "Package"])]
if (length(not_installed)){
  msgVal <- "Install ggdistribute?"
  msgRes <- askYesNo(msgVal)
  if( msgRes != TRUE){
    stopMsg <- paste("Quitting because you don't want to install ggdistribute")
    stop(stopMsg)
  }
}
# Qualify with devtools:: since the library() calls come later in this file
if(length(not_installed)) devtools::install_github("iamamutt/ggdistribute")
library(devtools)
library(tidyverse)
library(lubridate)
library(survival)
library(survminer)
library(bayestestR)
library(ProbBayes)
library(Bolstad)
library(ggplot2)
library(mc2d)
library(scales)
library(knitr)
library(ggdistribute)
library(ggridges)
library(simstudy)
library(betareg)
library(data.table)
library(boot)
library(broom)
library(jsonlite)
library(stats4)
library(tidybayes)
library(bayesplot)
library(BayesFactor)
library(rstanarm)
select <- dplyr::select
`%!in%` <- Negate(`%in%`)
ScoreBetaBeliefs <- function(bias = c(0.5, 0.5), first_belief = c(2,2), second_belief = c(3,3), hits = 1, misses = 5){
  probs <- bias
  beta_par <- rbind(first_belief, second_belief) # stack the two beliefs into a matrix
  output <- binomial.beta.mix(bias, beta_par, c(hits, misses)) # score both priors against the observed hits and misses
  posterior_odds <- output$probs[1]/ output$probs[2] # Bayes Factor
  return(posterior_odds)
}
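# Usage sketch (hedged): guarded so it never runs when this file is sourced.
# Compares a skeptical Beta(2, 8) belief against an optimistic Beta(8, 2)
# belief, each held with 50/50 prior odds, after observing 3 hits and 7
# misses. Values above 1 favor the first belief. Numbers are illustrative.
if (FALSE) {
  bf <- ScoreBetaBeliefs(bias = c(0.5, 0.5),
                         first_belief  = c(2, 8),
                         second_belief = c(8, 2),
                         hits = 3, misses = 7)
  print(bf)
}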
ScoreGammaBeliefs <- function(bias = c(0.5, 0.5), first_belief = c(2,2), second_belief = c(3,3), data = c(13,52)){
  probs <- bias
  beliefs <- rbind(first_belief, second_belief)
  data <- list(y = data[1], t = data[2])
  mix <- poisson.gamma.mix(probs, beliefs, data)
  scores <- mix$probs[1]/mix$probs[2]
  return(scores)
}
MakeBetaGraph <- function(hit, miss, xlab, ylab, tlab, slab , xadj = .01 ){
  beta_avg = (hit/(miss + hit))
  posterior <- distribution_beta(1000, hit, miss)
  ci_hdi <- ci(posterior, method = "HDI")
  posterior <- posterior %>%
    #estimate_density(extend=TRUE)
    estimate_density()
  ytop <- max(posterior$y) * .8
  posterior %>%
    ggplot(aes(x=x, y=y)) +
    geom_area(fill="darkslateblue", alpha = 0.6) +
    theme_classic() +
    geom_vline(xintercept=beta_avg, color="black", size=1) +
    geom_vline(xintercept=ci_hdi$CI_low, color="black", size=1, linetype="dotted") +
    geom_vline(xintercept=ci_hdi$CI_high, color="black", size=1, linetype="dotted") +
    annotate(geom = "text", x = ci_hdi$CI_low + xadj , y = ytop,
             label = round(ci_hdi$CI_low,3), fontface = 2, hjust = 0, vjust = -5) +
    annotate(geom = "text", x = ci_hdi$CI_high + xadj , y = ytop,
             label = round(ci_hdi$CI_high,3), fontface = 2, hjust = 0, vjust = -5) +
    annotate(geom = "text", x = beta_avg + xadj , y = ytop,
             label = round(beta_avg,3), fontface = 2, hjust = 0, vjust = -5) +
    labs(x = xlab, y = ylab, title = tlab, subtitle = slab)
}
MakeRidgeChart2 <- function(eng_risk, title_val, sub_val, xtitle, ytitle, legend_val){
  ggplot(eng_risk, aes(x = x_vals, y = factor(yaxis,levels=unique(yaxis)),height = prob,
                       scale = 1.2, color = legend, fill = factor(legend,levels=rev(unique(legend))))) +
    #scale_fill_manual(values = cbp1) +
    geom_density_ridges(stat="identity", color = "white", alpha = 0.8,
                        panel_scaling = TRUE, size = 1) +
    guides(fill=guide_legend(title= legend_val)) +
    labs(title = title_val,
         subtitle = sub_val,
         x = xtitle,
         y = ytitle)
}
MakeRidgeChart <- function(eng_risk, title_val, sub_val, xtitle, ytitle, legend_val){
  ggplot(eng_risk, aes(x = x_vals, y = factor(yaxis,levels=unique(yaxis)),height = prob,
                       scale = 1.2, color = legend, fill = factor(rev(legend),levels=rev(unique(legend))))) +
    #scale_fill_manual(values = cbp1) +
    geom_density_ridges(stat="identity", color = "white", alpha = 0.8,
                        panel_scaling = TRUE, size = 1) +
    guides(fill=guide_legend(title= legend_val)) +
    labs(title = title_val,
         subtitle = sub_val,
         x = xtitle,
         y = ytitle)
}
GetRope <- function( eng_risk, month_one, month_two){
  mone <- eng_risk %>% filter(yaxis == month_one) %>% select(closed, misses) %>% distinct()
  mone_dist <- rbeta(1000, mone$closed, mone$misses)
  mone_hdi <- ci(mone_dist, method = "HDI")
  mtwo <- eng_risk %>% filter(yaxis == month_two) %>% select(closed, misses) %>% distinct()
  mtwo_dist <- rbeta(1000, mtwo$closed, mtwo$misses)
  mtwo_hdi <- ci(mtwo_dist, method = "HDI")
  diff_dist <- mtwo_dist - mone_dist
  rope(diff_dist)
}
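# Usage sketch (hedged): guarded so it never runs when this file is sourced.
# eng_risk is built by MakeVulnData() below, which carries beta counts in
# `closed`/`misses` and a period label in `yaxis`. The ROPE result reports
# how much of the period-over-period difference is practically zero.
# Counts and labels are illustrative.
if (FALSE) {
  prior <- c(1, 1)  # illustrative flat prior
  vulnData <- tibble(hits = c(10, 14), misses = c(30, 26), yaxis = c("Jan", "Feb"))
  eng_risk <- MakeVulnData(vulnData, prior)
  GetRope(eng_risk, month_one = "Jan", month_two = "Feb")
}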
MakeVulnData <- function(vulnData, prior){
  hasGroup <- length(vulnData)
  for( i in 1:nrow(vulnData)){
    if( i == 1){
      hits <- vulnData$hits[i] + prior[1]
      misses <- vulnData$misses[i] + prior[2]
      yaxis <- vulnData$yaxis[i]
      if(hasGroup == 4){
        legend <- vulnData$legend[i]
      }else{
        legend <- yaxis
      }
      res <- MakeBurnData(hits, misses, yaxis, legend)
      eng_risk <- res
    }else{
      hits <- vulnData$hits[i] + hits
      misses <- vulnData$misses[i] + misses
      yaxis <- vulnData$yaxis[i]
      if(hasGroup == 4){
        legend <- vulnData$legend[i]
      }else{
        legend <- yaxis
      }
      res <- MakeBurnData(hits, misses, yaxis, legend)
      eng_risk <- bind_rows(res, eng_risk)
    }
  }
  return(eng_risk)
}
MakeBurnData <- function(closed, misses, yaxis, legend){
  x_vals = seq(from=0,to=1,length = 1000)
  dens = dbeta(x_vals,closed, misses)
  prob = dens / max(dens)
  res <- tibble(x_vals, yaxis , prob, legend, closed, misses, dens)
  return(res[res$dens != Inf,])
}
GetBeliefsEvents <- function(belief, max_belief ){
  alpha_ratio <- as.numeric(belief)
  beta_ratio <- as.numeric(max_belief)
  q1 = list(p=.5, x= alpha_ratio)
  q2 = list(p=.9, x= beta_ratio)
  return( beta.select(q1, q2) )
}
GetBeliefsHits <- function(hits, max_hits, events ){
  alpha_ratio <- hits / events
  beta_ratio <- max_hits / events
  q1 = list(p=.5, x= alpha_ratio)
  q2 = list(p=.9, x= beta_ratio)
  return( beta.select(q1, q2) )
}
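# Usage sketch (hedged): guarded so it never runs when this file is sourced.
# beta.select() (ProbBayes) turns two elicited quantiles into beta shape
# parameters: here the SME believes the rate is equally likely above/below
# 20% (p = .5) and is 90% sure it is below 50% (p = .9). Inputs are
# illustrative.
if (FALSE) {
  beta_vals <- GetBeliefsEvents(belief = 0.20, max_belief = 0.50)
  print(beta_vals)  # c(alpha, beta) matching those quantiles
}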
GetBetResults <- function(guess, tries, w.ball){
  guess_cost <- tries * 5
  diff_cost <- abs(w.ball - guess) * 5
  total_cost <- guess_cost + diff_cost
  profit <- 100 - total_cost
  print(paste0("Your Winnings: ", profit))
}
GetRolls <- function(tries){
  w.ball <- sample(44,1)
  val <- ""
  for(x in 1:tries){
    val <- paste0(rollball(w.ball)," ",val)
  }
  print(val)
  return(w.ball)
}
rollball <- function(w.ball){
  if( w.ball == 1){
    r.ball <- sample(2:44,1)
  }else if( w.ball == 44){
    r.ball <- sample(1:43,1)
  }else{
    r.ball <- sample(c(1:(w.ball - 1), (w.ball + 1):44),1)
  }
  if(r.ball < w.ball){
    return("Left")
  }else{
    return("Right")
  }
}
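# Usage sketch (hedged): guarded so it never runs when this file is sourced.
# Each roll costs 5 and reports whether it landed Left or Right of the
# hidden white ball (1-44); the bet then pays 100 minus roll costs and
# distance error. Seed and guess are illustrative.
if (FALSE) {
  set.seed(42)
  tries <- 5
  w.ball <- GetRolls(tries)  # prints e.g. "Left Right Left ..."
  GetBetResults(guess = 22, tries = tries, w.ball = w.ball)
}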
ErrorCheck <- function(base_configs, event_counts){
  a_total <- as.numeric(base_configs["a_n_events"])
  a_errors <- as.numeric(event_counts["product_a_errors"])
  b_total <- as.numeric(base_configs["b_n_events"])
  b_errors <- as.numeric(event_counts["product_b_errors"])
  # Remove errors to get true events
  a_true <- a_total - a_errors
  b_true <- b_total - b_errors
  cat(paste("A Total:",a_total,"A Errors:", a_errors,"A True:", a_true, "\n"))
  cat(paste("B Total:",b_total,"B Errors:", b_errors,"B True:", b_true, "\n"))
  # Obvious misconfiguration
  if( a_total < a_errors){
    stop(paste("Exiting: Your errors for product A of: ", a_errors," exceed your total events for product A of: ", a_total))
  }
  if( b_total < b_errors){
    stop(paste("Exiting: Your errors for product B of: ", b_errors," exceed your total events for product B of: ", b_total))
  }
  # If A's true vulns (e.g. 30) are greater than B's total (e.g. 25)
  if( b_total < a_true ){
    diff <- a_true - b_total
    cat(paste("Product B is missing", diff, "false positive error(s). Add ", diff ," each to B's EVENTS and ERRORS\n"))
    stop("Exiting")
  }
  if( a_total < b_true ){
    diff <- b_true - a_total
    cat(paste("Product A is missing", diff, "false positive error(s). Add ", diff ," each to A's EVENTS and ERRORS\n"))
    stop("Exiting")
  }
  # Is Prod A/B missing errors - false negatives specifically?
  if( a_true < b_true ){
    diff <- b_true - a_true
    cat(paste("Adding ", diff ," FALSE NEGATIVE errors to product A for a new total of: ",diff + a_errors,"\n"))
    return(c("product_a_errors", diff))
  }
  if( b_true < a_true ){
    diff <- a_true - b_true
    cat(paste("Adding ", diff ," FALSE NEGATIVE errors to product B for a new total of: ",diff + b_errors,"\n"))
    return(c("product_b_errors", diff))
  }
  return(c("No Change", 0))
}
CatTotalCost <- function(total_costs, product_costs){
  # Get cost ratios
  prod_a_total_costs <- total_costs[1]
  prod_b_total_costs <- total_costs[2]
  prod_ratio <- round( prod_a_total_costs / prod_b_total_costs, 3) * 100
  prod_ratio <- paste(prod_ratio, "%", sep = "")
  # Format as dollars
  prod_a_original_cost <- dollar_format()(as.numeric(product_costs["prod_a_cost"]))
  prod_a_total_final <- dollar_format()(round(as.numeric(prod_a_total_costs),2))
  prod_b_original_cost <- dollar_format()(as.numeric(product_costs["prod_b_cost"]))
  prod_b_total_final <- dollar_format()(round(as.numeric(prod_b_total_costs),2))
  # Print message to stdout
  cat("--------Which Product Cost Most To Operate Given Forecasted Errors-------\n")
  cat(paste(" Product A is", prod_ratio , "the total cost of product B\n"))
  cat(paste(" Product A (with base price of ", prod_a_original_cost ,") is expected to cost to operate with errors: ", prod_a_total_final,"\n"))
  cat(paste(" Product B (with base price of ", prod_b_original_cost,") is expected to cost to operate with errors: ", prod_b_total_final,"\n"))
  cat("\n Still uncertain? Get more data by running more tests!")
  return(c(prod_ratio, prod_a_original_cost, prod_a_total_final, prod_b_original_cost, prod_b_total_final))
  #print(knitr::kable(out))
}
GraphErrorImpact <- function(posterior, impact){
  newdf_a_impact <- data.frame(posterior = posterior$prod_a_impact, product = "Product A")
  newdf_b_impact <- data.frame(posterior = posterior$prod_b_impact, product = "Product B")
  newdf_ab_impact <- rbind(newdf_a_impact, newdf_b_impact)
  impact_text <- paste(" Product A is", impact[1] , "the total cost of product B\n Product A (with base price of ", impact[2] ,") is expected to cost to operate with errors: ", impact[3],"\n Product B (with base price of ", impact[4],") is expected to cost to operate with errors: ", impact[5],"\n")
  print(ggplot(newdf_ab_impact, aes(x = posterior, y = ..density.., fill = product)) +
          stat_density_ci(alpha = 0.5, n = 1024, geom = "density",position = "identity") +
          scale_fill_manual(values=c("#333399", "#CCCCFF")) +
          labs(title = "FORECASTING APPSEC ERROR IMPACT\nUsing Bayesian A/B Testing\n", x="Using Small Samples: Predicted Difference In Product Error Impacts",
               subtitle = impact_text,
               y = "Product Error Comparisons"))
}
CatErrorDiffProp <- function(posterior){
  error_rate_diff <- sum(posterior$prop_diff_events > 0 )/length(posterior$prop_diff_events)
  cat(paste("--------Error Rate Difference For This Simulation--------\n Probability that product A makes more errors than Product B is ",round(error_rate_diff * 100) ,"%\n\n", sep = ""))
}
GraphErrorDiff <- function(posterior){
  xval <- posterior$prop_diff_events
  breaks <- pretty(range(xval), n = nclass.FD(xval), min.n = 1)
  bwidth <- breaks[2]-breaks[1]
  print(ggplot(posterior, aes(x=prop_diff_events)) +
          geom_histogram(aes(y=..density..), colour="black", fill="white", binwidth = bwidth)+
          geom_density(alpha=.2, fill="#333399") + labs(title="Event Amount Difference: Product A - Product B",x="Event Rate", y = "Amount")+
          geom_vline(xintercept=0, color="red", size=1, linetype = "dotted")
  )
}
GraphErrorRates <- function(posterior){
  newdf_a <- data.frame(posterior = posterior$prod_a, product = "Product A")
  newdf_b <- data.frame(posterior = posterior$prod_b, product = "Product B")
  newdf_ab <- rbind(newdf_a, newdf_b)
  print(ggplot(newdf_ab, aes(x = posterior, y = ..density.., fill = product)) +
          stat_density_ci(alpha = 0.5, n = 1024, geom = "density",position = "identity") +
          scale_fill_manual(values=c("#333399", "#CCCCFF")) +
          labs(title = "Product A/B Testing", x="Using Small Samples: Predicted Difference In Product Error Rates",
               y = "Product Comparisons"))
}
CatUpdatedBeliefRates <- function(posterior, event_counts, base_configs){
  out <- "--------Updated Forecasts Given Small Data, Beliefs & Lots of Simulations--------------\n"
  # 89% Highest Density Intervals
  prod_a_points <- point_estimate(posterior$prod_a)
  ci_hdi_prod_a <- ci(posterior$prod_a, method = "HDI")
  out <- paste(out, " The previous product A forecast combined with ", base_configs["a_n_events"] ," events and ", event_counts["product_a_errors"] ," errors is between:",
               round(ci_hdi_prod_a$CI_low,2) * 100, "% and ",
               round(ci_hdi_prod_a$CI_high,2) * 100, "% \n And centered near:",
               round(prod_a_points[[1]],2) * 100, "%\n\n", sep="")
  # 89% Highest Density Interval
  prod_b_points <- point_estimate(posterior$prod_b)
  ci_hdi_prod_b <- ci(posterior$prod_b, method = "HDI")
  out <- paste(out, " The previous product B forecast combined with ", base_configs["b_n_events"] ," events and ", event_counts["product_b_errors"]," errors is between:",
               round(ci_hdi_prod_b$CI_low,2) * 100, "% and ",
               round(ci_hdi_prod_b$CI_high,2) * 100, "% \n And centered near:",
               round(prod_b_points[[1]],2) * 100, "%\n\n", sep="")
  out <- paste(out, "See the markdown report or plot tab graphs in RStudio to see forecasts combined with data.\n")
  cat(out)
}
CatPriorBeliefRates <- function(event_priors){
  # Print beta curve
  #print(MakeCurves(event_priors[["median_errors"]], event_priors[["edge_errors"]],color="Purple", CI=TRUE, CIT=TRUE))
  xval = "Event Probability"
  yval = "Event Strength"
  tval = "Credible Beliefs About Events"
  sval = "With 95% Credible Belief Interval"
  beta_vals <- GetBeliefsEvents(event_priors[["median_errors"]], event_priors[["edge_errors"]] )
  print(ggplotBeta(beta_vals[1], beta_vals[2], xlab = xval, ylab = yval, tlab = tval, sval = sval))
  #print(MakeBetaGraph(beta_vals[1], beta_vals[2], xlab = xval, ylab = yval, tlab = tval,slab = sval))
  # Get beta prior CIs
  priorCI <- GetBetaCI(event_priors["median_errors"], event_priors["edge_errors"])
  cat("------Prior Beliefs About Error Rates--------\n")
  cat(paste(" You forecasted an ERROR RATE equally ABOVE/BELOW ", round(event_priors[["median_errors"]]*100), "%.\n", sep=""))
  cat(paste(" You're also 90% sure the error rate is LESS THAN ", round(event_priors[["edge_errors"]]*100), "%.\n", sep=""))
  cat(paste(" Given that forecast and the model we are using, you believe the true rate likely sits between ", priorCI[1],"% and ", priorCI[3], "%\n\n", sep=""))
}
GetTotalCosts <- function(impacts, costs, time_multiple, event_counts, prior_events, base_configs){
  product_a_counts <- event_counts[[1]]
  product_b_counts <- event_counts[[2]]
  # Get gamma prior for product A, which gets fed into the poisson dist in GetEventFrequency()
  a_number_events <- as.numeric(base_configs$a_n_events)
  a_gamma_median_prior <- prior_events[[1]] * a_number_events
  a_gamma_sd <- prior_events[[2]] * a_number_events
  # Get gamma prior for product B, which gets fed into the poisson dist in GetEventFrequency()
  b_number_events <- as.numeric(base_configs$b_n_events)
  b_gamma_median_prior <- prior_events[[1]] * b_number_events
  b_gamma_sd <- prior_events[[2]] * b_number_events
  prod_a_event_results <- GetEventFrequency(product_a_counts, a_gamma_median_prior, a_gamma_sd)
  prod_a_mean_events <- prod_a_event_results$mean
  prod_a_event_totals <- prod_a_mean_events * as.numeric(time_multiple)
  prod_b_event_results <- GetEventFrequency(product_b_counts, b_gamma_median_prior, b_gamma_sd)
  prod_b_mean_events <- prod_b_event_results$mean
  prod_b_event_totals <- prod_b_mean_events * as.numeric(time_multiple)
  prod_a_total_cost <- prod_a_event_totals * impacts$prod_a_impact + costs$prod_a_cost + costs$prod_a_run
  prod_b_total_cost <- prod_b_event_totals * impacts$prod_b_impact + costs$prod_b_cost + costs$prod_b_run
  return(c(prod_a_total_cost, prod_b_total_cost) )
}
GetEventFrequency <- function(event_counts, avg_event_count, event_variation, graph = FALSE){
  # event_counts can be a vector of results, perhaps events per day, week, month, etc.
  # The mode and sd get converted into gamma shape and rate parameters.
  event_count_priors <- GetRatePriors(avg_event_count, event_variation, graph)
  result <- poisgamp(event_counts, event_count_priors[1], event_count_priors[2], plot = FALSE, suppressOutput = TRUE)
  return(result)
}
GetRatePriors <- function(mode, sd, graph = FALSE){
  # This function allows you to think in terms of mode and SD.
  # Note that everything is positive, so while the mode might be a small number close to 0,
  # the SD can be large, skewing far to the right without going past zero on the left.
  # Here are the corresponding rate and shape parameter values:
  ra = ( mode + sqrt( mode^2 + 4*sd^2 ) ) / ( 2 * sd^2 )
  sh = 1 + mode * ra
  if(graph){
    x = seq(0,mode+5*sd,len=1001)
    plot( x , dgamma( x , shape=sh , rate=ra ) , type="l" ,
          main=paste("dgamma, mode=",mode,", sd=",sd,sep="") ,
          ylab=paste("dgamma( shape=",signif(sh,3)," , rate=",signif(ra,3)," )",
                     sep="") )
    abline( v=mode , lty="dotted" )
  }
  return(c(sh, ra))
}
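# Usage sketch (hedged): guarded so it never runs when this file is sourced.
# For a gamma distribution with shape sh and rate ra, the mode is
# (sh - 1) / ra, so the conversion can be sanity-checked by inverting it.
# Mode and sd values are illustrative.
if (FALSE) {
  pr <- GetRatePriors(mode = 5, sd = 3)
  (pr[1] - 1) / pr[2]  # recovers the elicited mode of 5
}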
GetEngCost <- function(size, eng_cost_pert, eng_hour_priors){
  mode <- as.numeric(eng_cost_pert[["mode"]])
  low <- as.numeric(eng_cost_pert[["low"]])
  high <- as.numeric(eng_cost_pert[["high"]])
  mode_hrs <- as.numeric(eng_hour_priors[["mode"]])
  low_hrs <- as.numeric(eng_hour_priors[["low"]])
  high_hrs <- as.numeric(eng_hour_priors[["high"]])
  eng_rate <- rpert(size, min = low, mode = mode, max = high, shape = 4)
  eng_hours <- rpert(size, min = low_hrs, mode = mode_hrs, max = high_hrs, shape = 4)
  eng_cost <- eng_hours * eng_rate
  return(eng_cost)
}
GetErrors <- function(n_draws, n_events, proportion_events){
  n_errors <- rbinom(n = n_draws, size = n_events, prob = proportion_events)
  return(n_errors)
}
GetPropotionOfEvents <- function(n_draws,prior_events){
  error_beliefs <- GetBeliefsEvents(prior_events[["median_errors"]], prior_events[["edge_errors"]])
  # Make an n_draws-length list of error beliefs
  proportion_events <- rbeta(as.numeric(n_draws), as.numeric(error_beliefs[[1]]) , as.numeric(error_beliefs[[2]]) )
  return(proportion_events)
}
SimulateHits <- function(freq_hits, num_events){
  freq_miss <- 1 - freq_hits
  res <- sample(c("hit", "miss"), size=num_events, replace=TRUE,
                prob=c(freq_hits, freq_miss))
  hit <- sum(res == "hit")
  miss <- sum(res == "miss")
  return( c(hit,miss))
}
GetBetaCI <- function(hit,edge){
  betas <- GetBeliefsEvents(hit, edge)
  intervals <- qbeta(c(.025, .5, .975), betas[1], betas[2])
  intervals <- round(intervals * 100)
  return(intervals)
}
MakeCurves <- function(alpha = 0, beta = 0 , color = "black", ytop = 20, CI = FALSE, CIT = FALSE, add_val = FALSE){
  alpha <- alpha + 1
  beta <- beta + 1
  curve(dbeta(x, alpha , beta ), from=0, to=1,
        xlab="SME Forecast For Proportion of Events", ylab="Density",
        col= color, lwd=4, las=1, add = add_val , ylim = c(0,ytop))
  if( CI ){
    intervals <- qbeta(c(.025, .975), alpha, beta)
    abline(v = intervals[1], lty=2)
    abline(v = intervals[2], lty=2)
    if( CIT ){
      adj <- 0
      if( intervals[2] - intervals[1] < 0.11 ){
        adj <- 1
      }
      text(intervals[1] + 0.03,ytop, round(intervals[1], 2))
      text(intervals[2] + 0.03 ,ytop - adj, round(intervals[2], 2))
    }
  }
}
ggplotBeta <- function(alpha, beta, xlab = xval, ylab = yval, tlab = "Beliefs", sval = "Test"){
  x = seq(0, 1, 0.01)
  y = dbeta(x, shape1=alpha, shape2=beta)
  density = data.frame(x=x, y=y)
  mean = alpha/(alpha+beta)
  mode = (alpha-1)/(alpha+beta-2)
  intervals <- qbeta(c(.025, .975), alpha, beta)
  #print(paste0("sval: ", sval, " intervals: ", round(intervals[1],2), " to ",round(intervals[2],2) ))
  sval <- paste0(sval, " of ", round(intervals[1],2), " to ", round(intervals[2],2), " and mean: ", round(mean,2))
  res <- ggplot(density, aes(x, y)) +
    geom_area(fill= "darkslateblue", alpha = 0.6) +
    theme_minimal()+
    theme(axis.text.x = element_text(angle = 90, hjust = 1),
          axis.text.y = element_blank()) +
    geom_vline(xintercept = intervals[1]) +
    geom_vline(xintercept = intervals[2])+
    geom_vline(xintercept = mean)+
    labs(x = xlab, y = ylab, title = tlab, subtitle = sval)
  return(res)
}
GetSize <- function(size, first, second){
  res <- ifelse(length(first) >= length(second), length(second), length(first) )
  if( size > res){
    return(res)
  }
  return(size)
}
GetEngHours <- function(size, med = 10, ninety = 60, total_hours = 100){
  beta_vals <- GetBeliefsHits(med, ninety, total_hours)
  prop_hours <- rbeta(size, beta_vals[1], beta_vals[2])
  hours <- rbinom(size, total_hours, prop_hours)
  return(hours)
}
#' ##Wrapper Function
#' This just makes it easy for users to call one function without much fuss.
runProductCostComparison <- function(base_configs, event_priors, event_counts, eng_hour_range, eng_cost_range, product_costs){
  # A quick check for missed false negatives between products and other sundry.
  update_count <- ErrorCheck(base_configs, event_counts)
  if( update_count[2] > 0){
    event_counts[update_count[[1]]] <- as.numeric(update_count[[2]]) + event_counts[[update_count[1]]]
    if( update_count[1] == "product_a_errors"){
      base_configs$a_n_events <- base_configs$a_n_events + as.numeric(update_count[[2]])
      print(paste("Updating Product A Events: ", base_configs$a_n_events))
    }
    if( update_count[1] == "product_b_errors"){
      base_configs$b_n_events <- base_configs$b_n_events + as.numeric(update_count[[2]])
      print(paste("Updating Product B Events: ", base_configs$b_n_events))
    }
  }
  #' ##Prior Beliefs About Error Rates
  #' In the *event_priors* config element above you specified what you think is the likely error rate given N events. This graph shows you
  #' the spread of possible rates given our beliefs. It also gives some indication of where the most likely rates may cluster.
  CatPriorBeliefRates(event_priors)
  # Number of simulations (draws), tests (events), and default array index sizes
  n_draws <- as.numeric(base_configs[["n_draws"]])
  a_n_events <- as.numeric(base_configs[["a_n_events"]])
  b_n_events <- as.numeric(base_configs[["b_n_events"]])
  size <- 10000
  # Create a list of simulated events and error RATES based on priors
  proportion_events <- GetPropotionOfEvents(n_draws, event_priors)
  a_n_errors <- GetErrors(n_draws, a_n_events, proportion_events)
  b_n_errors <- GetErrors(n_draws, b_n_events, proportion_events)
  # Put simulations into dataframes
  a_prior <- data.frame(proportion_events, a_n_errors)
  b_prior <- data.frame(proportion_events, b_n_errors)
  #print(head(b_prior,10))
  # Select the simulated prior data that matches the real product errors
  product_a <- a_prior[a_prior$a_n_errors == event_counts[["product_a_errors"]], ]
  product_b <- b_prior[b_prior$b_n_errors == event_counts[["product_b_errors"]], ]
  #print(head(product_a))
  #print(head(product_b))
  # Sample to create indices for posterior and load into data frame
  sample_a_indices <- sample( nrow(product_a), size = size, replace = TRUE, prob = product_a$proportion_events)
  sample_b_indices <- sample( nrow(product_b), size = size, replace = TRUE, prob = product_b$proportion_events)
  posterior <- data.frame(
    prod_a = product_a$proportion_events[sample_a_indices],
    prod_b = product_b$proportion_events[sample_b_indices])
  #print(head(posterior))
  #' Updated beliefs given real error rates
  CatUpdatedBeliefRates(posterior, event_counts, base_configs)
  #' Create and print out error rate graphs
  GraphErrorRates(posterior)
  #' Calculate the rate differences, graph and print out
  posterior$prop_diff_events <- posterior$prod_a - posterior$prod_b
  print(head(posterior,10))
  GraphErrorDiff(posterior)
  CatErrorDiffProp(posterior)
  # This gets our impact per event with lots of uncertainty packed in
  eng_cost <- GetEngCost(size, eng_cost_range, eng_hour_range)
  # Add forecasted expected cost column to posterior for product A and B and store in list
  posterior$prod_a_impact <- posterior$prod_a * eng_cost
  posterior$prod_b_impact <- posterior$prod_b * eng_cost
  # Load median values into list for use later
  impacts <- list()
  impacts["prod_a_impact"] <- median(posterior$prod_a_impact)
  impacts["prod_b_impact"] <- median(posterior$prod_b_impact)
  # Add the column that holds the difference between the two costs
  posterior$prod_diff_impact <- posterior$prod_a_impact - posterior$prod_b_impact
  print("Here is a glimpse into some of the data.")
  #print(head(posterior,10))
  total_costs <- GetTotalCosts(impacts, product_costs, base_configs["time_multiple"], event_counts, event_priors, base_configs)
  #' #Total Costs Differences
  impact_res <- CatTotalCost(total_costs, product_costs)
  #' Graph impact difference
  GraphErrorImpact(posterior, impact_res)
}
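# Usage sketch (hedged): guarded so it never runs when this file is sourced.
# All values below are illustrative placeholders; the field names mirror
# what the wrapper and its helpers read (n_draws, a_n_events, b_n_events,
# time_multiple, median_errors, edge_errors, prod_a_run, and so on).
if (FALSE) {
  base_configs <- list(n_draws = 100000, a_n_events = 50,
                       b_n_events = 50, time_multiple = 12)
  event_priors <- list(median_errors = 0.05, edge_errors = 0.20)
  event_counts <- list(product_a_errors = 5, product_b_errors = 2)
  eng_hour_range <- list(low = 2, mode = 8, high = 40)
  eng_cost_range <- list(low = 100, mode = 150, high = 250)
  product_costs <- list(prod_a_cost = 50000, prod_b_cost = 80000,
                        prod_a_run = 10000, prod_b_run = 12000)
  runProductCostComparison(base_configs, event_priors, event_counts,
                           eng_hour_range, eng_cost_range, product_costs)
}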
GetVulnPredictions <- function(beta_closed, beta_opened, future_counts){
  ab <- c(beta_closed, beta_opened)
  future_count <- future_counts
  future_list <- 0:future_count
  plf <- pbetap(ab, future_count, future_list)
  return(round(cbind(future_list, plf), digits=3))
}
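# Usage sketch (hedged): guarded so it never runs when this file is sourced.
# pbetap() (ProbBayes) gives the beta-predictive probability for each
# possible future count; here, the chance of seeing 0..10 closed vulns in
# the next 10, given a Beta(20, 5) posterior. Shape values are illustrative.
if (FALSE) {
  GetVulnPredictions(beta_closed = 20, beta_opened = 5, future_counts = 10)
}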
GetVulnsOpenedByWks <- function(data_table = dt, week_list = 1:52, group_list = 1:10, audit_list = 0:1, exposure_list = 1:3){
  vulns <- data_table %>%
    filter(week.fseen %in% week_list & group %in% group_list & audit %in% audit_list & exposure_id %in% exposure_list) %>%
    select(week.fseen, severity, group, hits) %>%
    group_by(group,severity, week.fseen) %>%
    summarise(ext_count = sum(hits),.groups = 'drop')
  # 52 weeks of zeros, once per group in group_list
  empty_week <- tibble(week.fseen = rep(1:52,length(group_list)),ext_count = 0)
  # Include weeks with zero counts
  vuln_wks <- right_join(vulns,empty_week, by = "week.fseen") %>%
    mutate( new_count = if_else(is.na(ext_count.x),0, ext_count.x)) %>%
    select(week.fseen, severity,group, new_count) %>% distinct() %>%
    group_by(week.fseen, severity) %>% summarize(vuln_count = sum(new_count),.groups = 'drop') %>%
    arrange(week.fseen) %>% distinct()
  vuln_wks <- vuln_wks %>% pivot_wider(id_cols = c(week.fseen),
                                       names_from = severity, values_from = c(vuln_count),values_fill=0)
  vuln_wks <- vuln_wks %>% select(week.fseen, extreme, critical, high)
  col_len <- length(vuln_wks)
  col_start <- col_len - 2
  vuln_wks <- vuln_wks %>% replace(is.na(.), 0) %>% mutate(all = rowSums(across(col_start:col_len)))
  vuln_wks <- vuln_wks %>% select(week.fseen, extreme, critical, high, all)
  return(vuln_wks)
}
GetVulnGroups <- function(group_count = 10, ext = c(.01,.03), sev = c(.031, .07), hgh = c(.071, .15)){
  dt <- tibble()
  for(x in 1:group_count){
    extreme <- sample(seq(from = ext[1], to = ext[2], by= .001), size = 1)
    severe <- sample(seq(from = sev[1], to = sev[2], by= .001), size = 1)
    high <- sample(seq(from = hgh[1], to = hgh[2], by= .001), size = 1)
    patch_fail <- sample(1:4,size=1)
    dev_maturity <- sample(1:4,size=1)
    #print(paste0("Ext: ",extreme," Sev: ", severe, " High: ", high, " Patch Fail: ", patch_fail, " Dev Mat: ", dev_maturity))
    res <- GetAdvSecResults(c(extreme, severe, high), patch_fail, dev_maturity)
    res$group <- x
    dt <- bind_rows(dt,res)
  }
  return(dt)
}
betaFormula <- function(audit, severity_id, exposure_id, time_adj, nDays, dev_maturity){
  adj_val <- ((nDays*2)/365) * (dev_maturity * 3)
  #print(adj_val)
  alpha <- (((audit*3 + severity_id * 3 + exposure_id * 3) * (severity_id*1.43)) * (1 + time_adj))
  #print(alpha)
  mult <- if_else(alpha < 10, 2.5, if_else(alpha < 30,2,1.5))
  beta <- alpha * mult
  avg <- (alpha - adj_val)/(alpha + beta)
  avg[avg <= 0] <- .005
  #print(avg)
  return(avg)
}
betaVariance <- function(audit, severity_id, exposure_id, time_adj){
  alpha <- (((audit*3 + severity_id * 3 + exposure_id * 3) * (severity_id*1.43)) * (1 + time_adj))
  mult <- if_else(alpha < 10, 2, if_else(alpha < 30,2,1.5))
  beta <- alpha * mult
  # Note: this value feeds the "variance" argument of the simstudy beta
  # definition below, which simstudy treats as a dispersion/precision
  # parameter rather than a textbook beta variance.
  ret <- ((alpha * beta)/(alpha + beta)^2 * (alpha + beta + 1))
  return(round(ret,4))
}
binomVariance <- function(audit, severity_id, exposure_id, time_adj){
  avg <- (((audit*3 + severity_id * 3 + exposure_id * 3) * (severity_id*1.43)) * (1 + time_adj))
  mult <- if_else(avg < 10, 4, if_else(avg < 30,3,2))
  var <- round(avg * mult)
  #print(paste0("Binom Var: ", var))
  return(var)
}
GetSecResults <- function(events = c(.05,.10,.2), event_corr = 0.3, patch_fail = 1){
  # Simplify: 6 team combinations, so groups of 6
  team_count <- 6
  days <- 365
  total <- days * team_count
  eventSum <- sum(events)
  # Create 6 groups of 365 each.
  team <- defData(varname = "team", dist = "nonrandom",formula = 0, id = "idTeam")
  team <- defData(team,varname = "nDays",dist = "nonrandom", formula = days)
  team <- genData(team_count, team)
  team <- genCluster(team, cLevelVar = "idTeam", numIndsVar = "nDays", level1ID = "id")
  # Put days into the nDays column
  team$nDays <- (team$id - (365*(team$idTeam - 1)))
  # Probability each day will have one or more extreme, critical or high risk events.
  # Mild correlation between the variables
  dt <- genCorGen(n = total, nvars = 3, params1 = c(events[1], events[2], events[3]),
                  dist = "binary", rho = event_corr, corstr = "cs", wide = TRUE,
                  method = "ep",cnames = "extreme_event,critical_event,high_event", id = "id")
  # For each day with an ext/crit/high event, randomly draw a small number of hits for each.
  # These are slightly more correlated.
  dt <- addCorGen(dtOld = dt, idvar = "id", nvars = 1, rho = .4, corstr = "cs",
                  dist = "poisson", param1 = "extreme_event", cnames = "extreme")
  dt <- addCorGen(dtOld = dt, idvar = "id", nvars = 1, rho = .4, corstr = "cs",
                  dist = "poisson", param1 = "critical_event", cnames = "critical")
  dt <- addCorGen(dtOld = dt, idvar = "id", nvars = 1, rho = .4, corstr = "cs",
                  dist = "poisson", param1 = "high_event", cnames = "high")
  # Event showed 1 but poisson returned 0 - make it at least 1
  dt$extreme[dt$extreme_event == 1 & dt$extreme == 0] <- 1
  dt$critical[dt$critical_event == 1 & dt$critical == 0] <- 1
  dt$high[dt$high_event == 1 & dt$high == 0] <- 1
  # Merge the team/days data with the hits/events data
  dt <- mergeData(team, dt, "id")
  # Add exposure categories for audit and exposure type
  dt$audit <- if_else(dt$id > total/2,1,0)
  # Set exposure type
  dt$exposure[dt$idTeam %in% c(1, 4)] <- "external"
  dt$exposure[dt$idTeam %in% c(2, 5)] <- "partner"
  dt$exposure[dt$idTeam %in% c(3, 6)] <- "internal"
  # Add exposure ID field
  dt$exposure_id[dt$exposure == "external"] <- 1
  dt$exposure_id[dt$exposure == "partner"] <- 2
  dt$exposure_id[dt$exposure == "internal"] <- 3
  # Add names based on audit and exposure type - note that this is just text, idTeam is most important.
  dt <- genFactor(dt, "idTeam",
                  labels = c("external audit", "partner audit", "internal audit",
                             "external no audit", "partner no audit", "internal no audit"))
  # Copy team name over and get a clean dataframe.
  dt$team <- dt$fidTeam
  ## I think this may be the error
  dt <- dt %>% select(idTeam, team, nDays, id, extreme, critical, high, audit, exposure, exposure_id)
  # Need to move melt to later as it messes with defCondition
  dt <- melt(dt, id.vars = c("idTeam","team","nDays","id","audit","exposure","exposure_id" ),
             measure.vars = c("extreme", "critical", "high"),
             variable.name = "severity", value.name = "hits")
  # Cache records with zeros for poisson
  dtZeros <- dt %>% filter(hits == 0)
  # Get one vuln per row, i.e. it drops zeros...
  dt <- dt[rep(1:.N,hits)]
  dt$hits <- 1
  # Reset ID field
  dt$id <- 1:length(dt$idTeam)
  setkey(dt,id)
  ##### Get remediation times
  # Make numeric for formula input
  dt$severity_id[dt$severity == "extreme"] <- 1
  dt$severity_id[dt$severity == "critical"] <- 2
  dt$severity_id[dt$severity == "high"] <- 3
  # Set fix time based on vuln, asset and audit
  # Note small adjustment upward based on event rate divided by 2
  defFix <- defDataAdd(varname = "fix_time",
                       formula = "((audit*3 + severity_id * 3 + exposure_id * 3) * (severity_id*1.43))",
                       dist = "noZeroPoisson")
  dt <- addColumns(defFix, dt)
  # Fix date
  defFixDate <- defDataAdd(varname = "fix_date", formula = "fix_time + nDays", dist = "nonrandom")
  dt <- addColumns(defFixDate, dt)
  # Chance of something NOT getting fixed goes up with more time.
  defStatus <- defDataAdd(varname = "status", formula = "(fix_time/..patch_fail)/365", dist = "binary")
  dt <- addColumns(defStatus, dt)
  # Add date stamps for first seen and last seen
  dt$first.seen <- if_else(dt$nDays <= 365, as.Date(dt$nDays, origin = "2020-01-01"),
                           as.Date(dt$nDays, origin = "2021-01-01"))
  dt$last.seen <- if_else(dt$nDays <= 365, as.Date(dt$fix_date, origin = "2020-01-01"),
                          as.Date(dt$fix_date, origin = "2021-01-01"))
  # Add week number for simplicity
  dt$week.fseen <- if_else(dt$nDays <= 365, week(dt$first.seen), as.integer((week(dt$first.seen) + 52)))
  dt$week.lseen <- if_else(year(dt$last.seen) == 2020, week(dt$last.seen), as.integer(week(dt$last.seen) + 52))
  return(dt)
}
# Advanced option
GetAdvSecResults <- function(events = c(.02,.10,.2), patch_fail = 1, dev_maturity = 4){
  ###### Parameters ##########
  # EVENTS = chance of having an event for extreme, crit, high per day
  # PATCH_FAIL = the higher the number, the less likely to change the status of a vuln to "still open"
  # DEV_MATURITY = 1-4 maturity level. Higher increases the likelihood of an event NOT happening.
  #   Works with where you are in the year in terms of days and other factors to
  #   adjust the likelihood of events. It also has an impact on rate of closure.
  ###################
  ## Define constants
  # Simplify: 6 team combinations, so groups of 6
  team_count <- 6
  days <- 365
  total <- days * team_count
  eventSum <- sum(events)
  ###############
  ## TEAM SECTION
  # Create 6 groups of 365 each.
  team <- defData(varname = "team", dist = "nonrandom",formula = 0, id = "idTeam")
  team <- defData(team,varname = "nDays",dist = "nonrandom", formula = days)
  team <- defData(team, varname = "dev_maturity", dist = "nonrandom", formula = "..dev_maturity")
  team <- genData(team_count, team)
  team <- genCluster(team, cLevelVar = "idTeam", numIndsVar = "nDays", level1ID = "id")
  # Put days into the nDays column
  team$nDays <- (team$id - (365*(team$idTeam - 1)))
  ########################
  ## Vulnerability Section
  # Probability each day will have one or more extreme, critical or high risk events.
  # Mild correlation between the variables
  dt <- genData(n=team_count)
  dt <- addPeriods(dt, days,perName = "nDays")
  dt$nDays <- dt$nDays + 1
  #ifelval <- if_else((events[1] - ((round(dt$nDays/(90 * (4/dev_maturity)),1)/100) * 1/2)) < 0, .005,events[1] - ((round(dt$nDays/(90 * (4/dev_maturity)),1)/100) * 1/2))
  #print(paste0("ifelval res: ", ifelval))
  defExtEvent <- defDataAdd(varname = "extreme_event", formula = "if_else((..events[1] - ((round(nDays/(90 * (4/..dev_maturity)),1)/100) * 1/2)) < 0, .005,..events[1] - ((round(nDays/(90 * (4/..dev_maturity)),1)/100) * 1/2))", dist = "binary")
  dt <- addColumns(defExtEvent, dt)
  defCritEvent <- defDataAdd(varname = "critical_event", formula = "if_else((..events[2] - ((round(nDays/(90 * (4/..dev_maturity)),1)/100) * 2/2)) < 0, .005,..events[2] - ((round(nDays/(90 * (4/..dev_maturity)),1)/100) * 2/2))", dist = "binary")
  dt <- addColumns(defCritEvent, dt)
  defHighEvent <- defDataAdd(varname = "high_event", formula = "if_else((..events[3] - ((round(nDays/(90 * (4/..dev_maturity)),1)/100) * 3/2)) < 0, .005,..events[3] - ((round(nDays/(90 * (4/..dev_maturity)),1)/100) * 3/2))", dist = "binary")
  dt <- addColumns(defHighEvent, dt)
  #view(dt)
  # Use dplyr to get fields, and must reset id for data.table
  #print(dt)
  dt <- dt %>% mutate(id = timeID) %>% select(id, extreme_event, critical_event, high_event, dev_maturity)
  #dt <- dt %>% mutate(id = timeID) %>% select(id, extreme_event, critical_event, high_event)
  setkey(dt,id)
  # For each day with an ext/crit/high event, randomly draw a small number of hits for each.
  # These are slightly more correlated.
  dt <- addCorGen(dtOld = dt, idvar = "id", nvars = 1, rho = .4, corstr = "cs",
                  dist = "poisson", param1 = "extreme_event", cnames = "extreme")
  dt <- addCorGen(dtOld = dt, idvar = "id", nvars = 1, rho = .4, corstr = "cs",
                  dist = "poisson", param1 = "critical_event", cnames = "critical")
  dt <- addCorGen(dtOld = dt, idvar = "id", nvars = 1, rho = .4, corstr = "cs",
                  dist = "poisson", param1 = "high_event", cnames = "high")
  #view(dt)
  # Event showed 1 but poisson returned 0 - make it at least 1
  dt$extreme[dt$extreme_event == 1 & dt$extreme == 0] <- 1
  dt$critical[dt$critical_event == 1 & dt$critical == 0] <- 1
  dt$high[dt$high_event == 1 & dt$high == 0] <- 1
  ################
  ## Asset Section
  # Merge the team/days data with the hits/events data
  dt <- mergeData(team, dt, "id")
  # Add exposure categories for audit and exposure type
  dt$audit <- if_else(dt$id > total/2,1,0)
  # Set exposure type
  dt$exposure[dt$idTeam %in% c(1, 4)] <- "external"
  dt$exposure[dt$idTeam %in% c(2, 5)] <- "partner"
  dt$exposure[dt$idTeam %in% c(3, 6)] <- "internal"
  # Add exposure ID field
  dt$exposure_id[dt$exposure == "external"] <- 1
  dt$exposure_id[dt$exposure == "partner"] <- 2
  dt$exposure_id[dt$exposure == "internal"] <- 3
  # Add names based on audit and exposure type - note that this is just text, idTeam is most important.
  dt <- genFactor(dt, "idTeam",
                  labels = c("external audit", "partner audit", "internal audit",
                             "external no audit", "partner no audit", "internal no audit"))
  # Copy team name over and get a clean dataframe.
  dt$team <- dt$fidTeam
  ###################
  ## Reformat Section
  ## Select important columns
  dt <- dt %>% select(idTeam, team, nDays, id, dev_maturity, extreme, critical, high, audit, exposure, exposure_id)
  # Need to move melt to later as it messes with defCondition
  dt <- melt(dt, id.vars = c("idTeam","team","nDays","id","dev_maturity","audit","exposure","exposure_id" ),
             measure.vars = c("extreme", "critical", "high"),
             variable.name = "severity", value.name = "hits")
  # Cache records with zeros for poisson
  #dtZeros <- dt %>% filter(hits == 0)
  dt$hits <- if_else(is.na(dt$hits),0,dt$hits)
  # Get one vuln per row, i.e. it drops zeros...
  dt <- dt[rep(1:.N,hits)]
  # Reset ID field
  dt$id <- 1:length(dt$idTeam)
  setkey(dt,id)
  ######
  ## Remediation Time Section
  # Make numeric for formula input
  dt$severity_id[dt$severity == "extreme"] <- 1
  dt$severity_id[dt$severity == "critical"] <- 2
  dt$severity_id[dt$severity == "high"] <- 3
  # Random time adjustment roughly based on event volume. Used for the fix time generator.
  defTimeAdj <- defDataAdd(varname = "time_adj", formula = eventSum, variance = .1, dist = "normal")
  dt <- addColumns(defTimeAdj, dt)
  dt$time_adj <- if_else(dt$time_adj <= 0 , .01, dt$time_adj)
  #print(dt)
  # Get probability associated with time from beta dist
  #print(paste0("BetaFormula: ", betaFormula(dt$audit,dt$severity_id, dt$exposure_id,dt$time_adj, dt$nDays, dt$dev_maturity)))
  #print(paste0("BetaVariance: ", betaVariance(dt$audit,dt$severity_id, dt$exposure_id,dt$time_adj)))
  betaDef <- defDataAdd(varname = "beta_prob",
                        formula = "betaFormula(audit,severity_id, exposure_id,time_adj, nDays, dev_maturity)",
                        variance = "betaVariance(audit,severity_id, exposure_id,time_adj)",
                        dist = "beta")
  #print(betaDef)
  # Add column with list of probabilities
  dt <- addColumns(betaDef, dt)
  #print(paste0("Beta Prob: ",dt$beta_prob))
  #print(paste0("Variance: ", binomVariance(dt$audit,dt$severity_id, dt$exposure_id,dt$time_adj)))
  # Use beta probabilities as input into binomial for time to fix
  binDef <- defDataAdd(varname = "fix_time",
                       formula = "beta_prob",
                       variance = "binomVariance(audit,severity_id, exposure_id,time_adj)",
                       dist = "binomial"
  )
  #print(binDef)
  # Create column for time to fix.
  dt <- addColumns(binDef, dt)
  # Update any time-to-fix zero values with a 1
  dt$fix_time[dt$fix_time == 0] <- 1
  # Fix date
  defFixDate <- defDataAdd(varname = "fix_date", formula = "fix_time + nDays", dist = "nonrandom")
  dt <- addColumns(defFixDate, dt)
  # Chance of something NOT getting fixed goes up with more time.
  defStatus <- defDataAdd(varname = "status", formula = "if_else((fix_time/..patch_fail)/365 > 1,1,(fix_time/..patch_fail)/365)", dist = "binary")
  dt <- addColumns(defStatus, dt)
  # Add date stamps for first seen and last seen
  dt$first.seen <- if_else(dt$nDays <= 365, as.Date(dt$nDays, origin = "2020-01-01"),
                           as.Date(dt$nDays, origin = "2021-01-01"))
  dt$last.seen <- if_else(dt$nDays <= 365, as.Date(dt$fix_date, origin = "2020-01-01"),
                          as.Date(dt$fix_date, origin = "2021-01-01"))
  # Add week number for simplicity
  dt$week.fseen <- if_else(dt$nDays <= 365, week(dt$first.seen), as.integer((week(dt$first.seen) + 52)))
  dt$week.lseen <- if_else(year(dt$last.seen) == 2020, week(dt$last.seen), as.integer(week(dt$last.seen) + 52))
  return(dt)
}
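# Usage sketch (hedged): guarded so it never runs when this file is sourced.
# Builds a year of simulated vuln arrivals and fix times for 6 teams;
# higher dev_maturity suppresses events, higher patch_fail leaves more
# vulns open. Seed and parameter values are illustrative.
if (FALSE) {
  set.seed(7)
  dt <- GetAdvSecResults(events = c(.02, .10, .2),
                         patch_fail = 2, dev_maturity = 3)
  head(dt)
}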
GetArrivalPrior <- function(v_open){
  # Functions used to get optimized gamma values
  boot_mean = function(original_vector, resample_vector) {
    mean(original_vector[resample_vector])
  }
  obj = function(x){
    sum(abs(pgamma(q=qbs, shape=x[1], rate=x[2]) - c(.05, .95)))
  }
  mean_results <- boot(v_open, boot_mean, R = 2000)
  qbs = quantile(mean_results$t, prob=c(.05, .95))
  theta = optim(par = c(1, 1), obj)$par
  return(theta)
}
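# Usage sketch (hedged): guarded so it never runs when this file is sourced.
# v_open is a vector of daily arrival counts; the bootstrap brackets the
# mean with 5%/95% quantiles and optim() finds gamma shape/rate values
# matching that bracket. The simulated input is illustrative.
if (FALSE) {
  set.seed(1)
  v_open <- rpois(365, lambda = 0.4)  # illustrative daily counts
  GetArrivalPrior(v_open)             # returns c(shape, rate)
}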
plotGammaAdv <- function(theta, prior, ytop = 1,xval = "Rate", yval = "Density",
                         tlab = "Rate Prior", slab = ""){
  pdf <- tibble(gamma_val = prior, theta_val = theta)
  ggplot(pdf,aes(x=theta, y=gamma_val)) +
    geom_density(aes(x=theta,y=gamma_val),stat="identity",fill = "darkslateblue",
                 alpha = 0.6) +
    xlab(xval) +
    ylab(yval) +
    labs(title = tlab, subtitle = slab, xlab = xval, ylab = yval)
}
plot_posterior <- function(theta, prior, likelihood, posterior) {
  ymax = max(c(prior, posterior))
  scaled_likelihood = likelihood * ymax / max(likelihood)
  plot(theta, prior, type='l', col='orange', xlim= range(theta), ylim=c(0, ymax), ylab='',xlab='Rate', yaxt='n')
  par(new=T)
  plot(theta, scaled_likelihood, type='l', col='skyblue', xlim=range(theta), ylim=c(0, ymax), ylab='' ,xlab='Rate', yaxt='n')
  par(new=T)
  plot(theta, posterior, type='l', col='seagreen', xlim=range(theta), ylim=c(0, ymax), ylab='',xlab='Rate', yaxt='n')
  legend("topright", c("prior", "likelihood", "posterior"), lty=1, col=c("orange", "skyblue", "seagreen"))
}
posteriorGraph <- function(samples){
  ci_hdi <- ci(samples, method = "HDI", verbose = FALSE)
  ci_hdi_small <- ci(samples, method = "HDI", ci = .50, verbose = FALSE)
  pdf <- tibble(samples = samples)
  ggplot(pdf, aes(x = samples)) +
    #geom_area(fill="darkslateblue", alpha = 0.6)
    geom_density(fill = "darkslateblue", alpha = 0.6, adjust = 4.5) +
    geom_vline(xintercept = ci_hdi$CI_low, color = "black", size = .5) +
    geom_vline(xintercept = ci_hdi$CI_high, color = "black", size = .5) +
    geom_vline(xintercept = ci_hdi_small$CI_low, color = "red", size = .5) +
    geom_vline(xintercept = ci_hdi_small$CI_high, color = "red", size = .5)
}
GetNVDAnalysis <- function(years, sev_val = 9, prior_mult = 1, end = 365, theta_range = c(.5,2.0),
                           new = FALSE, save = FALSE, fill_curve = "white", match_string = "default", from_file = FALSE){
  Nrep = 10000
  count = 1
  if( match_string == "default"){
    match_string = "Windows|Microsoft|Office|Mac|Apple|Browser|cisco|java|Oracle|adobe"
  }
  for(year in years){
    prev_year <- year - 1
    # Use cached files
    if( count > 1){
      prior <- GetNVDPriors(prev_year,sev_val, match_string = match_string, from_file = from_file) * prior_mult
    }else{
      # Cache file
      prior <- GetNVDPriors(prev_year,sev_val, new_file = new, save_file = save, match_string = match_string, from_file = from_file) * prior_mult
      count = count + 1
    }
    rates <- GetNVDRatesByYear(year, sev_val, day_end = end, get_new = new, cache = save, match_string = match_string, from_file = from_file)
    scale <- prior$alpha + rates$num_vulns
    rate <- prior$lambda + rates$num_vulns * rates$avg_days
    ppd_graph <- GetPostPredDist(Nrep, scale, rate, year, fill_curve)
    pm_graph <- GetPostPredMeanGraph(Nrep, scale, rate, year, fill_curve)
    pgg <- GetGammaPoisChart(prior, rates)
    print(ppd_graph)
    print(pm_graph)
    print(pgg)
  }
}
GetNVDPriors <- function(year, sev_val = 9, optim_vals = c(1,1), new_file = FALSE, save_file = FALSE,
                         match_string = "default", from_file = FALSE){
  nvd_prev <- GetNVDData(year, new = new_file, save = save_file, from_file = from_file)
  # Get critical findings
  prior_open <- GetCritDF(nvd_prev,sev_val, match_string)
  # Sort findings by day of year
  prior_open <- sort(prior_open$pub_day)
  # Add zeros in the list of year days where there were no findings
  prior_open <- GetFirstSeenHits(prior_open, min = 1, max = 365)
  g_priors <- GetGammaPriors(prior_open)
  # Create readable variable names for prior
  ptibble <- tibble(alpha = g_priors[1], lambda = g_priors[2])
  return(ptibble)
}
GetNVDRatesByYear <- function(year, sev_val = 9, day_end = 365, get_new = FALSE, cache = FALSE, match_string = "default", from_file = FALSE){
  nvd_df <- GetNVDData(year, new = get_new, save = cache, from_file = from_file)
  nvd_df <- nvd_df[yday(nvd_df$pub_date) <= day_end,]
  #print(nvd_df)
  crit_df <- GetCritDF(nvd_df,sev_val, match_string)
  wait_time <- sum(diff(sort(crit_df$pub_day)))
  num_vulns = nrow(crit_df)
  vulns_per_wait_time <- num_vulns/wait_time
  avg_days = wait_time/num_vulns
  retTib <- tibble(wait_time, num_vulns, vulns_per_wait_time, avg_days)
  return(retTib)
}
GetPostPredDist <- function(Nrep, prior, rates, year = "", fill_curve = "white"){
  #print(rates)
  #shape_val <- prior$alpha + rates$num_vulns
  shape_val <- prior
  #rate_val <- prior$lambda + rates$num_vulns * rates$avg_days
  rate_val <- rates
  if( year != ""){
    year = paste0(year, " ")
  }
  theta_sim = rgamma(Nrep, shape_val, rate_val)
  y_sim = rexp(Nrep, rate = theta_sim)
  val95 <- round(quantile(y_sim, 0.95),2)
  val50 <- round(quantile(y_sim, 0.50),2)
  df <- tibble(sim=y_sim)
  res <- ggplot(df, aes(x=sim)) +
    geom_histogram(aes(y=..density..), colour="black", fill="white",bins=100)+
    geom_density(alpha=.2, fill=fill_curve)+
    geom_vline(aes(xintercept=mean(val95)),
               color="black", linetype="dashed", size=.5)+
    geom_vline(aes(xintercept=mean(val50)),
               color="black", linetype="dashed", size=.5)+
    labs(title = paste0(year, "Posterior Predictive Distribution"),
         subtitle = paste0("95% Chance Vuln In: ", val95," Days Or Less | 50% In: ",
                           val50, " Or Less")) +
    xlab("Days") + ylab("Strength") + xlim(c(0, max(df$sim)))
  return(res)
}
GetPostPredMeanGraph <- function(Nrep, prior, rates, year = "", fill_curve = "white"){
  #shape_val <- prior$alpha + rates$num_vulns
  #rate_val <- prior$lambda + rates$num_vulns * rates$avg_days
  shape_val <- prior
  rate_val <- rates
  if( year != ""){
    year = paste0(year, " ")
  }
  theta_sim = rgamma(Nrep, shape_val, rate_val)
  tratio <- 1 / theta_sim
  ci <- ci(tratio, method="HDI")
  mean_sim <- round(mean(tratio),2)
  df <- tibble(sim=tratio)
  res <- ggplot(df, aes(x=sim)) +
    geom_histogram(aes(y=..density..), colour="black", fill="white",bins=100)+
    geom_density(alpha=.2, fill= fill_curve)+
    geom_vline(aes(xintercept=ci$CI_low),
               color="black", linetype="dashed", size=.5)+
    geom_vline(aes(xintercept=ci$CI_high),
               color="black", linetype="dashed", size=.5)+
    geom_vline(aes(xintercept=mean_sim),
               color="black", linetype="dashed", size=.75)+
    labs(title = paste0(year,"Posterior Distribution of Mean Inter-Arrivals"),
         subtitle = paste0("Mean Rate: ", mean_sim," | 89% HDI: ",
                           round(ci$CI_low,2), " To: ", round(ci$CI_high,2))) +
    xlab("Mean Waiting Time - Days") + ylab("Strength")
  return(res)
}
GetGammaPoisChart <- function(prior,data){
  p_scale <- prior$alpha + data$num_vulns
  p_rate <- prior$lambda + data$num_vulns * data$avg_days
  rng <- GetRanges(prior, data, p_scale, p_rate)
  sim_g <- rgamma(1000,p_scale,p_rate)
  ci <- ci(sim_g,method = "HDI")
  ggraph <- ggplot(data.frame(x = c(rng$min_r, rng$max_r)), aes(x = x)) +
    stat_function(fun = dgamma, args = list(prior$alpha, prior$lambda),
                  aes(colour = "Prior"), size = 1.5, geom = "area", fill="goldenrod", alpha=0.2) +
    stat_function(fun = dgamma, args = list(data$num_vulns, data$wait_time),
                  aes(colour = "Likelihood"), size = 1.5, geom = "area", fill="red", alpha=0.2) +
    stat_function(fun = dgamma, args = list(p_scale, p_rate),
                  aes(colour = "Posterior"), size = 1.5, geom = "area", fill="blue", alpha=0.2) +
    scale_x_continuous(name = "Vuln Rate")+
    scale_y_continuous(name = "Strength") +
    ggtitle("Gamma curves: prior, likelihood and posterior") +
    scale_colour_manual("Bayes Curves", values = c("red", "blue", "goldenrod")) +
    geom_vline(aes(xintercept=ci$CI_low),color="black", linetype="dashed", size=.5)+
    geom_vline(aes(xintercept=ci$CI_high),color="black", linetype="dashed", size=.5)+
    theme_bw()
  return(ggraph)
}
GetFirstSeenHits <- function(vals,min = 1, max = 365){
  # Holder for result
  res <- c()
  # Loop through day range
  for(x in min:max){
    # If x is a day with a hit in vals, process
    if(x %in% vals){
      # Get count of x's in vals, append to return vector
      res <- c(res, sum(vals == x))
    }else{
      # x is not in vals, append a 0 to the vector
      res <- c(res,0)
    }
  }
  return(res)
}
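# Usage sketch (hedged): guarded so it never runs when this file is sourced.
# Expands a vector of "day of year" hits into a count series with explicit
# zeros, e.g. hits on days 2, 2 and 5 become c(0, 2, 0, 0, 1, 0, 0).
if (FALSE) {
  GetFirstSeenHits(c(2, 2, 5), min = 1, max = 7)
}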
GetGammaPriors <- function(prior_int){
  boot_mean = function(original_vector, resample_vector) {
    mean(original_vector[resample_vector])
  }
  mean_results <- boot(prior_int, boot_mean, R = 2000)
  qbs = quantile(mean_results$t, prob=c(.05, .95))
  obj = function(x){
    sum(abs(pgamma(q=qbs, shape=x[1], rate=x[2]) - c(.05, .95)))
  }
  gvals = optim(par = c(1, 1), obj)$par
  return(gvals)
}
GetNVDData <- function(year, new = FALSE, save = FALSE, from_file = FALSE){
  print(paste0("Year:", year))
  if(new == FALSE){
    nvd_df <- readRDS(paste0("nvd_",year,".RDS"))
    return(nvd_df)
  }
  if( from_file == FALSE){
    temp <- tempfile()
    download.file(paste0("https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-",year,".json.zip"),temp)
    nvd <- fromJSON((unz(temp, paste0("nvdcve-1.1-",year,".json"))))
    unlink(temp)
  }else{
    nvd <- fromJSON((unz(paste0("nvdcve-1.1-",year,".json.zip"),paste0("nvdcve-1.1-",year,".json"))))
  }
  cves <- nvd$CVE_Items$cve$CVE_data_meta$ID
  pub_date <- nvd$CVE_Items$publishedDate
  mod_date <- nvd$CVE_Items$lastModifiedDate
  b_score <- nvd$CVE_Items$impact$baseMetricV2$cvssV2$baseScore
  sev <- nvd$CVE_Items$impact$baseMetricV2$severity
  # int_act <- nvd$CVE_Items$impact$baseMetricV2$userInteractionRequired
  # all_priv <- nvd$CVE_Items$impact$baseMetricV2$obtainAllPrivilege
  # acc_vec <- nvd$CVE_Items$impact$baseMetricV2$cvssV2$accessVector
  desc <- nvd$CVE_Items$cve$description$description_data
  desc_vals = NULL
  for(x in 1:length(desc)){
    if( x == 1){
      desc_vals <- desc[[x]]$value
    }else{
      desc_vals <- c(desc_vals, desc[[x]]$value)
    }
  }
  #nvd_df <- tibble(cves,pub_date,mod_date,b_score,sev, desc_vals, int_act, all_priv, acc_vec)
  nvd_df <- tibble(cves,pub_date,mod_date,b_score,sev, desc_vals)
  if( save == TRUE){
    saveRDS(nvd_df, file = paste0("nvd_",year,".RDS"))
  }
  return(nvd_df)
}
GetLargeVulnGroups <- function(new = FALSE, group_count = 50, ext = c(.005,.03), sev = c(.031, .07), hgh = c(.071, .15)){
  # If new, then build the simulation
  if( new == TRUE){
    # Can take ~30 seconds to build
    dt <- GetVulnGroups(group_count, ext, sev, hgh)
    # Save for future use
    saveRDS(dt, file = "audit_df.RDS")
  }else{
    # Load from the saved file
    dt <- readRDS("audit_df.RDS")
  }
  return(dt)
}
GetEscapeGroups <- function(dt, group_count){
  #dt <- year_one_group
  dt$escaped <- GetEscapeVal(dt)
  # Set a quarter of the groups to starting in December
  three_quarters <- group_count - round(group_count/4)
  dt_one_month <- dt %>% filter(group %in% three_quarters:group_count & (week.fseen >= 48 & week.fseen <= 52))
  # Set another quarter of the groups to starting in October
  half_count <- round(group_count/2)
  dt_one_quarter <- dt %>% filter(group %in% half_count:(three_quarters-1) & (week.fseen >= 39 & week.fseen <= 52))
  # Have the remaining half start at the beginning of the year
  dt_full <- dt %>% filter(group < half_count & week.fseen <= 52)
  # Integrate all of the data frames back into one
  dt <- bind_rows(dt_one_month, dt_one_quarter, dt_full)
  # Remove unwanted data frames.
  dt_one_month <- dt_one_quarter <- dt_full <- NULL
  # Create group aggregates
  group_vals <- dt %>%
    group_by(group) %>%
    summarize(total_weeks = (max(week.fseen) - min(week.fseen)) + 1,
              prod_vuln = sum(escaped==1),
              dev_vuln = sum(escaped == 0),
              total_vulns = prod_vuln + dev_vuln,.groups = 'drop')
  return(group_vals)
}
GetEscapeVal <- function(dt){
  rand_vals <- runif(nrow(dt))
  #maturity_levels <- mapply(MaturityFlip,dt$dev_maturity)
  prob_escape <- dt$beta_prob/dt$dev_maturity
  dt$escaped <- if_else(rand_vals < prob_escape,1,0)
  return(dt$escaped)
}
AddEscapeDF <- function(df1, df2){
  count <- max(nrow(df1),nrow(df2))
  res <- tibble()
  for(x in 1:count){
    if(is.na(df1[x,1]) & is.na(df2[x,1])){
      # do nothing
    }else if(is.na(df1[x,1]) & !is.na(df2[x,1])){
      tib <- df2[x,]
    }else if(is.na(df2[x,1]) & !is.na(df1[x,1])){
      tib <- df1[x,]
    }else{
      tib <- df1[x,] + df2[x,]
    }
    res <- bind_rows(res, tib)
  }
  return(res)
}
GetBurnMLE <- function(hits,misses){
  ll <- function(alpha, beta) {
    x <- hits
    total <- hits + misses
    #print(total)
    -sum(VGAM::dbetabinom.ab(x, total, alpha, beta, log = TRUE))
  }
  m <- mle(ll, start = list(alpha = 1, beta = 10), method = "L-BFGS-B",lower = c(0.0001, .1))
  return(coef(m))
}
MakeProportionChart <- function(df_val, xlab="Escape Rates w/95% Credible Intervals", ylab="Product Development Groups",
                                tval="Escape Rates By Product Group - With KPIs",
                                sval="Predictive Rates (black) vs Actuals (red)",
                                kpis = c(.05,.1,.2)){
  df_val %>%
    ggplot(aes(emp_bayes_avg, rank(emp_bayes_avg))) +
    geom_point() +
    geom_point(aes(x = escape_avg), color = "red") +
    geom_errorbarh(aes(xmin = low_ci, xmax = high_ci)) +
    geom_vline(xintercept=kpis[1], color="maroon", size=1, linetype="dotted") +
    geom_vline(xintercept=kpis[2], color="orangered2", size=1, linetype="dotted") +
    geom_vline(xintercept=kpis[3], color="goldenrod", size=1, linetype="dotted") +
    labs(x = xlab,
         y = ylab,
         title = tval,
         subtitle = sval) +
    theme_bw()
  #return(res)
}
GetCritDF <- function(nvd_val, sev_val, match_string = "default"){
  if( match_string == "default"){
    match_string <- "Windows|Microsoft|Office|Mac|Apple|Browser|cisco|java|Oracle|adobe"
  }
  crit_df <- nvd_val %>% filter(b_score >= sev_val & grepl(match_string, desc_vals,ignore.case = TRUE))
  crit_df$pub_day <- yday(crit_df$pub_date)
  crit_df$mod_day <- yday(crit_df$mod_date)
  return(crit_df)
}
GetRanges <- function(prior, data, p_scale, p_rate){
  p_range <- qgamma(c(.001,.999),prior$alpha,prior$lambda)
  d_range <- qgamma(c(.001,.999),data$num_vulns,data$wait_time)
  r_range <- qgamma(c(.001,.999),p_scale,p_rate)
  ranges <- c(p_range, d_range, r_range)
  min_r <- min(ranges) - .01
  max_r <- max(ranges) + .01
  return(tibble(min_r, max_r))
}
sla_check <- function(fix_time,severity,exposure,audit){
  # Extreme
  if(severity == "extreme" & exposure == "external" & audit == 0){
    if( fix_time <= 7){
      return(1)
    }else{
      return(0)
    }
  }
  if((severity == "extreme" & exposure == "external") | (severity == "critical" & exposure == "external" & audit == 0)){
    if( fix_time <= 14){
      return(1)
    }else{
      return(0)
    }
  }
  if((severity == "critical" & exposure == "external") | (severity == "high" & exposure == "external" & audit == 0)){
    if(fix_time <= 21){
      return(1)
    }else{
      return(0)
    }
  }
  if((severity == "high" & exposure == "external") | (severity == "extreme" & exposure == "partner" & audit == 0)){
    if(fix_time <= 28){
      return(1)
    }else{
      return(0)
    }
  }
  if((severity == "extreme" & exposure == "partner") | (severity == "critical" & exposure == "partner" & audit == 0) ){
    if(fix_time <= 35){
      return(1)
    }else{
      return(0)
    }
  }
  if((severity == "critical" & exposure == "partner") | (severity == "high" & exposure == "partner" & audit == 0) ){
    if(fix_time <= 42){
      return(1)
    }else{
      return(0)
    }
  }
  if((severity == "high" & exposure == "partner") | (severity == "extreme" & exposure == "internal" & audit == 0) ){
    if(fix_time <= 50){
      return(1)
    }else{
      return(0)
    }
  }
  if((severity == "extreme" & exposure == "internal") | (severity == "critical" & exposure == "internal" & audit == 0) ){
    if(fix_time <= 60){
      return(1)
    }else{
      return(0)
    }
  }
  if((severity == "critical" & exposure == "internal") | (severity == "high" & exposure == "internal" & audit == 0) ){
    if(fix_time <= 75){
      return(1)
    }else{
      return(0)
    }
  }
  if(severity == "high" & exposure == "internal"){
    if(fix_time <= 90){
      return(1)
    }else{
      return(0)
    }
  }
  return(0)
}
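# Usage sketch (hedged): guarded so it never runs when this file is sourced.
# The SLA ladder above tightens deadlines as severity and exposure rise;
# rows with audit == 0 get the tightest rung, otherwise the next rung
# applies. Returns 1 if the fix met its SLA, 0 otherwise.
if (FALSE) {
  sla_check(fix_time = 10, severity = "extreme",
            exposure = "external", audit = 0)  # 0: over the 7-day SLA
}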
getLogOddsInt <- function(prob, scale){
  # Turn probability into odds
  odds <- prob/(1-prob)
  # Turn odds into log odds
  logOdds <- log(odds)
  # Get a standard deviation value
  sdev <- abs(round(logOdds/scale,2))
  # Return
  return(c(logOdds,sdev))
}
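# Usage sketch (hedged): guarded so it never runs when this file is sourced.
# A probability of 0.8 is odds of 4 (0.8/0.2) and log-odds of ~1.39; scale
# divides that log-odds to give a rough SD for a logistic-scale prior.
if (FALSE) {
  getLogOddsInt(prob = 0.8, scale = 2)  # ~c(1.39, 0.69)
}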
policy_pred <- function(mcmc, sev, exp, dev_val, audit_val){
  # Simulate from posterior
  res <- posterior_epred( mcmc, newdata = data.frame(
    severity = sev,
    exposure = exp,
    dev_maturity = dev_val,
    audit = audit_val))
  # Highest Density Interval
  ci_val <- ci(res, method = "HDI")
  ret_tib <- tibble(mean_est = mean(res),
                    ci_low = ci_val$CI_low,
                    ci_high = ci_val$CI_high)
  return(ret_tib)
} |