Last active
July 15, 2022 14:33
-
-
Save ribsy/b69567039ace893dfd0d882ad51020e9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
my_packages <- c("tidyverse", "lubridate", "survival", "survminer", | |
"ProbBayes", "Bolstad", "ggplot2", "devtools", | |
"mc2d", "bayestestR", "scales", "knitr") | |
not_installed <- | |
my_packages[!(my_packages %in% installed.packages()[ , "Package"])] | |
if (length(not_installed)){ | |
msgVal = paste("Install packages: ",toString(not_installed)) | |
msgRes <- askYesNo(msgVal) | |
if( msgRes != TRUE){ | |
stopMsg <- paste("Quitting because you don't want to install:", not_installed) | |
stop(stopMsg) | |
} | |
} | |
if(length(not_installed)) install.packages(not_installed) | |
# Deal with ggdistribute - which is in github | |
my_packages <- c("ggdistribute") | |
not_installed <- my_packages[!(my_packages %in% installed.packages()[ , "Package"])] | |
if (length(not_installed)){ | |
msgVal = paste("Install ggdistribute?") | |
msgRes <- askYesNo(msgVal) | |
if( msgRes != TRUE){ | |
stopMsg <- paste("Quitting because you don't want to install ggdistribute") | |
stop(stopMsg) | |
} | |
} | |
if(length(not_installed)) install_github("iamamutt/ggdistribute") | |
library(devtools) | |
library(tidyverse) | |
library(lubridate) | |
library(survival) | |
library(survminer) | |
library(bayestestR) | |
library(ProbBayes) | |
library(Bolstad) | |
library(ggplot2) | |
library(mc2d) | |
library(scales) | |
library(knitr) | |
library(ggdistribute) | |
select <- dplyr::select | |
ScoreBetaBeliefs <- function(bias = c(0.5, 0.5), first_belief = c(2,2), second_belief = c(3,3), hits = 1, misses = 5){ | |
probs <- bias | |
#eng_one <- c(first_belief[1], first_belief[2]) #one persons prior | |
#eng_two <- c(second_belief[1], second_belief[2]) #anotehr person' prior | |
beta_par <- rbind(first_belief, second_belief) #make dataframe | |
output <- binomial.beta.mix(bias, beta_par, c(hits, misses)) #prior scoring with data of 12 hits out of 20. | |
posterior_odds <- output$probs[1]/ output$probs[2] #Bayes Factor | |
return(posterior_odds) | |
} | |
ScoreGammaBeliefs <- function(bias = c(0.5, 0.5), first_belief = c(2,2), second_belief = c(3,3), data = c(13,52)){ | |
probs=bias | |
beliefs=rbind(first_belief, second_belief) | |
data = list(y=data[1], t = data[2]) | |
mix <- poisson.gamma.mix(probs,beliefs,data) | |
scores <- mix$probs[1]/mix$probs[2] | |
return(scores) | |
} | |
MakeBetaGraph <- function(hit, miss, xlab, ylab, tlab, slab , xadj = .01 ){ | |
#print(paste0("Hits: ", hit)) | |
#print(paste0("Miss: ", miss)) | |
beta_avg = (hit/(miss + hit)) | |
#print(beta_avg) | |
posterior <- distribution_beta(1000, hit, miss) | |
ci_hdi <- ci(posterior, method = "HDI") | |
posterior <- posterior %>% | |
#estimate_density(extend=TRUE) | |
estimate_density() | |
ytop <- max(posterior$y) * .8 | |
posterior %>% | |
ggplot(aes(x=x, y=y)) + | |
geom_area(fill="darkslateblue", alpha = 0.6) + | |
theme_classic() + | |
geom_vline(xintercept=beta_avg, color="black", size=1) + | |
geom_vline(xintercept=ci_hdi$CI_low, color="black", size=1, linetype="dotted") + | |
geom_vline(xintercept=ci_hdi$CI_high, color="black", size=1, linetype="dotted") + | |
annotate(geom = "text", x = ci_hdi$CI_low + xadj , y = ytop, | |
label = round(ci_hdi$CI_low,3), fontface = 2, hjust = 0, vjust = -5) + | |
annotate(geom = "text", x = ci_hdi$CI_high + xadj , y = ytop, | |
label = round(ci_hdi$CI_high,3), fontface = 2, hjust = 0, vjust = -5) + | |
annotate(geom = "text", x = beta_avg + xadj , y = ytop, | |
label = round(beta_avg,3), fontface = 2, hjust = 0, vjust = -5) + | |
labs(x = xlab, y = ylab, title = tlab, subtitle = slab) | |
} | |
MakeRidgeChart2 <- function(eng_risk, title_val, sub_val, xtitle, ytitle, legend_val){ | |
ggplot(eng_risk, aes(x = x_vals, y = factor(yaxis,levels=unique(yaxis)),height = prob, | |
scale = 1.2, color = legend, fill = factor(legend,levels=rev(unique(legend))))) + | |
#scale_fill_manual(values = cbp1) + | |
geom_density_ridges(stat="identity", color = "white", alpha = 0.8, | |
panel_scaling = TRUE, size = 1) + | |
guides(fill=guide_legend(title= legend_val)) + | |
labs(title = title_val, | |
subtitle = sub_val, | |
x = xtitle, | |
y = ytitle) | |
} | |
MakeRidgeChart <- function(eng_risk, title_val, sub_val, xtitle, ytitle, legend_val){ | |
ggplot(eng_risk, aes(x = x_vals, y = factor(yaxis,levels=unique(yaxis)),height = prob, | |
scale = 1.2, color = legend, fill = factor(rev(legend),levels=rev(unique(legend))))) + | |
#scale_fill_manual(values = cbp1) + | |
geom_density_ridges(stat="identity", color = "white", alpha = 0.8, | |
panel_scaling = TRUE, size = 1) + | |
guides(fill=guide_legend(title= legend_val)) + | |
labs(title = title_val, | |
subtitle = sub_val, | |
x = xtitle, | |
y = ytitle) | |
} | |
GetRope <- function( eng_risk, month_one, month_two){ | |
mone <- eng_risk %>% filter(group == month_one) %>% select(closed, misses) %>% distinct() | |
mone_dist <- rbeta(1000, mone$closed, mone$misses) | |
mone_hdi <- ci(mone_dist, method = "HDI") | |
mtwo <- eng_risk %>% filter(group == month_two) %>% select(closed, misses) %>% distinct() | |
mtwo_dist <- rbeta(1000, mtwo$closed, mtwo$misses) | |
mtwo_hdi <- ci(mtwo_dist, method = "HDI") | |
diff_dist <- mtwo_dist - mone_dist | |
rope(diff_dist) | |
} | |
MakeVulnData <- function(vulnData, prior){ | |
hasGroup <- length(vulnData) | |
for( i in 1:nrow(vulnData)){ | |
if( i == 1){ | |
hits <- vulnData$hits[i] + prior[1] | |
misses <- vulnData$misses[i] + prior[2] | |
yaxis <- vulnData$yaxis[i] | |
if(hasGroup == 4){ | |
legend <- vulnData$legend[i] | |
}else{ | |
legend <- yaxis | |
} | |
res <- MakeBurnData(hits, misses, yaxis, legend) | |
eng_risk <- res | |
}else{ | |
hits <- vulnData$hits[i] + hits | |
misses <- vulnData$misses[i] + misses | |
yaxis <- vulnData$yaxis[i] | |
if(hasGroup == 4){ | |
legend <- vulnData$legend[i] | |
}else{ | |
legend <- yaxis | |
} | |
res <- MakeBurnData(hits, misses, yaxis, legend) | |
eng_risk <- bind_rows(res, eng_risk) | |
} | |
} | |
return(eng_risk) | |
} | |
MakeBurnData <- function(closed, misses, yaxis, legend){ | |
#total <- closed + misses | |
x_vals = seq(from=0,to=1,length = 1000) | |
#delta <- total - closed | |
dens = dbeta(x_vals,closed, misses) | |
prob = dens / max(dens) | |
res <- tibble(x_vals, yaxis , prob, legend, closed, misses, dens) | |
return(res[res$dens != "Inf",]) | |
} | |
GetBeliefsEvents <- function(belief, max_belief ){ | |
alpha_ratio <- as.numeric(belief) | |
beta_ratio <- as.numeric(max_belief) | |
q1 = list(p=.5, x= alpha_ratio) | |
q2 = list(p=.9, x= beta_ratio) | |
return( beta.select(q1, q2) ) | |
} | |
GetBeliefsHits <- function(hits, max_hits, events ){ | |
alpha_ratio <- hits / events | |
beta_ratio <- max_hits / events | |
q1 = list(p=.5, x= alpha_ratio) | |
q2 = list(p=.9, x= beta_ratio) | |
return( beta.select(q1, q2) ) | |
} | |
GetBetResults <- function(guess, tries, w.ball){ | |
guess_cost <- tries * 5 | |
diff_cost <- abs(w.ball - guess) * 5 | |
total_cost <- guess_cost + diff_cost | |
profit <- 100 - total_cost | |
print(paste0("Your Winnings: ", profit)) | |
} | |
GetRolls <- function(tries){ | |
w.ball <- sample(44,1) | |
val <- "" | |
for(x in 1:tries){ | |
val <- paste0(rollball(w.ball)," ",val) | |
} | |
print(val) | |
return(w.ball) | |
} | |
rollball <- function(w.ball){ | |
if( w.ball == 1){ | |
r.ball <- sample(2:44,1) | |
}else if( w.ball == 44){ | |
r.ball <- sample(1:43,1) | |
}else{ | |
r.ball <- sample(c(1:(w.ball - 1), (w.ball + 1):44),1) | |
} | |
if(r.ball < w.ball){ | |
return("Left") | |
}else{ | |
return("Right") | |
} | |
} | |
ErrorCheck <- function(base_configs, event_counts){ | |
a_total <- as.numeric(base_configs["a_n_events"]) | |
a_errors <- as.numeric(event_counts["product_a_errors"]) | |
b_total <- as.numeric(base_configs["b_n_events"]) | |
b_errors <- as.numeric(event_counts["product_b_errors"]) | |
#Remove errors to get on true events | |
a_true <- a_total - a_errors | |
b_true <- b_total - b_errors | |
cat(paste("A Total:",a_total,"A Errors:", a_errors,"A True:", a_true, "\n")) | |
cat(paste("B Total:",b_total,"A Errors:", b_errors,"B True:", b_true, "\n")) | |
# Obvious misconfiguration | |
if( a_total < a_errors){ | |
stop(paste("Exiting: Your errors for product A of: ", a_errors," exceed your total events for product A of: ", a_total)) | |
} | |
if( b_total < b_errors){ | |
stop(paste("Exiting: Your errors for product B of: ", b_errors," exceed your total events for product B of: ", b_total)) | |
} | |
# If A's True Vulns (i.e 30) is great than B's Total (i.e 25) - | |
if( b_total < a_true ){ | |
diff <- a_true - b_total | |
cat(paste("Product B is missing", diff, "false positive error(s). Add ", diff ," each to B's EVENTS and ERRORS\n")) | |
stop("Exiting") | |
} | |
if( a_total < b_true ){ | |
diff <- b_true - a_total | |
cat(paste("Product A is missing", diff, "false positive error(s). Add ", diff ," each to A's EVENTS and ERRORS\n")) | |
stop("Exiting") | |
} | |
#Is Prod A/B Missing errors - false negatives specifically? | |
if( a_true < b_true ){ | |
diff <- b_true - a_true | |
cat(paste("Adding ", diff ," FALSE NEGATIVE errors to product A for a new total of: ",diff + a_errors,"\n")) | |
return(c("product_a_errors", diff)) | |
} | |
if( b_true < a_true ){ | |
diff <- a_true - b_true | |
cat(paste("Adding ", diff ," FALSE NEGATIVE errors to product B for a new total of: ",diff + b_errors,"\n")) | |
return(c("product_b_errors", diff)) | |
} | |
return(c("No Change", 0)) | |
} | |
CatTotalCost <- function(total_costs, product_costs){ | |
#Get Cost Ratios | |
prod_a_total_costs <- total_costs[1] | |
prod_b_total_costs <- total_costs[2] | |
prod_ratio <- round( prod_a_total_costs / prod_b_total_costs, 3) * 100 | |
prod_ratio <- paste(prod_ratio, "%", sep = "") | |
#Format as dollars | |
prod_a_orginal_cost <- dollar_format()(as.numeric(product_costs["prod_a_cost"])) | |
prod_a_total_final <- dollar_format()(round(as.numeric(prod_a_total_costs),2)) | |
prod_b_orginal_cost <- dollar_format()(as.numeric(product_costs["prod_b_cost"])) | |
prod_b_total_final <- dollar_format()(round(as.numeric(prod_b_total_costs),2)) | |
#Print Message to stdout | |
cat("--------Which Product Cost Most To Operate Given Forecasted Errors-------\n") | |
cat(paste(" Product A is", prod_ratio , "the total cost of product B\n")) | |
cat(paste(" Product A (with base price of ", prod_a_orginal_cost ,") is expected to cost to operate with errors: ", prod_a_total_final,"\n")) | |
cat(paste(" Product B (with base price of ", prod_b_orginal_cost,") is expected to cost to operate with errors: ", prod_b_total_final,"\n")) | |
cat("\n Still uncertain? Get more data by running more tests!") | |
return(c(prod_ratio, prod_a_orginal_cost, prod_a_total_final, prod_b_orginal_cost, prod_b_total_final)) | |
#print(knitr::kable(out)) | |
} | |
GraphErrorImpact <- function(posterior, impact){ | |
newdf_a_impact <- data.frame(posterior = posterior$prod_a_impact, product = "Product A") | |
newdf_b_impact <- data.frame(posterior = posterior$prod_b_impact, product = "Product B") | |
newdf_ab_impact <- rbind(newdf_a_impact, newdf_b_impact) | |
impact_text <- paste(" Product A is", impact[1] , "the total cost of product B\n Product A (with base price of ", impact[2] ,") is expected to cost to operate with errors: ", impact[3],"\n Product B (with base price of ", impact[4],") is expected to cost to operate with errors: ", impact[5],"\n") | |
print(ggplot(newdf_ab_impact, aes(x = posterior, y = ..density.., fill = product)) + | |
stat_density_ci(alpha = 0.5, n = 1024, geom = "density",position = "identity") + | |
scale_fill_manual(values=c("#333399", "#CCCCFF")) + | |
labs(title = "FORECASTING APPSEC ERROR IMPACT\nUsing Bayesian A/B Testing\n", x="Using Small Samples: Predicted Difference In Product Error Impacts", | |
subtitle = impact_text, | |
y = "Product Error Comparisons")) | |
} | |
CatErrorDiffProp <- function(posterior){ | |
error_rate_diff <- sum(posterior$prop_diff_events > 0 )/length(posterior$prop_diff_events) | |
cat(paste("--------Error Rate Difference For This Simmulation--------\n Probability that product A makes more errors that Product B is ",round(error_rate_diff * 100) ,"%\n\n", sep = "")) | |
} | |
GraphErrorDiff <- function(posterior){ | |
xval <- posterior$prop_diff_events | |
breaks <- pretty(range(xval), n = nclass.FD(xval), min.n = 1) | |
bwidth <- breaks[2]-breaks[1] | |
print(ggplot(posterior, aes(x=prop_diff_events)) + | |
geom_histogram(aes(y=..density..), colour="black", fill="white", binwidth = bwidth)+ | |
geom_density(alpha=.2, fill="#333399") + labs(title="Event Amount Difference: Product A - Product B",x="Event Rate", y = "Amount")+ | |
geom_vline(xintercept=0, color="red", size=1, linetype = "dotted") | |
) | |
} | |
GraphErrorRates <- function(posterior){ | |
newdf_a <- data.frame(posterior = posterior$prod_a, product = "Product A") | |
newdf_b <- data.frame(posterior = posterior$prod_b, product = "Product B") | |
newdf_ab <- rbind(newdf_a, newdf_b) | |
print(ggplot(newdf_ab, aes(x = posterior, y = ..density.., fill = product)) + | |
stat_density_ci(alpha = 0.5, n = 1024, geom = "density",position = "identity") + | |
scale_fill_manual(values=c("#333399", "#CCCCFF")) + | |
labs(title = "Product A/B Testing", x="Using Small Samples: Predicted Difference In Product Error Rates", | |
y = "Product Comparisons")) | |
} | |
CatUpdatedBeliefRates <- function(posterior, event_counts, base_configs){ | |
out <- "--------Updated Forecasts Given Small Data, Beliefs & Lots of Simulations--------------\n" | |
# 89% Highest Density Intervals | |
prod_a_points <- point_estimate(posterior$prod_a) | |
ci_hdi_prod_a <- ci(posterior$prod_a, method = "HDI") | |
out <- paste(out, " The previous product A forecast combined with ", base_configs["a_n_events"] ," events and ", event_counts["product_a_errors"] ," errors is between:", | |
round(ci_hdi_prod_a$CI_low,2) * 100, "% and ", | |
round(ci_hdi_prod_a$CI_high,2) * 100, "% \n And centered near:", | |
round(prod_a_points[[1]],2) * 100, "%\n\n", sep="") | |
# 89% Highest Density Interval | |
prod_b_points <- point_estimate(posterior$prod_b) | |
ci_hdi_prod_b <- ci(posterior$prod_b, method = "HDI") | |
out <- paste(out, " The previous product B foreacst combined with ", base_configs["b_n_events"] ," events and ", event_counts["product_b_errors"]," errors is between:", | |
round(ci_hdi_prod_b$CI_low,2) * 100, "% and ", | |
round(ci_hdi_prod_b$CI_high,2) * 100, "% \n And centered near:", | |
round(prod_b_points[[1]],2) * 100, "%\n\n", sep="") | |
out <- paste(out, "See the markdown report or plot tab graphs in rstudio to see forecasts combined with data.\n") | |
cat(out) | |
} | |
CatPriorBeliefRates <- function(event_priors){ | |
#Pring Beta Curve | |
#print(MakeCurves(event_priors[["median_errors"]], event_priors[["edge_errors"]],color="Purple", CI=TRUE, CIT=TRUE)) | |
xval = "Event Probability" | |
yval = "Event Strength" | |
tval = "Credible Beliefs About Events" | |
sval = "With 95% Credible Belief Interval" | |
beta_vals <- GetBeliefsEvents(event_priors[["median_errors"]], event_priors[["edge_errors"]] ) | |
print(ggplotBeta(beta_vals[1], beta_vals[2], xlab = xval, ylab = yval, tlab = tval, sval = sval)) | |
#Get Beta Prio CIs | |
priorCI <- GetBetaCI(event_priors["median_errors"], event_priors["edge_errors"]) | |
cat("------Prior Beliefs About Error Rates--------\n") | |
cat(paste(" You forecasted an ERROR RATE equally ABOVE/BELOW ", round(event_priors[["median_errors"]]*100), "%.\n", sep="")) | |
cat(paste(" You're also 90% sure the error rate is LESS THAN ", round(event_priors[["edge_errors"]]*100), "%.\n", sep="")) | |
cat(paste(" Given that forecast and the model we are using you you believe the true rate likely sits between ", priorCI[1],"% and ", priorCI[3], "%\n\n", sep="")) | |
} | |
GetTotalCosts <- function(impacts, costs, time_multiple, event_counts, prior_events, base_configs){ | |
product_a_counts <- event_counts[[1]] | |
product_b_counts <- event_counts[[2]] | |
# Get Gamma Prior for product a which will get fed into poisson dist in GetEventFrequency() | |
a_number_events <- as.numeric(base_configs$a_n_events) | |
a_gamma_median_prior <- prior_events[[1]] * a_number_events | |
a_gamma_sd <- prior_events[[2]] * a_number_events | |
# Get Gamma Prior for product b which will get fed into poisson dist in GetEventFrequency() | |
b_number_events <- as.numeric(base_configs$b_n_events) | |
b_gamma_median_prior <- prior_events[[1]] * b_number_events | |
b_gamma_sd <- prior_events[[2]] * b_number_events | |
prod_a_event_results <- GetEventFrequency(product_a_counts, a_gamma_median_prior, a_gamma_sd) | |
prod_a_mean_events <- prod_a_event_results$mean | |
prod_a_event_totals <- prod_a_mean_events * as.numeric(time_multiple) | |
prod_b_event_results <- GetEventFrequency(product_b_counts, b_gamma_median_prior, b_gamma_sd) | |
prod_b_mean_events <- prod_b_event_results$mean | |
prod_b_event_totals <- prod_b_mean_events * as.numeric(time_multiple) | |
prod_a_total_cost <- prod_a_event_totals * impacts$prod_a_impact + costs$prod_a_cost + costs$prod_a_run | |
prod_b_total_cost <- prod_b_event_totals * impacts$prod_b_impact + costs$prod_b_cost + costs$prod_b_run | |
return(c(prod_a_total_cost, prod_b_total_cost) ) | |
} | |
GetEventFrequency <- function(event_counts, avg_event_count, event_variation, graph = FALSE){ | |
#rate vals can be a vector of results, perhaps events per day, week , month etc | |
# mode and sd will be converted into rate and scale. | |
event_count_priors <- GetRatePriors(avg_event_count, event_variation, graph) | |
result <- poisgamp(event_counts, event_count_priors[1], event_count_priors[2], plot = FALSE, suppressOutput = TRUE) | |
return(result) | |
} | |
GetRatePriors <- function(mode, sd, graph = FALSE){ | |
# This function allows you to think in terms of mode and SD. | |
# Note that everything is positive..so while the mode might be a small number that is close to 0 | |
# The SD can be large skewing far to the right, without going past zero on the left. | |
# Here are the corresponding rate and shape parameter values: | |
ra = ( mode + sqrt( mode^2 + 4*sd^2 ) ) / ( 2 * sd^2 ) | |
sh = 1 + mode * ra | |
if(graph){ | |
x = seq(0,mode+5*sd,len=1001) | |
plot( x , dgamma( x , shape=sh , rate=ra ) , type="l" , | |
main=paste("dgamma, mode=",mode,", sd=",sd,sep="") , | |
ylab=paste("dgamma( shape=",signif(sh,3)," , rate=",signif(ra,3)," )", | |
sep="") ) | |
abline( v=mode , lty="dotted" ) | |
} | |
return(c(sh, ra)) | |
} | |
GetEngCost <- function(size, eng_cost_pert, eng_hour_priors){ | |
mode <- as.numeric(eng_cost_pert[["mode"]]) | |
low <- as.numeric(eng_cost_pert[["low"]]) | |
high <- as.numeric(eng_cost_pert[["high"]]) | |
mode_hrs <- as.numeric(eng_hour_priors[["mode"]]) | |
low_hrs <- as.numeric(eng_hour_priors[["low"]]) | |
high_hrs <- as.numeric(eng_hour_priors[["high"]]) | |
eng_rate <- rpert(size, min = low, mode = mode, max = high, shape = 4) | |
eng_hours <- rpert(size, min = low_hrs, mode = mode_hrs, max = high_hrs, shape = 4) | |
eng_cost <- eng_hours * eng_rate | |
return(eng_cost) | |
} | |
GetErrors <- function(n_draws, n_events, proportion_events){ | |
n_errors <- rbinom(n = n_draws, size = n_events, prob = proportion_events) | |
return(n_errors) | |
} | |
GetPropotionOfEvents <- function(n_draws,prior_events){ | |
error_beliefs <- GetBeliefsEvents(prior_events[["median_errors"]], prior_events[["edge_errors"]]) | |
#Make an n_draws length list of error beliefs | |
proportion_events <- rbeta(as.numeric(n_draws), as.numeric(error_beliefs[[1]]) , as.numeric(error_beliefs[[2]]) ) | |
return(proportion_events) | |
} | |
GetBeliefsHits <- function(hits, max_hits, events ){ | |
alpha_ratio <- as.numeric(hits) / as.numeric(events) | |
beta_ratio <- as.numeric(max_hits) / as.numeric(events) | |
q1 = list(p=.5, x= alpha_ratio) | |
q2 = list(p=.9, x= beta_ratio) | |
return( beta.select(q1, q2) ) | |
} | |
GetBeliefsEvents <- function(belief, max_belief ){ | |
alpha_ratio <- as.numeric(belief) | |
beta_ratio <- as.numeric(max_belief) | |
q1 = list(p=.5, x= alpha_ratio) | |
q2 = list(p=.9, x= beta_ratio) | |
return( beta.select(q1, q2) ) | |
} | |
SimulateHits <- function(freq_hits, num_events){ | |
freq_miss <- 1 - freq_hits | |
res <- sample(c("hit", "miss"), size=num_events, replace=TRUE, | |
prob=c(freq_hits, freq_miss)) | |
hit <- sum(res == "hit") | |
miss <- sum(res == "miss") | |
return( c(hit,miss)) | |
} | |
GetBetaCI <- function(hit,edge){ | |
betas <- GetBeliefsEvents(hit, edge) | |
intervals <- qbeta(c(.025, .5, .975), betas[1], betas[2]) | |
intervals <- round(intervals * 100) | |
return(intervals) | |
} | |
MakeCurves <- function(alpha = 0, beta = 0 , color = "black", ytop = 20, CI = FALSE, CIT = FALSE, add_val = FALSE){ | |
alpha <- alpha + 1 | |
beta <- beta + 1 | |
curve(dbeta(x, alpha , beta ), from=0, to=1, | |
xlab="SME Forecast For Proportion of Events", ylab="Density", | |
col= color, lwd=4, las=1, add = add_val , ylim = c(0,ytop)) | |
if( CI ){ | |
intervals <- qbeta(c(.025, .975), alpha, beta) | |
abline(v = intervals[1], lty=2) | |
abline(v = intervals[2], lty=2) | |
if( CIT ){ | |
adj <- 0 | |
if( intervals[2] - intervals[1] < 0.11 ){ | |
adj <- 1 | |
} | |
text(intervals[1] + 0.03,ytop, round(intervals[1], 2)) | |
text(intervals[2] + 0.03 ,ytop - adj, round(intervals[2], 2)) | |
} | |
} | |
} | |
ggplotBeta <- function(alpha, beta, xlab = xval, ylab = yval, tlab = "Beliefs", sval = "Test"){ | |
x = seq(0, 1, 0.01) | |
y = dbeta(x, shape1=alpha, shape2=beta) | |
density = data.frame(x=x, y=y) | |
mean=alpha/(alpha+beta) | |
mode=(alpha-1)/(alpha+beta-2) | |
intervals <- qbeta(c(.025, .975), alpha, beta) | |
res <- ggplot(density, aes(x, y)) + | |
geom_area(fill= "darkslateblue", alpha = 0.6) + | |
theme_minimal()+ | |
theme(axis.text.x = element_text(angle = 90, hjust = 1), | |
axis.text.y = element_blank()) + | |
geom_vline(xintercept = intervals[1]) + | |
geom_vline(xintercept = intervals[2])+ | |
labs(x = xlab, y = ylab, title = tlab, subtitle = sval) | |
return(res) | |
} | |
GetSize <- function(size, first, second){ | |
res <- ifelse(length(first) >= length(second), length(second), length(first) ) | |
if( size > res){ | |
return(res) | |
} | |
return(size) | |
} | |
GetEngHours <- function(size, med = 10, ninety = 60, total_hours = 100){ | |
beta_vals <- GetBeliefsHits(med, ninety, total_hours) | |
prop_hours <- rbeta(size, beta_vals[1], beta_vals[2]) | |
hours <- rbinom(size, total_hours, prop_hours) | |
return(hours) | |
} | |
#' ##Wrapper Function | |
#' This just makes it easy for users to call one function without much fuss. | |
runProductCostComparison <- function(base_configs, event_priors, event_counts, eng_hour_range, eng_cost_range, product_costs){ | |
# A Quick Check for missed false negatives between products and other sundry. | |
update_count <- ErrorCheck(base_configs, event_counts) | |
if( update_count[2] > 0){ | |
event_counts[update_count[[1]]] <- as.numeric(update_count[[2]]) + event_counts[[update_count[1]]] | |
if( update_count[1] == "product_a_errors"){ | |
base_configs$a_n_events <- base_configs$a_n_events + as.numeric(update_count[[2]]) | |
print(paste("Updating Product A Eventss: ", base_configs$a_n_events)) | |
} | |
if( update_count[1] == "product_b_errors"){ | |
base_configs$b_n_events <- base_configs$b_n_events + as.numeric(update_count[[2]]) | |
print(paste("Updating Product B Events: ", base_configs$a_n_events)) | |
} | |
} | |
#' ##Prior Beliefs About Error Rates | |
#' In the *event_priors* config element above you specified what you think is the likely error rate given N events. This graph shows you" | |
#' the spread of possible rates given our beliefs. It also gives some indication of where the most likely rates may cluster." | |
CatPriorBeliefRates(event_priors) | |
#Number of simulations (draws), tests (events), and default array index sizes | |
n_draws <- as.numeric(base_configs[["n_draws"]]) | |
a_n_events <- as.numeric(base_configs[["a_n_events"]]) | |
b_n_events <- as.numeric(base_configs[["b_n_events"]]) | |
size <- 10000 | |
#Create a list of simmulated events and error RATES based on priors | |
proportion_events <- GetPropotionOfEvents(n_draws, event_priors) | |
a_n_errors <- GetErrors(n_draws, a_n_events, proportion_events) | |
b_n_errors <- GetErrors(n_draws, b_n_events, proportion_events) | |
#Put simulations into dataframe | |
a_prior <- data.frame(proportion_events, a_n_errors) | |
b_prior <- data.frame(proportion_events, b_n_errors) | |
# Select the simulationed prior data that correlates to the real product errors | |
product_a <- a_prior[a_prior$a_n_errors == event_counts[["product_a_errors"]], ] | |
product_b <- b_prior[b_prior$b_n_errors == event_counts[["product_b_errors"]], ] | |
#Sample to create indices for posterior and load into data frame | |
sample_a_indices <- sample( nrow(product_a), size = size, replace = TRUE, prob = product_a$proportion_events) | |
sample_b_indices <- sample( nrow(product_b), size = size, replace = TRUE, prob = product_b$proportion_events) | |
posterior <- data.frame( | |
prod_a = product_a$proportion_events[sample_a_indices], | |
prod_b = product_b$proportion_events[sample_b_indices]) | |
#' Updated beliefs given real error rates | |
CatUpdatedBeliefRates(posterior, event_counts, base_configs) | |
#' Create and Print out Error Rate Graphs | |
GraphErrorRates(posterior) | |
#' Calculate the rate differences, graph and print out | |
posterior$prop_diff_events <- posterior$prod_a - posterior$prod_b | |
GraphErrorDiff(posterior) | |
CatErrorDiffProp(posterior) | |
#This gets our impact per event with lots of uncertainty packed in | |
eng_cost <- GetEngCost(size, eng_cost_range, eng_hour_range) | |
# Add forecasted expected cost column to posterior for product a and b and store in list | |
posterior$prod_a_impact <- posterior$prod_a * eng_cost | |
posterior$prod_b_impact <- posterior$prod_b * eng_cost | |
#Load Median values into list of use later | |
impacts <- list() | |
impacts["prod_a_impact"] <- median(posterior$prod_a_impact) | |
impacts["prod_b_impact"] <- median(posterior$prod_b_impact) | |
# Add the column that holds the difference between the two costs | |
posterior$prod_diff_impact <- posterior$prod_a_impact - posterior$prod_b_impact | |
print("Here is a glimpse into some of the data.") | |
print(head(posterior,10)) | |
total_costs <- GetTotalCosts(impacts, product_costs, base_configs["time_multiple"], event_counts, event_priors, base_configs) | |
#' #Total Costs Differences | |
impact_res <- CatTotalCost(total_costs, product_costs) | |
#' Graph Impact Difference | |
GraphErrorImpact(posterior, impact_res) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment