Skip to content

Instantly share code, notes, and snippets.

@ribsy
Last active July 15, 2022 14:33
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ribsy/b69567039ace893dfd0d882ad51020e9 to your computer and use it in GitHub Desktop.
Save ribsy/b69567039ace893dfd0d882ad51020e9 to your computer and use it in GitHub Desktop.
my_packages <- c("tidyverse", "lubridate", "survival", "survminer",
"ProbBayes", "Bolstad", "ggplot2", "devtools",
"mc2d", "bayestestR", "scales", "knitr")
not_installed <-
my_packages[!(my_packages %in% installed.packages()[ , "Package"])]
if (length(not_installed)){
msgVal = paste("Install packages: ",toString(not_installed))
msgRes <- askYesNo(msgVal)
if( msgRes != TRUE){
stopMsg <- paste("Quitting because you don't want to install:", not_installed)
stop(stopMsg)
}
}
if(length(not_installed)) install.packages(not_installed)
# Deal with ggdistribute - which is in github
my_packages <- c("ggdistribute")
not_installed <- my_packages[!(my_packages %in% installed.packages()[ , "Package"])]
if (length(not_installed)){
msgVal = paste("Install ggdistribute?")
msgRes <- askYesNo(msgVal)
if( msgRes != TRUE){
stopMsg <- paste("Quitting because you don't want to install ggdistribute")
stop(stopMsg)
}
}
if(length(not_installed)) install_github("iamamutt/ggdistribute")
library(devtools)
library(tidyverse)
library(lubridate)
library(survival)
library(survminer)
library(bayestestR)
library(ProbBayes)
library(Bolstad)
library(ggplot2)
library(mc2d)
library(scales)
library(knitr)
library(ggdistribute)
select <- dplyr::select
ScoreBetaBeliefs <- function(bias = c(0.5, 0.5), first_belief = c(2,2), second_belief = c(3,3), hits = 1, misses = 5){
probs <- bias
#eng_one <- c(first_belief[1], first_belief[2]) #one persons prior
#eng_two <- c(second_belief[1], second_belief[2]) #anotehr person' prior
beta_par <- rbind(first_belief, second_belief) #make dataframe
output <- binomial.beta.mix(bias, beta_par, c(hits, misses)) #prior scoring with data of 12 hits out of 20.
posterior_odds <- output$probs[1]/ output$probs[2] #Bayes Factor
return(posterior_odds)
}
ScoreGammaBeliefs <- function(bias = c(0.5, 0.5), first_belief = c(2,2), second_belief = c(3,3), data = c(13,52)){
probs=bias
beliefs=rbind(first_belief, second_belief)
data = list(y=data[1], t = data[2])
mix <- poisson.gamma.mix(probs,beliefs,data)
scores <- mix$probs[1]/mix$probs[2]
return(scores)
}
MakeBetaGraph <- function(hit, miss, xlab, ylab, tlab, slab , xadj = .01 ){
#print(paste0("Hits: ", hit))
#print(paste0("Miss: ", miss))
beta_avg = (hit/(miss + hit))
#print(beta_avg)
posterior <- distribution_beta(1000, hit, miss)
ci_hdi <- ci(posterior, method = "HDI")
posterior <- posterior %>%
#estimate_density(extend=TRUE)
estimate_density()
ytop <- max(posterior$y) * .8
posterior %>%
ggplot(aes(x=x, y=y)) +
geom_area(fill="darkslateblue", alpha = 0.6) +
theme_classic() +
geom_vline(xintercept=beta_avg, color="black", size=1) +
geom_vline(xintercept=ci_hdi$CI_low, color="black", size=1, linetype="dotted") +
geom_vline(xintercept=ci_hdi$CI_high, color="black", size=1, linetype="dotted") +
annotate(geom = "text", x = ci_hdi$CI_low + xadj , y = ytop,
label = round(ci_hdi$CI_low,3), fontface = 2, hjust = 0, vjust = -5) +
annotate(geom = "text", x = ci_hdi$CI_high + xadj , y = ytop,
label = round(ci_hdi$CI_high,3), fontface = 2, hjust = 0, vjust = -5) +
annotate(geom = "text", x = beta_avg + xadj , y = ytop,
label = round(beta_avg,3), fontface = 2, hjust = 0, vjust = -5) +
labs(x = xlab, y = ylab, title = tlab, subtitle = slab)
}
MakeRidgeChart2 <- function(eng_risk, title_val, sub_val, xtitle, ytitle, legend_val){
ggplot(eng_risk, aes(x = x_vals, y = factor(yaxis,levels=unique(yaxis)),height = prob,
scale = 1.2, color = legend, fill = factor(legend,levels=rev(unique(legend))))) +
#scale_fill_manual(values = cbp1) +
geom_density_ridges(stat="identity", color = "white", alpha = 0.8,
panel_scaling = TRUE, size = 1) +
guides(fill=guide_legend(title= legend_val)) +
labs(title = title_val,
subtitle = sub_val,
x = xtitle,
y = ytitle)
}
MakeRidgeChart <- function(eng_risk, title_val, sub_val, xtitle, ytitle, legend_val){
ggplot(eng_risk, aes(x = x_vals, y = factor(yaxis,levels=unique(yaxis)),height = prob,
scale = 1.2, color = legend, fill = factor(rev(legend),levels=rev(unique(legend))))) +
#scale_fill_manual(values = cbp1) +
geom_density_ridges(stat="identity", color = "white", alpha = 0.8,
panel_scaling = TRUE, size = 1) +
guides(fill=guide_legend(title= legend_val)) +
labs(title = title_val,
subtitle = sub_val,
x = xtitle,
y = ytitle)
}
GetRope <- function( eng_risk, month_one, month_two){
mone <- eng_risk %>% filter(group == month_one) %>% select(closed, misses) %>% distinct()
mone_dist <- rbeta(1000, mone$closed, mone$misses)
mone_hdi <- ci(mone_dist, method = "HDI")
mtwo <- eng_risk %>% filter(group == month_two) %>% select(closed, misses) %>% distinct()
mtwo_dist <- rbeta(1000, mtwo$closed, mtwo$misses)
mtwo_hdi <- ci(mtwo_dist, method = "HDI")
diff_dist <- mtwo_dist - mone_dist
rope(diff_dist)
}
MakeVulnData <- function(vulnData, prior){
hasGroup <- length(vulnData)
for( i in 1:nrow(vulnData)){
if( i == 1){
hits <- vulnData$hits[i] + prior[1]
misses <- vulnData$misses[i] + prior[2]
yaxis <- vulnData$yaxis[i]
if(hasGroup == 4){
legend <- vulnData$legend[i]
}else{
legend <- yaxis
}
res <- MakeBurnData(hits, misses, yaxis, legend)
eng_risk <- res
}else{
hits <- vulnData$hits[i] + hits
misses <- vulnData$misses[i] + misses
yaxis <- vulnData$yaxis[i]
if(hasGroup == 4){
legend <- vulnData$legend[i]
}else{
legend <- yaxis
}
res <- MakeBurnData(hits, misses, yaxis, legend)
eng_risk <- bind_rows(res, eng_risk)
}
}
return(eng_risk)
}
MakeBurnData <- function(closed, misses, yaxis, legend){
#total <- closed + misses
x_vals = seq(from=0,to=1,length = 1000)
#delta <- total - closed
dens = dbeta(x_vals,closed, misses)
prob = dens / max(dens)
res <- tibble(x_vals, yaxis , prob, legend, closed, misses, dens)
return(res[res$dens != "Inf",])
}
GetBeliefsEvents <- function(belief, max_belief ){
alpha_ratio <- as.numeric(belief)
beta_ratio <- as.numeric(max_belief)
q1 = list(p=.5, x= alpha_ratio)
q2 = list(p=.9, x= beta_ratio)
return( beta.select(q1, q2) )
}
GetBeliefsHits <- function(hits, max_hits, events ){
alpha_ratio <- hits / events
beta_ratio <- max_hits / events
q1 = list(p=.5, x= alpha_ratio)
q2 = list(p=.9, x= beta_ratio)
return( beta.select(q1, q2) )
}
GetBetResults <- function(guess, tries, w.ball){
guess_cost <- tries * 5
diff_cost <- abs(w.ball - guess) * 5
total_cost <- guess_cost + diff_cost
profit <- 100 - total_cost
print(paste0("Your Winnings: ", profit))
}
GetRolls <- function(tries){
w.ball <- sample(44,1)
val <- ""
for(x in 1:tries){
val <- paste0(rollball(w.ball)," ",val)
}
print(val)
return(w.ball)
}
rollball <- function(w.ball){
if( w.ball == 1){
r.ball <- sample(2:44,1)
}else if( w.ball == 44){
r.ball <- sample(1:43,1)
}else{
r.ball <- sample(c(1:(w.ball - 1), (w.ball + 1):44),1)
}
if(r.ball < w.ball){
return("Left")
}else{
return("Right")
}
}
ErrorCheck <- function(base_configs, event_counts){
a_total <- as.numeric(base_configs["a_n_events"])
a_errors <- as.numeric(event_counts["product_a_errors"])
b_total <- as.numeric(base_configs["b_n_events"])
b_errors <- as.numeric(event_counts["product_b_errors"])
#Remove errors to get on true events
a_true <- a_total - a_errors
b_true <- b_total - b_errors
cat(paste("A Total:",a_total,"A Errors:", a_errors,"A True:", a_true, "\n"))
cat(paste("B Total:",b_total,"A Errors:", b_errors,"B True:", b_true, "\n"))
# Obvious misconfiguration
if( a_total < a_errors){
stop(paste("Exiting: Your errors for product A of: ", a_errors," exceed your total events for product A of: ", a_total))
}
if( b_total < b_errors){
stop(paste("Exiting: Your errors for product B of: ", b_errors," exceed your total events for product B of: ", b_total))
}
# If A's True Vulns (i.e 30) is great than B's Total (i.e 25) -
if( b_total < a_true ){
diff <- a_true - b_total
cat(paste("Product B is missing", diff, "false positive error(s). Add ", diff ," each to B's EVENTS and ERRORS\n"))
stop("Exiting")
}
if( a_total < b_true ){
diff <- b_true - a_total
cat(paste("Product A is missing", diff, "false positive error(s). Add ", diff ," each to A's EVENTS and ERRORS\n"))
stop("Exiting")
}
#Is Prod A/B Missing errors - false negatives specifically?
if( a_true < b_true ){
diff <- b_true - a_true
cat(paste("Adding ", diff ," FALSE NEGATIVE errors to product A for a new total of: ",diff + a_errors,"\n"))
return(c("product_a_errors", diff))
}
if( b_true < a_true ){
diff <- a_true - b_true
cat(paste("Adding ", diff ," FALSE NEGATIVE errors to product B for a new total of: ",diff + b_errors,"\n"))
return(c("product_b_errors", diff))
}
return(c("No Change", 0))
}
CatTotalCost <- function(total_costs, product_costs){
#Get Cost Ratios
prod_a_total_costs <- total_costs[1]
prod_b_total_costs <- total_costs[2]
prod_ratio <- round( prod_a_total_costs / prod_b_total_costs, 3) * 100
prod_ratio <- paste(prod_ratio, "%", sep = "")
#Format as dollars
prod_a_orginal_cost <- dollar_format()(as.numeric(product_costs["prod_a_cost"]))
prod_a_total_final <- dollar_format()(round(as.numeric(prod_a_total_costs),2))
prod_b_orginal_cost <- dollar_format()(as.numeric(product_costs["prod_b_cost"]))
prod_b_total_final <- dollar_format()(round(as.numeric(prod_b_total_costs),2))
#Print Message to stdout
cat("--------Which Product Cost Most To Operate Given Forecasted Errors-------\n")
cat(paste(" Product A is", prod_ratio , "the total cost of product B\n"))
cat(paste(" Product A (with base price of ", prod_a_orginal_cost ,") is expected to cost to operate with errors: ", prod_a_total_final,"\n"))
cat(paste(" Product B (with base price of ", prod_b_orginal_cost,") is expected to cost to operate with errors: ", prod_b_total_final,"\n"))
cat("\n Still uncertain? Get more data by running more tests!")
return(c(prod_ratio, prod_a_orginal_cost, prod_a_total_final, prod_b_orginal_cost, prod_b_total_final))
#print(knitr::kable(out))
}
GraphErrorImpact <- function(posterior, impact){
newdf_a_impact <- data.frame(posterior = posterior$prod_a_impact, product = "Product A")
newdf_b_impact <- data.frame(posterior = posterior$prod_b_impact, product = "Product B")
newdf_ab_impact <- rbind(newdf_a_impact, newdf_b_impact)
impact_text <- paste(" Product A is", impact[1] , "the total cost of product B\n Product A (with base price of ", impact[2] ,") is expected to cost to operate with errors: ", impact[3],"\n Product B (with base price of ", impact[4],") is expected to cost to operate with errors: ", impact[5],"\n")
print(ggplot(newdf_ab_impact, aes(x = posterior, y = ..density.., fill = product)) +
stat_density_ci(alpha = 0.5, n = 1024, geom = "density",position = "identity") +
scale_fill_manual(values=c("#333399", "#CCCCFF")) +
labs(title = "FORECASTING APPSEC ERROR IMPACT\nUsing Bayesian A/B Testing\n", x="Using Small Samples: Predicted Difference In Product Error Impacts",
subtitle = impact_text,
y = "Product Error Comparisons"))
}
CatErrorDiffProp <- function(posterior){
error_rate_diff <- sum(posterior$prop_diff_events > 0 )/length(posterior$prop_diff_events)
cat(paste("--------Error Rate Difference For This Simmulation--------\n Probability that product A makes more errors that Product B is ",round(error_rate_diff * 100) ,"%\n\n", sep = ""))
}
GraphErrorDiff <- function(posterior){
xval <- posterior$prop_diff_events
breaks <- pretty(range(xval), n = nclass.FD(xval), min.n = 1)
bwidth <- breaks[2]-breaks[1]
print(ggplot(posterior, aes(x=prop_diff_events)) +
geom_histogram(aes(y=..density..), colour="black", fill="white", binwidth = bwidth)+
geom_density(alpha=.2, fill="#333399") + labs(title="Event Amount Difference: Product A - Product B",x="Event Rate", y = "Amount")+
geom_vline(xintercept=0, color="red", size=1, linetype = "dotted")
)
}
GraphErrorRates <- function(posterior){
newdf_a <- data.frame(posterior = posterior$prod_a, product = "Product A")
newdf_b <- data.frame(posterior = posterior$prod_b, product = "Product B")
newdf_ab <- rbind(newdf_a, newdf_b)
print(ggplot(newdf_ab, aes(x = posterior, y = ..density.., fill = product)) +
stat_density_ci(alpha = 0.5, n = 1024, geom = "density",position = "identity") +
scale_fill_manual(values=c("#333399", "#CCCCFF")) +
labs(title = "Product A/B Testing", x="Using Small Samples: Predicted Difference In Product Error Rates",
y = "Product Comparisons"))
}
CatUpdatedBeliefRates <- function(posterior, event_counts, base_configs){
out <- "--------Updated Forecasts Given Small Data, Beliefs & Lots of Simulations--------------\n"
# 89% Highest Density Intervals
prod_a_points <- point_estimate(posterior$prod_a)
ci_hdi_prod_a <- ci(posterior$prod_a, method = "HDI")
out <- paste(out, " The previous product A forecast combined with ", base_configs["a_n_events"] ," events and ", event_counts["product_a_errors"] ," errors is between:",
round(ci_hdi_prod_a$CI_low,2) * 100, "% and ",
round(ci_hdi_prod_a$CI_high,2) * 100, "% \n And centered near:",
round(prod_a_points[[1]],2) * 100, "%\n\n", sep="")
# 89% Highest Density Interval
prod_b_points <- point_estimate(posterior$prod_b)
ci_hdi_prod_b <- ci(posterior$prod_b, method = "HDI")
out <- paste(out, " The previous product B foreacst combined with ", base_configs["b_n_events"] ," events and ", event_counts["product_b_errors"]," errors is between:",
round(ci_hdi_prod_b$CI_low,2) * 100, "% and ",
round(ci_hdi_prod_b$CI_high,2) * 100, "% \n And centered near:",
round(prod_b_points[[1]],2) * 100, "%\n\n", sep="")
out <- paste(out, "See the markdown report or plot tab graphs in rstudio to see forecasts combined with data.\n")
cat(out)
}
CatPriorBeliefRates <- function(event_priors){
#Pring Beta Curve
#print(MakeCurves(event_priors[["median_errors"]], event_priors[["edge_errors"]],color="Purple", CI=TRUE, CIT=TRUE))
xval = "Event Probability"
yval = "Event Strength"
tval = "Credible Beliefs About Events"
sval = "With 95% Credible Belief Interval"
beta_vals <- GetBeliefsEvents(event_priors[["median_errors"]], event_priors[["edge_errors"]] )
print(ggplotBeta(beta_vals[1], beta_vals[2], xlab = xval, ylab = yval, tlab = tval, sval = sval))
#Get Beta Prio CIs
priorCI <- GetBetaCI(event_priors["median_errors"], event_priors["edge_errors"])
cat("------Prior Beliefs About Error Rates--------\n")
cat(paste(" You forecasted an ERROR RATE equally ABOVE/BELOW ", round(event_priors[["median_errors"]]*100), "%.\n", sep=""))
cat(paste(" You're also 90% sure the error rate is LESS THAN ", round(event_priors[["edge_errors"]]*100), "%.\n", sep=""))
cat(paste(" Given that forecast and the model we are using you you believe the true rate likely sits between ", priorCI[1],"% and ", priorCI[3], "%\n\n", sep=""))
}
GetTotalCosts <- function(impacts, costs, time_multiple, event_counts, prior_events, base_configs){
product_a_counts <- event_counts[[1]]
product_b_counts <- event_counts[[2]]
# Get Gamma Prior for product a which will get fed into poisson dist in GetEventFrequency()
a_number_events <- as.numeric(base_configs$a_n_events)
a_gamma_median_prior <- prior_events[[1]] * a_number_events
a_gamma_sd <- prior_events[[2]] * a_number_events
# Get Gamma Prior for product b which will get fed into poisson dist in GetEventFrequency()
b_number_events <- as.numeric(base_configs$b_n_events)
b_gamma_median_prior <- prior_events[[1]] * b_number_events
b_gamma_sd <- prior_events[[2]] * b_number_events
prod_a_event_results <- GetEventFrequency(product_a_counts, a_gamma_median_prior, a_gamma_sd)
prod_a_mean_events <- prod_a_event_results$mean
prod_a_event_totals <- prod_a_mean_events * as.numeric(time_multiple)
prod_b_event_results <- GetEventFrequency(product_b_counts, b_gamma_median_prior, b_gamma_sd)
prod_b_mean_events <- prod_b_event_results$mean
prod_b_event_totals <- prod_b_mean_events * as.numeric(time_multiple)
prod_a_total_cost <- prod_a_event_totals * impacts$prod_a_impact + costs$prod_a_cost + costs$prod_a_run
prod_b_total_cost <- prod_b_event_totals * impacts$prod_b_impact + costs$prod_b_cost + costs$prod_b_run
return(c(prod_a_total_cost, prod_b_total_cost) )
}
GetEventFrequency <- function(event_counts, avg_event_count, event_variation, graph = FALSE){
#rate vals can be a vector of results, perhaps events per day, week , month etc
# mode and sd will be converted into rate and scale.
event_count_priors <- GetRatePriors(avg_event_count, event_variation, graph)
result <- poisgamp(event_counts, event_count_priors[1], event_count_priors[2], plot = FALSE, suppressOutput = TRUE)
return(result)
}
GetRatePriors <- function(mode, sd, graph = FALSE){
# This function allows you to think in terms of mode and SD.
# Note that everything is positive..so while the mode might be a small number that is close to 0
# The SD can be large skewing far to the right, without going past zero on the left.
# Here are the corresponding rate and shape parameter values:
ra = ( mode + sqrt( mode^2 + 4*sd^2 ) ) / ( 2 * sd^2 )
sh = 1 + mode * ra
if(graph){
x = seq(0,mode+5*sd,len=1001)
plot( x , dgamma( x , shape=sh , rate=ra ) , type="l" ,
main=paste("dgamma, mode=",mode,", sd=",sd,sep="") ,
ylab=paste("dgamma( shape=",signif(sh,3)," , rate=",signif(ra,3)," )",
sep="") )
abline( v=mode , lty="dotted" )
}
return(c(sh, ra))
}
GetEngCost <- function(size, eng_cost_pert, eng_hour_priors){
mode <- as.numeric(eng_cost_pert[["mode"]])
low <- as.numeric(eng_cost_pert[["low"]])
high <- as.numeric(eng_cost_pert[["high"]])
mode_hrs <- as.numeric(eng_hour_priors[["mode"]])
low_hrs <- as.numeric(eng_hour_priors[["low"]])
high_hrs <- as.numeric(eng_hour_priors[["high"]])
eng_rate <- rpert(size, min = low, mode = mode, max = high, shape = 4)
eng_hours <- rpert(size, min = low_hrs, mode = mode_hrs, max = high_hrs, shape = 4)
eng_cost <- eng_hours * eng_rate
return(eng_cost)
}
GetErrors <- function(n_draws, n_events, proportion_events){
n_errors <- rbinom(n = n_draws, size = n_events, prob = proportion_events)
return(n_errors)
}
GetPropotionOfEvents <- function(n_draws,prior_events){
error_beliefs <- GetBeliefsEvents(prior_events[["median_errors"]], prior_events[["edge_errors"]])
#Make an n_draws length list of error beliefs
proportion_events <- rbeta(as.numeric(n_draws), as.numeric(error_beliefs[[1]]) , as.numeric(error_beliefs[[2]]) )
return(proportion_events)
}
GetBeliefsHits <- function(hits, max_hits, events ){
alpha_ratio <- as.numeric(hits) / as.numeric(events)
beta_ratio <- as.numeric(max_hits) / as.numeric(events)
q1 = list(p=.5, x= alpha_ratio)
q2 = list(p=.9, x= beta_ratio)
return( beta.select(q1, q2) )
}
GetBeliefsEvents <- function(belief, max_belief ){
alpha_ratio <- as.numeric(belief)
beta_ratio <- as.numeric(max_belief)
q1 = list(p=.5, x= alpha_ratio)
q2 = list(p=.9, x= beta_ratio)
return( beta.select(q1, q2) )
}
SimulateHits <- function(freq_hits, num_events){
freq_miss <- 1 - freq_hits
res <- sample(c("hit", "miss"), size=num_events, replace=TRUE,
prob=c(freq_hits, freq_miss))
hit <- sum(res == "hit")
miss <- sum(res == "miss")
return( c(hit,miss))
}
GetBetaCI <- function(hit,edge){
betas <- GetBeliefsEvents(hit, edge)
intervals <- qbeta(c(.025, .5, .975), betas[1], betas[2])
intervals <- round(intervals * 100)
return(intervals)
}
MakeCurves <- function(alpha = 0, beta = 0 , color = "black", ytop = 20, CI = FALSE, CIT = FALSE, add_val = FALSE){
alpha <- alpha + 1
beta <- beta + 1
curve(dbeta(x, alpha , beta ), from=0, to=1,
xlab="SME Forecast For Proportion of Events", ylab="Density",
col= color, lwd=4, las=1, add = add_val , ylim = c(0,ytop))
if( CI ){
intervals <- qbeta(c(.025, .975), alpha, beta)
abline(v = intervals[1], lty=2)
abline(v = intervals[2], lty=2)
if( CIT ){
adj <- 0
if( intervals[2] - intervals[1] < 0.11 ){
adj <- 1
}
text(intervals[1] + 0.03,ytop, round(intervals[1], 2))
text(intervals[2] + 0.03 ,ytop - adj, round(intervals[2], 2))
}
}
}
ggplotBeta <- function(alpha, beta, xlab = xval, ylab = yval, tlab = "Beliefs", sval = "Test"){
x = seq(0, 1, 0.01)
y = dbeta(x, shape1=alpha, shape2=beta)
density = data.frame(x=x, y=y)
mean=alpha/(alpha+beta)
mode=(alpha-1)/(alpha+beta-2)
intervals <- qbeta(c(.025, .975), alpha, beta)
res <- ggplot(density, aes(x, y)) +
geom_area(fill= "darkslateblue", alpha = 0.6) +
theme_minimal()+
theme(axis.text.x = element_text(angle = 90, hjust = 1),
axis.text.y = element_blank()) +
geom_vline(xintercept = intervals[1]) +
geom_vline(xintercept = intervals[2])+
labs(x = xlab, y = ylab, title = tlab, subtitle = sval)
return(res)
}
GetSize <- function(size, first, second){
res <- ifelse(length(first) >= length(second), length(second), length(first) )
if( size > res){
return(res)
}
return(size)
}
GetEngHours <- function(size, med = 10, ninety = 60, total_hours = 100){
beta_vals <- GetBeliefsHits(med, ninety, total_hours)
prop_hours <- rbeta(size, beta_vals[1], beta_vals[2])
hours <- rbinom(size, total_hours, prop_hours)
return(hours)
}
#' ##Wrapper Function
#' This just makes it easy for users to call one function without much fuss.
runProductCostComparison <- function(base_configs, event_priors, event_counts, eng_hour_range, eng_cost_range, product_costs){
# A Quick Check for missed false negatives between products and other sundry.
update_count <- ErrorCheck(base_configs, event_counts)
if( update_count[2] > 0){
event_counts[update_count[[1]]] <- as.numeric(update_count[[2]]) + event_counts[[update_count[1]]]
if( update_count[1] == "product_a_errors"){
base_configs$a_n_events <- base_configs$a_n_events + as.numeric(update_count[[2]])
print(paste("Updating Product A Eventss: ", base_configs$a_n_events))
}
if( update_count[1] == "product_b_errors"){
base_configs$b_n_events <- base_configs$b_n_events + as.numeric(update_count[[2]])
print(paste("Updating Product B Events: ", base_configs$a_n_events))
}
}
#' ##Prior Beliefs About Error Rates
#' In the *event_priors* config element above you specified what you think is the likely error rate given N events. This graph shows you"
#' the spread of possible rates given our beliefs. It also gives some indication of where the most likely rates may cluster."
CatPriorBeliefRates(event_priors)
#Number of simulations (draws), tests (events), and default array index sizes
n_draws <- as.numeric(base_configs[["n_draws"]])
a_n_events <- as.numeric(base_configs[["a_n_events"]])
b_n_events <- as.numeric(base_configs[["b_n_events"]])
size <- 10000
#Create a list of simmulated events and error RATES based on priors
proportion_events <- GetPropotionOfEvents(n_draws, event_priors)
a_n_errors <- GetErrors(n_draws, a_n_events, proportion_events)
b_n_errors <- GetErrors(n_draws, b_n_events, proportion_events)
#Put simulations into dataframe
a_prior <- data.frame(proportion_events, a_n_errors)
b_prior <- data.frame(proportion_events, b_n_errors)
# Select the simulationed prior data that correlates to the real product errors
product_a <- a_prior[a_prior$a_n_errors == event_counts[["product_a_errors"]], ]
product_b <- b_prior[b_prior$b_n_errors == event_counts[["product_b_errors"]], ]
#Sample to create indices for posterior and load into data frame
sample_a_indices <- sample( nrow(product_a), size = size, replace = TRUE, prob = product_a$proportion_events)
sample_b_indices <- sample( nrow(product_b), size = size, replace = TRUE, prob = product_b$proportion_events)
posterior <- data.frame(
prod_a = product_a$proportion_events[sample_a_indices],
prod_b = product_b$proportion_events[sample_b_indices])
#' Updated beliefs given real error rates
CatUpdatedBeliefRates(posterior, event_counts, base_configs)
#' Create and Print out Error Rate Graphs
GraphErrorRates(posterior)
#' Calculate the rate differences, graph and print out
posterior$prop_diff_events <- posterior$prod_a - posterior$prod_b
GraphErrorDiff(posterior)
CatErrorDiffProp(posterior)
#This gets our impact per event with lots of uncertainty packed in
eng_cost <- GetEngCost(size, eng_cost_range, eng_hour_range)
# Add forecasted expected cost column to posterior for product a and b and store in list
posterior$prod_a_impact <- posterior$prod_a * eng_cost
posterior$prod_b_impact <- posterior$prod_b * eng_cost
#Load Median values into list of use later
impacts <- list()
impacts["prod_a_impact"] <- median(posterior$prod_a_impact)
impacts["prod_b_impact"] <- median(posterior$prod_b_impact)
# Add the column that holds the difference between the two costs
posterior$prod_diff_impact <- posterior$prod_a_impact - posterior$prod_b_impact
print("Here is a glimpse into some of the data.")
print(head(posterior,10))
total_costs <- GetTotalCosts(impacts, product_costs, base_configs["time_multiple"], event_counts, event_priors, base_configs)
#' #Total Costs Differences
impact_res <- CatTotalCost(total_costs, product_costs)
#' Graph Impact Difference
GraphErrorImpact(posterior, impact_res)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment