my_packages <- c("tidyverse", "lubridate", "survival", "survminer",
"ProbBayes", "Bolstad", "ggplot2", "devtools",
"mc2d", "bayestestR", "scales", "knitr", "ggridges",
"simstudy", "betareg", "data.table", "boot", "broom",
"stats4","jsonlite","tidybayes","bayesplot", "BayesFactor",
"rstanarm")
not_installed <-
my_packages[!(my_packages %in% installed.packages()[ , "Package"])]
if (length(not_installed)){
msgVal = paste("Install packages: ",toString(not_installed))
msgRes <- askYesNo(msgVal)
if( msgRes != TRUE){
stopMsg <- paste("Quitting because you don't want to install:", not_installed)
stop(stopMsg)
}
}
if(length(not_installed)) install.packages(not_installed)
# Deal with ggdistribute - which is in github
my_packages <- c("ggdistribute")
not_installed <- my_packages[!(my_packages %in% installed.packages()[ , "Package"])]
if (length(not_installed)){
msgVal = paste("Install ggdistribute?")
msgRes <- askYesNo(msgVal)
if( msgRes != TRUE){
stopMsg <- paste("Quitting because you don't want to install ggdistribute")
stop(stopMsg)
}
}
if(length(not_installed)) devtools::install_github("iamamutt/ggdistribute")
library(devtools)
library(tidyverse)
library(lubridate)
library(survival)
library(survminer)
library(bayestestR)
library(ProbBayes)
library(Bolstad)
library(ggplot2)
library(mc2d)
library(scales)
library(knitr)
library(ggdistribute)
library(ggridges)
library(simstudy)
library(betareg)
library(data.table)
library(boot)
library(broom)
library(jsonlite)
library(stats4)
library(tidybayes)
library(bayesplot)
library(BayesFactor)
library(rstanarm)
select <- dplyr::select
`%!in%` <- Negate(`%in%`)
ScoreBetaBeliefs <- function(bias = c(0.5, 0.5), first_belief = c(2,2), second_belief = c(3,3), hits = 1, misses = 5){
probs <- bias
beta_par <- rbind(first_belief, second_belief) #stack the two beliefs into a matrix
output <- binomial.beta.mix(bias, beta_par, c(hits, misses)) #score both priors against the observed hits and misses
posterior_odds <- output$probs[1]/ output$probs[2] #Bayes Factor
return(posterior_odds)
}
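#' Illustrative example (not run): compare two beta beliefs about a hit rate
#' against 12 hits and 8 misses. The belief and count values here are
#' arbitrary placeholders; an odds ratio above 1 favors the first belief.
# ScoreBetaBeliefs(bias = c(0.5, 0.5), first_belief = c(2, 2),
#                  second_belief = c(8, 2), hits = 12, misses = 8)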
ScoreGammaBeliefs <- function(bias = c(0.5, 0.5), first_belief = c(2,2), second_belief = c(3,3), data = c(13,52)){
probs=bias
beliefs=rbind(first_belief, second_belief)
data = list(y=data[1], t = data[2])
mix <- poisson.gamma.mix(probs,beliefs,data)
scores <- mix$probs[1]/mix$probs[2]
return(scores)
}
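#' Illustrative example (not run): score two gamma beliefs about an event rate
#' against 13 events observed over 52 time units (arbitrary values).
# ScoreGammaBeliefs(bias = c(0.5, 0.5), first_belief = c(2, 2),
#                   second_belief = c(5, 1), data = c(13, 52))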
MakeBetaGraph <- function(hit, miss, xlab, ylab, tlab, slab , xadj = .01 ){
beta_avg = (hit/(miss + hit))
posterior <- distribution_beta(1000, hit, miss)
ci_hdi <- ci(posterior, method = "HDI")
posterior <- posterior %>%
#estimate_density(extend=TRUE)
estimate_density()
ytop <- max(posterior$y) * .8
posterior %>%
ggplot(aes(x=x, y=y)) +
geom_area(fill="darkslateblue", alpha = 0.6) +
theme_classic() +
geom_vline(xintercept=beta_avg, color="black", size=1) +
geom_vline(xintercept=ci_hdi$CI_low, color="black", size=1, linetype="dotted") +
geom_vline(xintercept=ci_hdi$CI_high, color="black", size=1, linetype="dotted") +
annotate(geom = "text", x = ci_hdi$CI_low + xadj , y = ytop,
label = round(ci_hdi$CI_low,3), fontface = 2, hjust = 0, vjust = -5) +
annotate(geom = "text", x = ci_hdi$CI_high + xadj , y = ytop,
label = round(ci_hdi$CI_high,3), fontface = 2, hjust = 0, vjust = -5) +
annotate(geom = "text", x = beta_avg + xadj , y = ytop,
label = round(beta_avg,3), fontface = 2, hjust = 0, vjust = -5) +
labs(x = xlab, y = ylab, title = tlab, subtitle = slab)
}
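#' Illustrative example (not run): plot a beta distribution for 9 hits and
#' 21 misses with its HDI marked; the labels are arbitrary.
# MakeBetaGraph(hit = 9, miss = 21, xlab = "Rate", ylab = "Density",
#               tlab = "Posterior", slab = "9 hits / 21 misses")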
MakeRidgeChart2 <- function(eng_risk, title_val, sub_val, xtitle, ytitle, legend_val){
ggplot(eng_risk, aes(x = x_vals, y = factor(yaxis,levels=unique(yaxis)),height = prob,
scale = 1.2, color = legend, fill = factor(legend,levels=rev(unique(legend))))) +
#scale_fill_manual(values = cbp1) +
geom_density_ridges(stat="identity", color = "white", alpha = 0.8,
panel_scaling = TRUE, size = 1) +
guides(fill=guide_legend(title= legend_val)) +
labs(title = title_val,
subtitle = sub_val,
x = xtitle,
y = ytitle)
}
MakeRidgeChart <- function(eng_risk, title_val, sub_val, xtitle, ytitle, legend_val){
ggplot(eng_risk, aes(x = x_vals, y = factor(yaxis,levels=unique(yaxis)),height = prob,
scale = 1.2, color = legend, fill = factor(rev(legend),levels=rev(unique(legend))))) +
#scale_fill_manual(values = cbp1) +
geom_density_ridges(stat="identity", color = "white", alpha = 0.8,
panel_scaling = TRUE, size = 1) +
guides(fill=guide_legend(title= legend_val)) +
labs(title = title_val,
subtitle = sub_val,
x = xtitle,
y = ytitle)
}
GetRope <- function( eng_risk, month_one, month_two){
mone <- eng_risk %>% filter(yaxis == month_one) %>% select(closed, misses) %>% distinct()
mone_dist <- rbeta(1000, mone$closed, mone$misses)
mone_hdi <- ci(mone_dist, method = "HDI")
mtwo <- eng_risk %>% filter(yaxis == month_two) %>% select(closed, misses) %>% distinct()
mtwo_dist <- rbeta(1000, mtwo$closed, mtwo$misses)
mtwo_hdi <- ci(mtwo_dist, method = "HDI")
diff_dist <- mtwo_dist - mone_dist
rope(diff_dist)
}
MakeVulnData <- function(vulnData, prior){
hasGroup <- length(vulnData) #number of columns; 4 means a legend column is present
for( i in 1:nrow(vulnData)){
if( i == 1){
hits <- vulnData$hits[i] + prior[1]
misses <- vulnData$misses[i] + prior[2]
yaxis <- vulnData$yaxis[i]
if(hasGroup == 4){
legend <- vulnData$legend[i]
}else{
legend <- yaxis
}
res <- MakeBurnData(hits, misses, yaxis, legend)
eng_risk <- res
}else{
hits <- vulnData$hits[i] + hits
misses <- vulnData$misses[i] + misses
yaxis <- vulnData$yaxis[i]
if(hasGroup == 4){
legend <- vulnData$legend[i]
}else{
legend <- yaxis
}
res <- MakeBurnData(hits, misses, yaxis, legend)
eng_risk <- bind_rows(res, eng_risk)
}
}
return(eng_risk)
}
MakeBurnData <- function(closed, misses, yaxis, legend){
x_vals = seq(from=0,to=1,length = 1000)
dens = dbeta(x_vals,closed, misses)
prob = dens / max(dens)
res <- tibble(x_vals, yaxis , prob, legend, closed, misses, dens)
return(res[is.finite(res$dens), ]) #drop rows where the beta density is infinite
}
GetBeliefsEvents <- function(belief, max_belief ){
alpha_ratio <- as.numeric(belief)
beta_ratio <- as.numeric(max_belief)
q1 = list(p=.5, x= alpha_ratio)
q2 = list(p=.9, x= beta_ratio)
return( beta.select(q1, q2) )
}
GetBeliefsHits <- function(hits, max_hits, events ){
alpha_ratio <- hits / events
beta_ratio <- max_hits / events
q1 = list(p=.5, x= alpha_ratio)
q2 = list(p=.9, x= beta_ratio)
return( beta.select(q1, q2) )
}
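#' Illustrative examples (not run): turn SME forecasts into beta parameters.
#' The first call says "median belief 10%, 90% sure it's below 30%"; the
#' second expresses the same idea as hit counts out of 100 events. Values
#' are arbitrary placeholders.
# GetBeliefsEvents(belief = 0.10, max_belief = 0.30)
# GetBeliefsHits(hits = 10, max_hits = 30, events = 100)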
GetBetResults <- function(guess, tries, w.ball){
guess_cost <- tries * 5
diff_cost <- abs(w.ball - guess) * 5
total_cost <- guess_cost + diff_cost
profit <- 100 - total_cost
print(paste0("Your Winnings: ", profit))
}
GetRolls <- function(tries){
w.ball <- sample(44,1)
val <- ""
for(x in 1:tries){
val <- paste0(rollball(w.ball)," ",val)
}
print(val)
return(w.ball)
}
rollball <- function(w.ball){
if( w.ball == 1){
r.ball <- sample(2:44,1)
}else if( w.ball == 44){
r.ball <- sample(1:43,1)
}else{
r.ball <- sample(c(1:(w.ball - 1), (w.ball + 1):44),1)
}
if(r.ball < w.ball){
return("Left")
}else{
return("Right")
}
}
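#' Illustrative example (not run): play the ball game with 5 rolls, then make
#' a guess and score the bet (the guess of 22 is arbitrary).
# w.ball <- GetRolls(tries = 5)
# GetBetResults(guess = 22, tries = 5, w.ball = w.ball)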
ErrorCheck <- function(base_configs, event_counts){
a_total <- as.numeric(base_configs["a_n_events"])
a_errors <- as.numeric(event_counts["product_a_errors"])
b_total <- as.numeric(base_configs["b_n_events"])
b_errors <- as.numeric(event_counts["product_b_errors"])
#Remove errors to get true events
a_true <- a_total - a_errors
b_true <- b_total - b_errors
cat(paste("A Total:",a_total,"A Errors:", a_errors,"A True:", a_true, "\n"))
cat(paste("B Total:",b_total,"A Errors:", b_errors,"B True:", b_true, "\n"))
# Obvious misconfiguration
if( a_total < a_errors){
stop(paste("Exiting: Your errors for product A of: ", a_errors," exceed your total events for product A of: ", a_total))
}
if( b_total < b_errors){
stop(paste("Exiting: Your errors for product B of: ", b_errors," exceed your total events for product B of: ", b_total))
}
# If A's True Vulns (i.e. 30) are greater than B's Total (i.e. 25) -
if( b_total < a_true ){
diff <- a_true - b_total
cat(paste("Product B is missing", diff, "false positive error(s). Add ", diff ," each to B's EVENTS and ERRORS\n"))
stop("Exiting")
}
if( a_total < b_true ){
diff <- b_true - a_total
cat(paste("Product A is missing", diff, "false positive error(s). Add ", diff ," each to A's EVENTS and ERRORS\n"))
stop("Exiting")
}
#Is Prod A/B Missing errors - false negatives specifically?
if( a_true < b_true ){
diff <- b_true - a_true
cat(paste("Adding ", diff ," FALSE NEGATIVE errors to product A for a new total of: ",diff + a_errors,"\n"))
return(c("product_a_errors", diff))
}
if( b_true < a_true ){
diff <- a_true - b_true
cat(paste("Adding ", diff ," FALSE NEGATIVE errors to product B for a new total of: ",diff + b_errors,"\n"))
return(c("product_b_errors", diff))
}
return(c("No Change", 0))
}
CatTotalCost <- function(total_costs, product_costs){
#Get Cost Ratios
prod_a_total_costs <- total_costs[1]
prod_b_total_costs <- total_costs[2]
prod_ratio <- round( prod_a_total_costs / prod_b_total_costs, 3) * 100
prod_ratio <- paste(prod_ratio, "%", sep = "")
#Format as dollars
prod_a_orginal_cost <- dollar_format()(as.numeric(product_costs["prod_a_cost"]))
prod_a_total_final <- dollar_format()(round(as.numeric(prod_a_total_costs),2))
prod_b_orginal_cost <- dollar_format()(as.numeric(product_costs["prod_b_cost"]))
prod_b_total_final <- dollar_format()(round(as.numeric(prod_b_total_costs),2))
#Print Message to stdout
cat("--------Which Product Cost Most To Operate Given Forecasted Errors-------\n")
cat(paste(" Product A is", prod_ratio , "the total cost of product B\n"))
cat(paste(" Product A (with base price of ", prod_a_orginal_cost ,") is expected to cost to operate with errors: ", prod_a_total_final,"\n"))
cat(paste(" Product B (with base price of ", prod_b_orginal_cost,") is expected to cost to operate with errors: ", prod_b_total_final,"\n"))
cat("\n Still uncertain? Get more data by running more tests!")
return(c(prod_ratio, prod_a_orginal_cost, prod_a_total_final, prod_b_orginal_cost, prod_b_total_final))
#print(knitr::kable(out))
}
GraphErrorImpact <- function(posterior, impact){
newdf_a_impact <- data.frame(posterior = posterior$prod_a_impact, product = "Product A")
newdf_b_impact <- data.frame(posterior = posterior$prod_b_impact, product = "Product B")
newdf_ab_impact <- rbind(newdf_a_impact, newdf_b_impact)
impact_text <- paste(" Product A is", impact[1] , "the total cost of product B\n Product A (with base price of ", impact[2] ,") is expected to cost to operate with errors: ", impact[3],"\n Product B (with base price of ", impact[4],") is expected to cost to operate with errors: ", impact[5],"\n")
print(ggplot(newdf_ab_impact, aes(x = posterior, y = ..density.., fill = product)) +
stat_density_ci(alpha = 0.5, n = 1024, geom = "density",position = "identity") +
scale_fill_manual(values=c("#333399", "#CCCCFF")) +
labs(title = "FORECASTING APPSEC ERROR IMPACT\nUsing Bayesian A/B Testing\n", x="Using Small Samples: Predicted Difference In Product Error Impacts",
subtitle = impact_text,
y = "Product Error Comparisons"))
}
CatErrorDiffProp <- function(posterior){
error_rate_diff <- sum(posterior$prop_diff_events > 0 )/length(posterior$prop_diff_events)
cat(paste("--------Error Rate Difference For This Simmulation--------\n Probability that product A makes more errors that Product B is ",round(error_rate_diff * 100) ,"%\n\n", sep = ""))
}
GraphErrorDiff <- function(posterior){
xval <- posterior$prop_diff_events
breaks <- pretty(range(xval), n = nclass.FD(xval), min.n = 1)
bwidth <- breaks[2]-breaks[1]
print(ggplot(posterior, aes(x=prop_diff_events)) +
geom_histogram(aes(y=..density..), colour="black", fill="white", binwidth = bwidth)+
geom_density(alpha=.2, fill="#333399") + labs(title="Event Amount Difference: Product A - Product B",x="Event Rate", y = "Amount")+
geom_vline(xintercept=0, color="red", size=1, linetype = "dotted")
)
}
GraphErrorRates <- function(posterior){
newdf_a <- data.frame(posterior = posterior$prod_a, product = "Product A")
newdf_b <- data.frame(posterior = posterior$prod_b, product = "Product B")
newdf_ab <- rbind(newdf_a, newdf_b)
print(ggplot(newdf_ab, aes(x = posterior, y = ..density.., fill = product)) +
stat_density_ci(alpha = 0.5, n = 1024, geom = "density",position = "identity") +
scale_fill_manual(values=c("#333399", "#CCCCFF")) +
labs(title = "Product A/B Testing", x="Using Small Samples: Predicted Difference In Product Error Rates",
y = "Product Comparisons"))
}
CatUpdatedBeliefRates <- function(posterior, event_counts, base_configs){
out <- "--------Updated Forecasts Given Small Data, Beliefs & Lots of Simulations--------------\n"
# 89% Highest Density Intervals
prod_a_points <- point_estimate(posterior$prod_a)
ci_hdi_prod_a <- ci(posterior$prod_a, method = "HDI")
out <- paste(out, " The previous product A forecast combined with ", base_configs["a_n_events"] ," events and ", event_counts["product_a_errors"] ," errors is between:",
round(ci_hdi_prod_a$CI_low,2) * 100, "% and ",
round(ci_hdi_prod_a$CI_high,2) * 100, "% \n And centered near:",
round(prod_a_points[[1]],2) * 100, "%\n\n", sep="")
# 89% Highest Density Interval
prod_b_points <- point_estimate(posterior$prod_b)
ci_hdi_prod_b <- ci(posterior$prod_b, method = "HDI")
out <- paste(out, " The previous product B foreacst combined with ", base_configs["b_n_events"] ," events and ", event_counts["product_b_errors"]," errors is between:",
round(ci_hdi_prod_b$CI_low,2) * 100, "% and ",
round(ci_hdi_prod_b$CI_high,2) * 100, "% \n And centered near:",
round(prod_b_points[[1]],2) * 100, "%\n\n", sep="")
out <- paste(out, "See the markdown report or plot tab graphs in rstudio to see forecasts combined with data.\n")
cat(out)
}
CatPriorBeliefRates <- function(event_priors){
#Print Beta Curve
#print(MakeCurves(event_priors[["median_errors"]], event_priors[["edge_errors"]],color="Purple", CI=TRUE, CIT=TRUE))
xval = "Event Probability"
yval = "Event Strength"
tval = "Credible Beliefs About Events"
sval = "With 95% Credible Belief Interval"
beta_vals <- GetBeliefsEvents(event_priors[["median_errors"]], event_priors[["edge_errors"]] )
print(ggplotBeta(beta_vals[1], beta_vals[2], xlab = xval, ylab = yval, tlab = tval, sval = sval))
#print(MakeBetaGraph(beta_vals[1], beta_vals[2], xlab = xval, ylab = yval, tlab = tval,slab = sval))
#Get Beta Prio CIs
priorCI <- GetBetaCI(event_priors["median_errors"], event_priors["edge_errors"])
cat("------Prior Beliefs About Error Rates--------\n")
cat(paste(" You forecasted an ERROR RATE equally ABOVE/BELOW ", round(event_priors[["median_errors"]]*100), "%.\n", sep=""))
cat(paste(" You're also 90% sure the error rate is LESS THAN ", round(event_priors[["edge_errors"]]*100), "%.\n", sep=""))
cat(paste(" Given that forecast and the model we are using you you believe the true rate likely sits between ", priorCI[1],"% and ", priorCI[3], "%\n\n", sep=""))
}
GetTotalCosts <- function(impacts, costs, time_multiple, event_counts, prior_events, base_configs){
product_a_counts <- event_counts[[1]]
product_b_counts <- event_counts[[2]]
# Get Gamma Prior for product a which will get fed into poisson dist in GetEventFrequency()
a_number_events <- as.numeric(base_configs$a_n_events)
a_gamma_median_prior <- prior_events[[1]] * a_number_events
a_gamma_sd <- prior_events[[2]] * a_number_events
# Get Gamma Prior for product b which will get fed into poisson dist in GetEventFrequency()
b_number_events <- as.numeric(base_configs$b_n_events)
b_gamma_median_prior <- prior_events[[1]] * b_number_events
b_gamma_sd <- prior_events[[2]] * b_number_events
prod_a_event_results <- GetEventFrequency(product_a_counts, a_gamma_median_prior, a_gamma_sd)
prod_a_mean_events <- prod_a_event_results$mean
prod_a_event_totals <- prod_a_mean_events * as.numeric(time_multiple)
prod_b_event_results <- GetEventFrequency(product_b_counts, b_gamma_median_prior, b_gamma_sd)
prod_b_mean_events <- prod_b_event_results$mean
prod_b_event_totals <- prod_b_mean_events * as.numeric(time_multiple)
prod_a_total_cost <- prod_a_event_totals * impacts$prod_a_impact + costs$prod_a_cost + costs$prod_a_run
prod_b_total_cost <- prod_b_event_totals * impacts$prod_b_impact + costs$prod_b_cost + costs$prod_b_run
return(c(prod_a_total_cost, prod_b_total_cost) )
}
GetEventFrequency <- function(event_counts, avg_event_count, event_variation, graph = FALSE){
#rate vals can be a vector of results, perhaps events per day, week , month etc
# mode and sd will be converted into rate and scale.
event_count_priors <- GetRatePriors(avg_event_count, event_variation, graph)
result <- poisgamp(event_counts, event_count_priors[1], event_count_priors[2], plot = FALSE, suppressOutput = TRUE)
return(result)
}
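#' Illustrative example (not run): posterior event-rate summary given a small
#' vector of made-up event counts, a prior mode of 3 events and an sd of 2.
# GetEventFrequency(event_counts = c(2, 4, 3, 5), avg_event_count = 3,
#                   event_variation = 2)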
GetRatePriors <- function(mode, sd, graph = FALSE){
# This function allows you to think in terms of mode and SD.
# Note that everything is positive, so while the mode might be a small number close to 0,
# the SD can be large, skewing far to the right without going past zero on the left.
# Here are the corresponding rate and shape parameter values:
ra = ( mode + sqrt( mode^2 + 4*sd^2 ) ) / ( 2 * sd^2 )
sh = 1 + mode * ra
if(graph){
x = seq(0,mode+5*sd,len=1001)
plot( x , dgamma( x , shape=sh , rate=ra ) , type="l" ,
main=paste("dgamma, mode=",mode,", sd=",sd,sep="") ,
ylab=paste("dgamma( shape=",signif(sh,3)," , rate=",signif(ra,3)," )",
sep="") )
abline( v=mode , lty="dotted" )
}
return(c(sh, ra))
}
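#' Illustrative example (not run): convert a mode of 2 events and an sd of 4
#' into gamma shape and rate parameters, and plot the resulting prior.
# GetRatePriors(mode = 2, sd = 4, graph = TRUE)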
GetEngCost <- function(size, eng_cost_pert, eng_hour_priors){
mode <- as.numeric(eng_cost_pert[["mode"]])
low <- as.numeric(eng_cost_pert[["low"]])
high <- as.numeric(eng_cost_pert[["high"]])
mode_hrs <- as.numeric(eng_hour_priors[["mode"]])
low_hrs <- as.numeric(eng_hour_priors[["low"]])
high_hrs <- as.numeric(eng_hour_priors[["high"]])
eng_rate <- rpert(size, min = low, mode = mode, max = high, shape = 4)
eng_hours <- rpert(size, min = low_hrs, mode = mode_hrs, max = high_hrs, shape = 4)
eng_cost <- eng_hours * eng_rate
return(eng_cost)
}
GetErrors <- function(n_draws, n_events, proportion_events){
n_errors <- rbinom(n = n_draws, size = n_events, prob = proportion_events)
return(n_errors)
}
GetPropotionOfEvents <- function(n_draws,prior_events){
error_beliefs <- GetBeliefsEvents(prior_events[["median_errors"]], prior_events[["edge_errors"]])
#Make an n_draws length list of error beliefs
proportion_events <- rbeta(as.numeric(n_draws), as.numeric(error_beliefs[[1]]) , as.numeric(error_beliefs[[2]]) )
return(proportion_events)
}
SimulateHits <- function(freq_hits, num_events){
freq_miss <- 1 - freq_hits
res <- sample(c("hit", "miss"), size=num_events, replace=TRUE,
prob=c(freq_hits, freq_miss))
hit <- sum(res == "hit")
miss <- sum(res == "miss")
return( c(hit,miss))
}
GetBetaCI <- function(hit,edge){
betas <- GetBeliefsEvents(hit, edge)
intervals <- qbeta(c(.025, .5, .975), betas[1], betas[2])
intervals <- round(intervals * 100)
return(intervals)
}
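#' Illustrative example (not run): 95% belief interval (in percent) implied by
#' a median forecast of 5% with a 90% upper bound of 10%.
# GetBetaCI(hit = 0.05, edge = 0.10)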
MakeCurves <- function(alpha = 0, beta = 0 , color = "black", ytop = 20, CI = FALSE, CIT = FALSE, add_val = FALSE){
alpha <- alpha + 1
beta <- beta + 1
curve(dbeta(x, alpha , beta ), from=0, to=1,
xlab="SME Forecast For Proportion of Events", ylab="Density",
col= color, lwd=4, las=1, add = add_val , ylim = c(0,ytop))
if( CI ){
intervals <- qbeta(c(.025, .975), alpha, beta)
abline(v = intervals[1], lty=2)
abline(v = intervals[2], lty=2)
if( CIT ){
adj <- 0
if( intervals[2] - intervals[1] < 0.11 ){
adj <- 1
}
text(intervals[1] + 0.03,ytop, round(intervals[1], 2))
text(intervals[2] + 0.03 ,ytop - adj, round(intervals[2], 2))
}
}
}
ggplotBeta <- function(alpha, beta, xlab = xval, ylab = yval, tlab = "Beliefs", sval = "Test"){
x = seq(0, 1, 0.01)
y = dbeta(x, shape1=alpha, shape2=beta)
density = data.frame(x=x, y=y)
mean=alpha/(alpha+beta)
mode=(alpha-1)/(alpha+beta-2)
intervals <- qbeta(c(.025, .975), alpha, beta)
#print(paste0("sval: ", sval, " intervals: ", round(intervals[1],2), " to ",round(intervals[2],2) ))
sval <- paste0(sval, " of ", round(intervals[1],2), " to ", round(intervals[2],2), " and mean: ", round(mean,2))
res <- ggplot(density, aes(x, y)) +
geom_area(fill= "darkslateblue", alpha = 0.6) +
theme_minimal()+
theme(axis.text.x = element_text(angle = 90, hjust = 1),
axis.text.y = element_blank()) +
geom_vline(xintercept = intervals[1]) +
geom_vline(xintercept = intervals[2])+
geom_vline(xintercept = mean)+
labs(x = xlab, y = ylab, title = tlab, subtitle = sval)
return(res)
}
GetSize <- function(size, first, second){
res <- ifelse(length(first) >= length(second), length(second), length(first) )
if( size > res){
return(res)
}
return(size)
}
GetEngHours <- function(size, med = 10, ninety = 60, total_hours = 100){
beta_vals <- GetBeliefsHits(med, ninety, total_hours)
prop_hours <- rbeta(size, beta_vals[1], beta_vals[2])
hours <- rbinom(size, total_hours, prop_hours)
return(hours)
}
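#' Illustrative example (not run): draw 1000 simulated engineering-hour values
#' from a belief of 10 median hours, 60 hours at the 90th percentile, out of
#' 100 total hours.
# GetEngHours(size = 1000, med = 10, ninety = 60, total_hours = 100)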
#' ##Wrapper Function
#' This just makes it easy for users to call one function without much fuss.
runProductCostComparison <- function(base_configs, event_priors, event_counts, eng_hour_range, eng_cost_range, product_costs){
# A Quick Check for missed false negatives between products and other sundry.
update_count <- ErrorCheck(base_configs, event_counts)
if( update_count[2] > 0){
event_counts[update_count[[1]]] <- as.numeric(update_count[[2]]) + event_counts[[update_count[1]]]
if( update_count[1] == "product_a_errors"){
base_configs$a_n_events <- base_configs$a_n_events + as.numeric(update_count[[2]])
print(paste("Updating Product A Events: ", base_configs$a_n_events))
}
if( update_count[1] == "product_b_errors"){
base_configs$b_n_events <- base_configs$b_n_events + as.numeric(update_count[[2]])
print(paste("Updating Product B Events: ", base_configs$a_n_events))
}
}
#' ##Prior Beliefs About Error Rates
#' In the *event_priors* config element above you specified what you think is the likely error rate given N events. This graph shows you
#' the spread of possible rates given our beliefs. It also gives some indication of where the most likely rates may cluster.
CatPriorBeliefRates(event_priors)
#Number of simulations (draws), tests (events), and default array index sizes
n_draws <- as.numeric(base_configs[["n_draws"]])
a_n_events <- as.numeric(base_configs[["a_n_events"]])
b_n_events <- as.numeric(base_configs[["b_n_events"]])
size <- 10000
#Create a list of simulated events and error RATES based on priors
proportion_events <- GetPropotionOfEvents(n_draws, event_priors)
a_n_errors <- GetErrors(n_draws, a_n_events, proportion_events)
b_n_errors <- GetErrors(n_draws, b_n_events, proportion_events)
#Put simulations into dataframe
a_prior <- data.frame(proportion_events, a_n_errors)
b_prior <- data.frame(proportion_events, b_n_errors)
#print(head(b_prior,10))
# Select the simulated prior data that corresponds to the real product errors
product_a <- a_prior[a_prior$a_n_errors == event_counts[["product_a_errors"]], ]
product_b <- b_prior[b_prior$b_n_errors == event_counts[["product_b_errors"]], ]
#print(head(product_a))
#print(head(product_b))
#Sample to create indices for posterior and load into data frame
sample_a_indices <- sample( nrow(product_a), size = size, replace = TRUE, prob = product_a$proportion_events)
sample_b_indices <- sample( nrow(product_b), size = size, replace = TRUE, prob = product_b$proportion_events)
posterior <- data.frame(
prod_a = product_a$proportion_events[sample_a_indices],
prod_b = product_b$proportion_events[sample_b_indices])
#print(head(posterior))
#' Updated beliefs given real error rates
CatUpdatedBeliefRates(posterior, event_counts, base_configs)
#' Create and Print out Error Rate Graphs
GraphErrorRates(posterior)
#' Calculate the rate differences, graph and print out
posterior$prop_diff_events <- posterior$prod_a - posterior$prod_b
print(head(posterior,10))
GraphErrorDiff(posterior)
CatErrorDiffProp(posterior)
#This gets our impact per event with lots of uncertainty packed in
eng_cost <- GetEngCost(size, eng_cost_range, eng_hour_range)
# Add forecasted expected cost column to posterior for product a and b and store in list
posterior$prod_a_impact <- posterior$prod_a * eng_cost
posterior$prod_b_impact <- posterior$prod_b * eng_cost
#Load median values into a list for use later
impacts <- list()
impacts["prod_a_impact"] <- median(posterior$prod_a_impact)
impacts["prod_b_impact"] <- median(posterior$prod_b_impact)
# Add the column that holds the difference between the two costs
posterior$prod_diff_impact <- posterior$prod_a_impact - posterior$prod_b_impact
print("Here is a glimpse into some of the data.")
#print(head(posterior,10))
total_costs <- GetTotalCosts(impacts, product_costs, base_configs["time_multiple"], event_counts, event_priors, base_configs)
#' #Total Costs Differences
impact_res <- CatTotalCost(total_costs, product_costs)
#' Graph Impact Difference
GraphErrorImpact(posterior, impact_res)
}
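#' Illustrative example (not run): one way to assemble the configuration lists
#' the wrapper expects. Every value below is an arbitrary placeholder, not a
#' recommendation; the element names match what the functions above read.
# base_configs   <- list(n_draws = 100000, a_n_events = 20, b_n_events = 20,
#                        time_multiple = 12)
# event_priors   <- list(median_errors = 0.05, edge_errors = 0.20)
# event_counts   <- list(product_a_errors = 3, product_b_errors = 5)
# eng_hour_range <- list(low = 2, mode = 8, high = 40)
# eng_cost_range <- list(low = 75, mode = 125, high = 250)
# product_costs  <- list(prod_a_cost = 50000, prod_a_run = 10000,
#                        prod_b_cost = 40000, prod_b_run = 12000)
# runProductCostComparison(base_configs, event_priors, event_counts,
#                          eng_hour_range, eng_cost_range, product_costs)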
GetVulnPredictions <- function(beta_closed, beta_opened, future_counts){
ab = c(beta_closed, beta_opened)
future_count = future_counts;
future_list = 0:future_count
plf = pbetap(ab, future_count, future_list)
return(round(cbind(future_list, plf), digits=3))
}
GetVulnsOpenedByWks <- function(data_table = dt, week_list = 1:52, group_list = 1:10, audit_list = 0:1, exposure_list = 1:3){
vulns <- data_table %>%
filter(week.fseen %in% week_list & group %in% group_list & audit %in% audit_list & exposure_id %in% exposure_list) %>%
select(week.fseen, severity, group, hits) %>%
group_by(group,severity, week.fseen) %>%
summarise(ext_count = sum(hits),.groups = 'drop')
#52 weeks of zeros, repeated once per group
empty_week <- tibble(week.fseen = rep(1:52,length(group_list)),ext_count = 0)
# Include Weeks With Zero Counts
vuln_wks <- right_join(vulns,empty_week, by = "week.fseen") %>%
mutate( new_count = if_else(is.na(ext_count.x),0, ext_count.x)) %>%
select(week.fseen, severity,group, new_count) %>% distinct() %>%
group_by(week.fseen, severity) %>% summarize(vuln_count = sum(new_count),.groups = 'drop') %>%
arrange(week.fseen) %>% distinct()
vuln_wks <- vuln_wks %>% pivot_wider(id_cols = c(week.fseen),
names_from = severity, values_from = c(vuln_count),values_fill=0)
vuln_wks <- vuln_wks %>% select(week.fseen, extreme, critical, high)
col_len <- length(vuln_wks)
col_start <- col_len - 2
vuln_wks <- vuln_wks %>% replace(is.na(.), 0) %>% mutate(all = rowSums(across(col_start:col_len)))
vuln_wks <- vuln_wks %>% select(week.fseen, extreme, critical, high, all)
return(vuln_wks)
}
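#' Illustrative example (not run): weekly opened-vuln counts for five simulated
#' groups, audited assets only, external and partner exposure. Parameters are
#' arbitrary; GetVulnGroups() can take a while to run.
# dt <- GetVulnGroups(group_count = 5)
# GetVulnsOpenedByWks(dt, week_list = 1:52, group_list = 1:5,
#                     audit_list = 1, exposure_list = 1:2)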
GetVulnGroups <- function(group_count = 10, ext = c(.01,.03), sev = c(.031, .07), hgh = c(.071, .15)){
dt <- tibble()
for(x in 1:group_count){
extreme <- sample(seq(from = ext[1], to = ext[2], by= .001), size = 1)
severe <- sample(seq(from = sev[1], to = sev[2], by= .001), size = 1)
high <- sample(seq(from = hgh[1], to = hgh[2], by= .001), size = 1)
patch_fail <- sample(1:4,size=1)
dev_maturity <- sample(1:4,size=1)
#print(paste0("Ext: ",extreme," Sev: ", severe, " High: ", high, " Patch Fail: ", patch_fail, " Dev Mat: ", dev_maturity))
res <- GetAdvSecResults(c(extreme, severe, high), patch_fail, dev_maturity)
res$group <- x
dt <- bind_rows(dt,res)
}
return(dt)
}
betaFormula <- function(audit, severity_id, exposure_id, time_adj, nDays, dev_maturity){
adj_val <- ((nDays*2)/365) * (dev_maturity * 3)
#print(adj_val)
alpha <- (((audit*3 + severity_id * 3 + exposure_id * 3) * (severity_id*1.43)) * (1 + time_adj))
#print(alpha)
mult <- if_else(alpha < 10, 2.5, if_else(alpha < 30,2,1.5))
beta <- alpha * mult
avg <- (alpha - adj_val)/(alpha + beta)
avg[avg <= 0] <- .005
#print(avg)
return(avg)
}
betaVariance <- function(audit, severity_id, exposure_id, time_adj){
alpha <- (((audit*3 + severity_id * 3 + exposure_id * 3) * (severity_id*1.43)) * (1 + time_adj))
mult <- if_else(alpha < 10, 2, if_else(alpha < 30,2,1.5))
beta <- alpha * mult
ret <- ((alpha * beta)/(alpha + beta)^2 * (alpha + beta + 1))
return(round(ret,4))
}
binomVariance <- function(audit, severity_id, exposure_id, time_adj){
avg <- (((audit*3 + severity_id * 3 + exposure_id * 3) * (severity_id*1.43)) * (1 + time_adj))
mult <- if_else(avg < 10, 4, if_else(avg < 30,3,2))
var <- round(avg * mult)
#print(paste0("Binom Var: ", var))
return(var)
}
GetSecResults <- function(events = c(.05,.10,.2), event_corr = 0.3, patch_fail = 1){
#Simplify, 6 team combinations, so groups of 6
team_count <- 6
days <- 365
total <- days * team_count
eventSum <- sum(events)
# Create 6 Groups of 365 Each.
team <- defData(varname = "team", dist = "nonrandom",formula = 0, id = "idTeam")
team <- defData(team,varname = "nDays",dist = "nonrandom", formula = days)
team <- genData(team_count, team)
team <- genCluster(team, cLevelVar = "idTeam", numIndsVar = "nDays", level1ID = "id")
#Put days into the nDays columns
team$nDays <- (team$id - (365*(team$idTeam - 1)))
# Probability each day will have one or more extreme, critical or high risk events.
# Mild correlation between the variables
dt <- genCorGen(n = total, nvars = 3, params1 = c(events[1], events[2], events[3]),
dist = "binary", rho = event_corr, corstr = "cs", wide = TRUE,
method = "ep",cnames = "extreme_event,critical_event,high_event", id = "id")
# For each day with an Ext,Crit,High events randomly draw a small number of hits for each
# These are slightly more correlated.
dt <- addCorGen(dtOld = dt, idvar = "id", nvars = 1, rho = .4, corstr = "cs",
dist = "poisson", param1 = "extreme_event", cnames = "extreme")
dt <- addCorGen(dtOld = dt, idvar = "id", nvars = 1, rho = .4, corstr = "cs",
dist = "poisson", param1 = "critical_event", cnames = "critical")
dt <- addCorGen(dtOld = dt, idvar = "id", nvars = 1, rho = .4, corstr = "cs",
dist = "poisson", param1 = "high_event", cnames = "high")
#Event showed 1 but poisson returned 0 - Make it at least 1
dt$extreme[dt$extreme_event == 1 & dt$extreme == 0] <- 1
dt$critical[dt$critical_event == 1 & dt$critical == 0] <- 1
dt$high[dt$high_event == 1 & dt$high == 0] <- 1
#Merge the Team/Days data with the hits/events data
dt <- mergeData(team, dt, "id")
#Add Exposure Categories for Audit and Exposure Type
dt$audit <- if_else(dt$id > total/2,1,0)
#Set Exposure Type
dt$exposure[dt$idTeam %in% c(1, 4)] <- "external"
dt$exposure[dt$idTeam %in% c(2, 5)] <- "partner"
dt$exposure[dt$idTeam %in% c(3, 6)] <- "internal"
#Add Exposure ID Field
dt$exposure_id[dt$exposure == "external"] <- 1
dt$exposure_id[dt$exposure == "partner"] <- 2
dt$exposure_id[dt$exposure == "internal"] <- 3
#Add Names based on audit and exposure type - note that this is just text, idTeam is most important.
dt <- genFactor(dt, "idTeam",
labels = c("external audit", "partner audit", "internal audit",
"external no audit", "partner no audit", "internal no audit"))
#Copy team name over and get a clean dataframe.
dt$team <- dt$fidTeam
## I think this may be the error
dt <- dt %>% select(idTeam, team, nDays, id, extreme, critical, high, audit, exposure, exposure_id)
#Need To move melt to later as it messes with defCondition
dt <- melt(dt, id.vars = c("idTeam","team","nDays","id","audit","exposure","exposure_id" ),
measure.vars = c("extreme", "critical", "high"),
variable.name = "severity", value.name = "hits")
#Cache records with Zeros for Poisson
dtZeros <- dt %>% filter(hits == 0)
#Get One Vuln Per Row i.e. it drops zeros...
dt <- dt[rep(1:.N,hits)]
dt$hits <- 1
#Reset ID field
dt$id <- 1:length(dt$idTeam)
setkey(dt,id)
#####Get Remediation Times
# Make numeric for formula input
dt$severity_id[dt$severity == "extreme"] <- 1
dt$severity_id[dt$severity == "critical"] <- 2
dt$severity_id[dt$severity == "high"] <- 3
# Set Fix Time Based On Vuln, Asset and Audit
# Note small adjustment upward based on event rate divided by 2
defFix <- defDataAdd(varname = "fix_time",
formula = "((audit*3 + severity_id * 3 + exposure_id * 3) * (severity_id*1.43))",
dist = "noZeroPoisson")
dt <- addColumns(defFix, dt)
#Fix Date
defFixDate <- defDataAdd(varname = "fix_date", formula = "fix_time + nDays", dist = "nonrandom")
dt <- addColumns(defFixDate, dt)
#Chance of Something NOT getting fixed goes up with more time.
defStatus <- defDataAdd(varname = "status", formula = "(fix_time/..patch_fail)/365", dist = "binary")
dt <- addColumns(defStatus, dt)
#Add date stamps for first seen and last seen
dt$first.seen <- if_else(dt$nDays <= 365, as.Date(dt$nDays, origin = "2020-01-01"),
as.Date(dt$nDays, origin = "2021-01-01"))
dt$last.seen <- if_else(dt$nDays <= 365, as.Date(dt$fix_date, origin = "2020-01-01"),
as.Date(dt$fix_date, origin = "2021-01-01"))
#Add week number for simplicity
dt$week.fseen <- if_else(dt$nDays <= 365, week(dt$first.seen), as.integer((week(dt$first.seen) + 52)))
dt$week.lseen <- if_else(year(dt$last.seen) == 2020, week(dt$last.seen), as.integer(week(dt$last.seen) + 52))
return(dt)
}
# Advanced Option
GetAdvSecResults <- function(events = c(.02,.10,.2), patch_fail = 1, dev_maturity = 4){
###### Parameters ##########
# EVENTS = chance of having an event for extreme, crit, high per day
# PATCH_FAIL = the higher the number, the less likely a vuln's status changes to "still open"
# DEV_MATURITY = 1-4 maturity level. Higher increases the likelihood of an event NOT happening.
#   It interacts with where you are in the year (in days) and other factors to
#   adjust the likelihood of events. It also affects the rate of closure.
###################
## Define Constants
#Simplify, 6 team combinations, so groups of 6
team_count <- 6
days <- 365
total <- days * team_count
eventSum <- sum(events)
###############
## TEAM SECTION
# Create 6 Groups of 365 Each.
team <- defData(varname = "team", dist = "nonrandom",formula = 0, id = "idTeam")
team <- defData(team,varname = "nDays",dist = "nonrandom", formula = days)
team <- defData(team, varname = "dev_maturity", dist = "nonrandom", formula = "..dev_maturity")
team <- genData(team_count, team)
team <- genCluster(team, cLevelVar = "idTeam", numIndsVar = "nDays", level1ID = "id")
#Put days into the nDays columns
team$nDays <- (team$id - (365*(team$idTeam - 1)))
########################
## Vulnerability Section
# Probability each day will have one or more extreme, critical or high risk events.
# Mild correlation between the variables
dt <- genData(n=team_count)
dt <- addPeriods(dt, days,perName = "nDays")
dt$nDays <- dt$nDays + 1
#ifelval <- if_else((events[1] - ((round(dt$nDays/(90 * (4/dev_maturity)),1)/100) * 1/2)) < 0, .005,events[1] - ((round(dt$nDays/(90 * (4/dev_maturity)),1)/100) * 1/2))
#print(paste0("ifelval res: ", ifelval))
defExtEvent <- defDataAdd(varname = "extreme_event", formula = "if_else((..events[1] - ((round(nDays/(90 * (4/..dev_maturity)),1)/100) * 1/2)) < 0, .005,..events[1] - ((round(nDays/(90 * (4/..dev_maturity)),1)/100) * 1/2))", dist = "binary")
dt <- addColumns(defExtEvent, dt)
defCritEvent <- defDataAdd(varname = "critical_event", formula = "if_else((..events[2] - ((round(nDays/(90 * (4/..dev_maturity)),1)/100) * 2/2)) < 0, .005,..events[2] - ((round(nDays/(90 * (4/..dev_maturity)),1)/100) * 2/2))", dist = "binary")
dt <- addColumns(defCritEvent, dt)
defHighEvent <- defDataAdd(varname = "high_event", formula = "if_else((..events[3] - ((round(nDays/(90 * (4/..dev_maturity)),1)/100) * 3/2)) < 0, .005,..events[3] - ((round(nDays/(90 * (4/..dev_maturity)),1)/100) * 3/2))", dist = "binary")
dt <- addColumns(defHighEvent, dt)
#view(dt)
#Use dplyr to get fields, and must reset id for data.table
#print(dt)
dt <- dt %>% mutate(id = timeID) %>% select(id, extreme_event, critical_event, high_event, dev_maturity)
#dt <- dt %>% mutate(id = timeID) %>% select(id, extreme_event, critical_event, high_event)
setkey(dt,id)
# For each day with an Ext,Crit,High events randomly draw a small number of hits for each
# These are slightly more correlated.
dt <- addCorGen(dtOld = dt, idvar = "id", nvars = 1, rho = .4, corstr = "cs",
dist = "poisson", param1 = "extreme_event", cnames = "extreme")
dt <- addCorGen(dtOld = dt, idvar = "id", nvars = 1, rho = .4, corstr = "cs",
dist = "poisson", param1 = "critical_event", cnames = "critical")
dt <- addCorGen(dtOld = dt, idvar = "id", nvars = 1, rho = .4, corstr = "cs",
dist = "poisson", param1 = "high_event", cnames = "high")
#view(dt)
#Event showed 1 but poisson returned 0 - Make it at least 1
dt$extreme[dt$extreme_event == 1 & dt$extreme == 0] <- 1
dt$critical[dt$critical_event == 1 & dt$critical == 0] <- 1
dt$high[dt$high_event == 1 & dt$high == 0] <- 1
################
## Asset Section
#Merge the Team/Days data with the hits/events data
dt <- mergeData(team, dt, "id")
#Add Exposure Categories for Audit and Exposure Type
dt$audit <- if_else(dt$id > total/2,1,0)
#Set Exposure Type
dt$exposure[dt$idTeam %in% c(1, 4)] <- "external"
dt$exposure[dt$idTeam %in% c(2, 5)] <- "partner"
dt$exposure[dt$idTeam %in% c(3, 6)] <- "internal"
#Add Exposure ID Field
dt$exposure_id[dt$exposure == "external"] <- 1
dt$exposure_id[dt$exposure == "partner"] <- 2
dt$exposure_id[dt$exposure == "internal"] <- 3
#Add Names based on audit and exposure type - note that this is just text, idTeam is most important.
dt <- genFactor(dt, "idTeam",
labels = c("external audit", "partner audit", "internal audit",
"external no audit", "partner no audit", "internal no audit"))
#Copy team name over and get a clean dataframe.
dt$team <- dt$fidTeam
###################
## Reformat Section
## Select important columns
dt <- dt %>% select(idTeam, team, nDays, id, dev_maturity, extreme, critical, high, audit, exposure, exposure_id)
#Need To move melt to later as it messes with defCondition
dt <- melt(dt, id.vars = c("idTeam","team","nDays","id","dev_maturity","audit","exposure","exposure_id" ),
measure.vars = c("extreme", "critical", "high"),
variable.name = "severity", value.name = "hits")
#Cache records with Zeros for Poisson
#dtZeros <- dt %>% filter(hits == 0)
dt$hits <- if_else(is.na(dt$hits),0,dt$hits)
#Get One Vuln Per Row i.e. it drops zeros...
dt <- dt[rep(1:.N,hits)]
#Reset ID field
dt$id <- 1:length(dt$idTeam)
setkey(dt,id)
######
## Remediation Time Section
# Make numeric for formula input
dt$severity_id[dt$severity == "extreme"] <- 1
dt$severity_id[dt$severity == "critical"] <- 2
dt$severity_id[dt$severity == "high"] <- 3
#Random time adjustment roughly based on event volume. Used for the fix-time generator
defTimeAdj <- defDataAdd(varname = "time_adj", formula = eventSum, variance = .1, dist = "normal")
dt <- addColumns(defTimeAdj, dt)
dt$time_adj <- if_else(dt$time_adj <= 0 , .01, dt$time_adj)
#print(dt)
#Get Probability Associated With Time from Beta Dist
#print(paste0("BetaFormula: ", betaFormula(dt$audit,dt$severity_id, dt$exposure_id,dt$time_adj, dt$nDays, dt$dev_maturity)))
#print(paste0("BetaVariance: ", betaVariance(dt$audit,dt$severity_id, dt$exposure_id,dt$time_adj)))
betaDef <- defDataAdd(varname = "beta_prob",
formula = "betaFormula(audit,severity_id, exposure_id,time_adj, nDays, dev_maturity)",
variance = "betaVariance(audit,severity_id, exposure_id,time_adj)",
dist = "beta")
#print(betaDef)
#Add column with list of probabilities
dt <- addColumns(betaDef, dt)
#print(paste0("Beta Prob: ",dt$beta_prob))
#print(paste0("Variance: ", binomVariance(dt$audit,dt$severity_id, dt$exposure_id,dt$time_adj)))
# Use beta probabilities as input into the binomial for time to fix
binDef <- defDataAdd(varname = "fix_time",
formula = "beta_prob",
variance = "binomVariance(audit,severity_id, exposure_id,time_adj)",
dist = "binomial"
)
#print(binDef)
#Create column for time to fix.
dt <- addColumns(binDef, dt)
#Update any time to fix zero values with a 1
dt$fix_time[dt$fix_time == 0] <- 1
#Fix Date
defFixDate <- defDataAdd(varname = "fix_date", formula = "fix_time + nDays", dist = "nonrandom")
dt <- addColumns(defFixDate, dt)
#Chance of Something NOT getting fixed goes up with more time.
defStatus <- defDataAdd(varname = "status", formula = "if_else((fix_time/..patch_fail)/365 > 1,1,(fix_time/..patch_fail)/365)", dist = "binary")
dt <- addColumns(defStatus, dt)
#Add date stamps for first seen and last seen
dt$first.seen <- if_else(dt$nDays <= 365, as.Date(dt$nDays, origin = "2020-01-01"),
as.Date(dt$nDays, origin = "2021-01-01"))
dt$last.seen <- if_else(dt$nDays <= 365, as.Date(dt$fix_date, origin = "2020-01-01"),
as.Date(dt$fix_date, origin = "2021-01-01"))
#Add week number for simplicity
dt$week.fseen <- if_else(dt$nDays <= 365, week(dt$first.seen), as.integer((week(dt$first.seen) + 52)))
dt$week.lseen <- if_else(year(dt$last.seen) == 2020, week(dt$last.seen), as.integer(week(dt$last.seen) + 52))
return(dt)
}
GetArrivalPrior <- function(v_open){
# Functions used to get optimized gamma values
boot_mean = function(original_vector, resample_vector) {
mean(original_vector[resample_vector])
}
obj = function(x){
sum(abs(pgamma(q=qbs, shape=x[1], rate=x[2]) - c(.05, .95)))
}
mean_results <- boot(v_open, boot_mean, R = 2000)
qbs = quantile(mean_results$t, prob=c(.05, .95))
theta = optim(par = c(1, 1), obj)$par
return(theta)
}
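#' Illustrative example (not run): bootstrap gamma prior parameters from a
#' made-up vector of arrival counts.
# GetArrivalPrior(c(3, 5, 2, 4, 6, 3, 1, 4))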
plotGammaAdv <- function(theta, prior, ytop = 1,xval = "Rate", yval = "Density",
tlab = "Rate Prior", slab = ""){
pdf <- tibble(gamma_val = prior, theta_val = theta)
ggplot(pdf,aes(x=theta, y=gamma_val)) +
geom_density(aes(x=theta,y=gamma_val),stat="identity",fill = "darkslateblue",
alpha = 0.6) +
xlab(xval) +
ylab(yval) +
labs(title = tlab, subtitle = slab, xlab = xval, ylab = yval)
}
plot_posterior <- function(theta, prior, likelihood, posterior) {
ymax = max(c(prior, posterior))
scaled_likelihood = likelihood * ymax / max(likelihood)
plot(theta, prior, type='l', col='orange', xlim= range(theta), ylim=c(0, ymax), ylab='',xlab='Rate', yaxt='n')
par(new=T)
plot(theta, scaled_likelihood, type='l', col='skyblue', xlim=range(theta), ylim=c(0, ymax), ylab='' ,xlab='Rate', yaxt='n')
par(new=T)
plot(theta, posterior, type='l', col='seagreen', xlim=range(theta), ylim=c(0, ymax), ylab='',xlab='Rate', yaxt='n')
legend("topright", c("prior", "likelihood", "posterior"), lty=1, col=c("orange", "skyblue", "seagreen"))
}
posteriorGraph <- function(samples){
ci_hdi <- ci(samples, method = "HDI", verbose = FALSE)
ci_hdi_small <- ci(samples, method = "HDI", ci = .50, verbose = FALSE)
pdf <- tibble(samples = samples)
ggplot(pdf, aes(x = samples)) +
#geom_area(fill="darkslateblue", alpha = 0.6)
geom_density(fill = "darkslateblue", alpha = 0.6, adjust = 4.5) +
geom_vline(xintercept = ci_hdi$CI_low, color = "black", size = .5) +
geom_vline(xintercept = ci_hdi$CI_high, color = "black", size = .5) +
geom_vline(xintercept = ci_hdi_small$CI_low, color = "red", size = .5) +
geom_vline(xintercept = ci_hdi_small$CI_high, color = "red", size = .5)
}
GetNVDAnalysis <- function(years, sev_val = 9, prior_mult = 1, end = 365, theta_range = c(.5,2.0),
new = FALSE, save = FALSE, fill_curve = "white", match_string = "default", from_file = FALSE){
Nrep = 10000
count = 1
if( match_string == "default"){
match_string= "Windows|Microsoft|Office|Mac|Apple|Browser|cisco|java|Oracle|adobe"
}
for(year in years){
prev_year <- year - 1
# use cached files
if( count > 1){
prior <-GetNVDPriors(prev_year,sev_val, match_string = match_string, from_file = from_file) * prior_mult
}else{
# Cache file
prior <-GetNVDPriors(prev_year,sev_val, new_file = new, save_file = save, match_string = match_string, from_file = from_file) * prior_mult
count = count + 1
}
rates <- GetNVDRatesByYear(year, sev_val, day_end = end, get_new = new, cache = save, match_string = match_string, from_file = from_file)
scale <- prior$alpha + rates$num_vulns
rate <- prior$lambda + rates$num_vulns * rates$avg_days
ppd_graph <- GetPostPredDist(Nrep, scale, rate, year, fill_curve)
pm_graph <- GetPostPredMeanGraph(Nrep, scale, rate, year, fill_curve)
pgg <- GetGammaPoisChart(prior, rates)
print(ppd_graph)
print(pm_graph)
print(pgg)
}
}
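#' Illustrative example (not run): download and cache the NVD feeds, then run
#' the analysis for 2021 with a CVSS threshold of 9 and the default product
#' match string. The year choice is arbitrary.
# GetNVDAnalysis(years = c(2021), sev_val = 9, new = TRUE, save = TRUE)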
GetNVDPriors <- function(year, sev_val = 9, optim_vals = c(1,1), new_file = FALSE, save_file = FALSE,
match_string = "default", from_file = FALSE){
nvd_prev <- GetNVDData(year, new = new_file, save = save_file, from_file = from_file)
# Get critical findings
prior_open <- GetCritDF(nvd_prev,sev_val, match_string)
# Sort findings by day of year
prior_open <- sort(prior_open$pub_day)
# Add zeros in the list of year days where there were no findings
prior_open <- GetFirstSeenHits(prior_open, min = 1, max = 365)
g_priors <- GetGammaPriors(prior_open)
# Create readable variable names for prior
ptibble <- tibble(alpha = g_priors[1], lambda = g_priors[2])
return(ptibble)
}
GetNVDRatesByYear <- function(year, sev_val = 9, day_end = 365, get_new = FALSE, cache = FALSE, match_string = "default", from_file = FALSE){
nvd_df <- GetNVDData(year, new = get_new, save = cache, from_file = from_file)
nvd_df <- nvd_df[yday(nvd_df$pub_date) <= day_end,]
#print(nvd_df)
crit_df <- GetCritDF(nvd_df,sev_val, match_string)
wait_time <- sum(diff(sort(crit_df$pub_day)))
num_vulns = nrow(crit_df)
vulns_per_wait_time <- num_vulns/wait_time
avg_days = wait_time/num_vulns
retTib <- tibble(wait_time, num_vulns, vulns_per_wait_time, avg_days)
return(retTib)
}
GetPostPredDist <- function(Nrep, prior, rates, year = "", fill_curve = "white"){
#print(rates)
#shape_val <- prior$alpha + rates$num_vulns
shape_val <- prior
#rate_val <- prior$lambda + rates$num_vulns * rates$avg_days
rate_val <- rates
if( year != ""){
year = paste0(year, " ")
}
theta_sim = rgamma(Nrep, shape_val, rate_val)
y_sim = rexp(Nrep, rate = theta_sim)
val95 <- round(quantile(y_sim, 0.95),2)
val50 <- round(quantile(y_sim, 0.50),2)
df <- tibble(sim=y_sim)
res <- ggplot(df, aes(x=sim)) +
geom_histogram(aes(y=..density..), colour="black", fill="white",bins=100)+
geom_density(alpha=.2, fill=fill_curve)+
geom_vline(aes(xintercept=mean(val95)),
color="black", linetype="dashed", size=.5)+
geom_vline(aes(xintercept=mean(val50)),
color="black", linetype="dashed", size=.5)+
labs(title = paste0(year, "Posterior Predictive Distribution"),
subtitle = paste0("95% Chance Vuln In: ", val95," Days Or Less | 50% In: ",
val50, " Or Less")) +
xlab("Days") + ylab("Strength") + xlim(c(0, max(df$sim)))
return(res)
}
GetPostPredMeanGraph <- function(Nrep, prior, rates, year = "", fill_curve = "white"){
#shape_val <- prior$alpha + rates$num_vulns
#rate_val <- prior$lambda + rates$num_vulns * rates$avg_days
shape_val <- prior
rate_val <- rates
if( year != ""){
year = paste0(year, " ")
}
theta_sim = rgamma(Nrep, shape_val, rate_val)
tratio <- 1 / theta_sim
ci <- ci(tratio, method="HDI")
mean_sim <- round(mean(tratio),2)
df <- tibble(sim=tratio)
res <- ggplot(df, aes(x=sim)) +
geom_histogram(aes(y=..density..), colour="black", fill="white",bins=100)+
# Comment
geom_density(alpha=.2, fill= fill_curve)+
geom_vline(aes(xintercept=ci$CI_low),
color="black", linetype="dashed", size=.5)+
geom_vline(aes(xintercept=ci$CI_high),
color="black", linetype="dashed", size=.5)+
geom_vline(aes(xintercept=mean_sim),
color="black", linetype="dashed", size=.75)+
labs(title = paste0(year,"Posterior Distribution of Mean Inter-Arrivals"),
subtitle = paste0("Mean Rate: ", mean_sim," | 89% HDI: ",
round(ci$CI_low,2), " To: ", round(ci$CI_high,2))) +
xlab("Mean Waiting Time - Days") + ylab("Strength")
return(res)
}
GetGammaPoisChart <- function(prior,data){
p_scale <- prior$alpha + data$num_vulns
p_rate <- prior$lambda + data$num_vulns * data$avg_days
rng <- GetRanges(prior, data, p_scale, p_rate)
sim_g <- rgamma(1000,p_scale,p_rate)
ci <- ci(sim_g,method = "HDI")
ggraph <- ggplot(data.frame(x = c(rng$min_r, rng$max_r)), aes(x = x)) +
stat_function(fun = dgamma, args = list(prior$alpha, prior$lambda),
aes(colour = "Prior"), size = 1.5, geom = "area", fill="goldenrod", alpha=0.2) +
stat_function(fun = dgamma, args = list(data$num_vulns, data$wait_time),
aes(colour = "Likelihood"), size = 1.5, geom = "area", fill="red", alpha=0.2) +
stat_function(fun = dgamma, args = list(p_scale, p_rate),
aes(colour = "Posterior"), size = 1.5, geom = "area", fill="blue", alpha=0.2) +
scale_x_continuous(name = "Vuln Rate")+
scale_y_continuous(name = "Strength") +
ggtitle("Normal function curves of probabilities") +
scale_colour_manual("Bayes Curves", values = c("red", "blue", "goldenrod")) +
geom_vline(aes(xintercept=ci$CI_low),color="black", linetype="dashed", size=.5)+
geom_vline(aes(xintercept=ci$CI_high),color="black", linetype="dashed", size=.5)+
theme_bw()
return(ggraph)
}
GetFirstSeenHits <- function(vals,min = 1, max = 365){
# Holder for Result
res <- c()
# Loop through day range
for(x in min:max){
# If x is a day with a hit in vals...process
if(x %in% vals){
# Get count of x's in vals update return vector
res <- c(res, sum(vals == x))
}else{
# x is not in vals, return a 0 to vector
res <- c(res,0)
}
}
return(res)
}
GetGammaPriors <- function(prior_int){
boot_mean = function(original_vector, resample_vector) {
mean(original_vector[resample_vector])
}
mean_results <- boot(prior_int, boot_mean, R = 2000)
qbs = quantile(mean_results$t, prob=c(.05, .95))
obj = function(x){
sum(abs(pgamma(q=qbs, shape=x[1], rate=x[2]) - c(.05, .95)))
}
gvals = optim(par = c(1, 1), obj)$par
return(gvals)
}
GetNVDData <- function(year, new = FALSE, save = FALSE, from_file = FALSE){
print(paste0("Year:", year))
if(new == FALSE){
nvd_df <- readRDS(paste0("nvd_",year,".RDS"))
return(nvd_df)
}
if( from_file == FALSE){
temp <- tempfile()
download.file(paste0("https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-",year,".json.zip"),temp)
nvd <- fromJSON((unz(temp, paste0("nvdcve-1.1-",year,".json"))))
unlink(temp)
}else{
nvd <- fromJSON((unz(paste0("nvdcve-1.1-",year,".json.zip"),paste0("nvdcve-1.1-",year,".json"))))
}
cves <- nvd$CVE_Items$cve$CVE_data_meta$ID
pub_date <- nvd$CVE_Items$publishedDate
mod_date <- nvd$CVE_Items$lastModifiedDate
b_score <- nvd$CVE_Items$impact$baseMetricV2$cvssV2$baseScore
sev <- nvd$CVE_Items$impact$baseMetricV2$severity
# int_act <- nvd$CVE_Items$impact$baseMetricV2$userInteractionRequired
# all_priv <- nvd$CVE_Items$impact$baseMetricV2$obtainAllPrivilege
# acc_vec <- nvd$CVE_Items$impact$baseMetricV2$cvssV2$accessVector
desc <- nvd$CVE_Items$cve$description$description_data
desc_vals = NULL
for(x in 1:length(desc)){
if( x == 1){
desc_vals <- desc[[x]]$value
}else{
desc_vals <- c(desc_vals, desc[[x]]$value)
}
}
#nvd_df <- tibble(cves,pub_date,mod_date,b_score,sev, desc_vals, int_act, all_priv, acc_vec)
nvd_df <- tibble(cves,pub_date,mod_date,b_score,sev, desc_vals)
if( save == TRUE){
saveRDS(nvd_df, file = paste0("nvd_",year,".RDS"))
}
return(nvd_df)
}
GetLargeVulnGroups <- function(new = FALSE, group_count = 50, ext = c(.005,.03), sev = c(.031, .07), hgh = c(.071, .15)){
# If new then build simulation
if( new == TRUE){
# Can take ~30 seconds to build
dt <- GetVulnGroups(group_count, ext, sev, hgh)
# Save for future use
saveRDS(dt, file = "audit_df.RDS")
}else{
# Load the previously saved file
dt <- readRDS("audit_df.RDS")
}
return(dt)
}
GetEscapeGroups <- function(dt, group_count){
#dt <- year_one_group
dt$escaped <- GetEscapeVal(dt)
# Set a quarter of the groups to start in December
three_quarters <- group_count - round(group_count/4)
dt_one_month <- dt %>% filter(group %in% three_quarters:group_count & (week.fseen >= 48 & week.fseen <= 52))
# Set another quarter of the groups to start in October
half_count <- round(group_count/2)
dt_one_quarter <- dt %>% filter(group %in% half_count:(three_quarters-1) & (week.fseen >= 39 & week.fseen <= 52))
# Have the remaining half start at the beginning of the year
dt_full <- dt %>% filter(group < half_count & week.fseen <= 52)
# Integrate all of the data frames back into one
dt <- bind_rows(dt_one_month, dt_one_quarter, dt_full)
# Remove unwanted data frames.
dt_one_month <- dt_one_quarter <- dt_full <- NULL
# Create group aggregates
group_vals <- dt %>%
group_by(group) %>%
summarize(total_weeks = (max(week.fseen) - min(week.fseen)) + 1,
prod_vuln = sum(escaped==1),
dev_vuln = sum(escaped == 0),
total_vulns = prod_vuln + dev_vuln,.groups = 'drop')
return(group_vals)
}
GetEscapeVal <- function(dt){
rand_vals <- runif(nrow(dt))
#maturity_levels <- mapply(MaturityFlip,dt$dev_maturity)
prob_escape <- dt$beta_prob/dt$dev_maturity
dt$escaped <- if_else(rand_vals < prob_escape,1,0)
return(dt$escaped)
}
AddEscapeDF <- function(df1, df2){
count <- max(nrow(df1),nrow(df2))
res <- tibble()
for(x in 1:count){
if(is.na(df1[x,1]) & is.na(df2[x,1])){
#do nothing
}else if(is.na(df1[x,1]) & !is.na(df2[x,1])){
tib <- df2[x,]
}else if(is.na(df2[x,1]) & !is.na(df1[x,1])){
tib <- df1[x,]
}else{
tib <- df1[x,] + df2[x,]
}
res <- bind_rows(res, tib)
}
return(res)
}
GetBurnMLE <- function(hits,misses){
ll <- function(alpha, beta) {
x <- hits
total <- hits + misses
#print(total)
-sum(VGAM::dbetabinom.ab(x, total, alpha, beta, log = TRUE))
}
m <- mle(ll, start = list(alpha = 1, beta = 10), method = "L-BFGS-B",lower = c(0.0001, .1))
return(coef(m))
}
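#' Illustrative example (not run): maximum-likelihood beta-binomial fit to
#' made-up per-group hit/miss counts. Note this relies on the VGAM package,
#' which is not in the install list above.
# GetBurnMLE(hits = c(3, 5, 2, 8), misses = c(20, 18, 25, 15))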
MakeProportionChart <- function(df_val, xlab="Escape Rates w/95% Credible Intervals", ylab="Product Development Groups",
tval="Escape Rates By Product Group - With KPIs",
sval="Predictive Rates (black) vs Actuals (red)",
kpis = c(.05,.1,.2)){
df_val %>%
ggplot(aes(emp_bayes_avg, rank(emp_bayes_avg))) +
geom_point() +
geom_point(aes(x = escape_avg), color = "red") +
geom_errorbarh(aes(xmin = low_ci, xmax = high_ci)) +
geom_vline(xintercept=kpis[1], color="maroon", size=1, linetype="dotted") +
geom_vline(xintercept=kpis[2], color="orangered2", size=1, linetype="dotted") +
geom_vline(xintercept=kpis[3], color="goldenrod", size=1, linetype="dotted") +
labs(x = xlab,
y = ylab,
title = tval,
subtitle = sval) +
theme_bw()
#return(res)
}
GetCritDF <- function(nvd_val, sev_val, match_string = "default"){
if( match_string == "deafult"){
match_string <- "Windows|Microsoft|Office|Mac|Apple|Browser|cisco|java|Oracle|adobe"
}
crit_df <- nvd_val %>% filter(b_score >= sev_val & grepl(match_string, desc_vals,ignore.case = TRUE))
crit_df$pub_day <- yday(crit_df$pub_date)
crit_df$mod_day <- yday(crit_df$mod_date)
return(crit_df)
}
GetRanges <- function(prior, data, p_scale, p_rate){
p_range <- qgamma(c(.001,.999),prior$alpha,prior$lambda)
d_range <- qgamma(c(.001,.999),data$num_vulns,data$wait_time)
r_range <- qgamma(c(.001,.999),p_scale,p_rate)
ranges <- c(p_range, d_range, r_range)
min_r <- min(ranges) - .01
max_r <- max(ranges) + .01
return(tibble(min_r, max_r))
}
sla_check <- function(fix_time,severity,exposure,audit){
# Extreme
if(severity == "extreme" & exposure == "external" & audit == 0){
if( fix_time <= 7){
return(1)
}else{
return(0)
}
}
if((severity == "extreme" & exposure == "external") | (severity == "critical" & exposure == "external" & audit == 0)){
if( fix_time <= 14){
return(1)
}else{
return(0)
}
}
if((severity == "critical" & exposure == "external") | (severity == "high" & exposure == "external" & audit == 0)){
if(fix_time <= 21){
return(1)
}else{
return(0)
}
}
if((severity == "high" & exposure == "external") | (severity == "extreme" & exposure == "partner" & audit == 0)){
if(fix_time <= 28){
return(1)
}else{
return(0)
}
}
if((severity == "extreme" & exposure == "partner") | (severity == "critical" & exposure == "partner" & audit == 0) ){
if(fix_time <= 35){
return(1)
}else{
return(0)
}
}
if((severity == "critical" & exposure == "partner") | (severity == "high" & exposure == "partner" & audit == 0) ){
if(fix_time <= 42){
return(1)
}else{
return(0)
}
}
if((severity == "high" & exposure == "partner") | (severity == "extreme" & exposure == "internal" & audit == 0) ){
if(fix_time <= 50){
return(1)
}else{
return(0)
}
}
if((severity == "extreme" & exposure == "internal") | (severity == "critical" & exposure == "internal" & audit == 0) ){
if(fix_time <= 60){
return(1)
}else{
return(0)
}
}
if((severity == "critical" & exposure == "internal") | (severity == "high" & exposure == "internal" & audit == 0) ){
if(fix_time <= 75){
return(1)
}else{
return(0)
}
}
if(severity == "high" & exposure == "internal"){
if(fix_time <= 90){
return(1)
}else{
return(0)
}
}
return(0)
}
getLogOddsInt <- function(prob, scale){
# Turn probability into odds
odds <- prob/(1-prob)
# Turn odds into log odds
logOdds <- log(odds)
# Get a standard deviation value
sdev <- abs(round(logOdds/scale,2))
# Return
return(c(logOdds,sdev))
}
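#' Illustrative example (not run): a 25% probability expressed as log odds,
#' with a standard deviation of one third of that magnitude.
# getLogOddsInt(prob = 0.25, scale = 3)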
policy_pred <- function(mcmc, sev, exp, dev_val, audit_val){
# simulate from posterior
res <- posterior_epred( mcmc, newdata = data.frame(
severity = sev,
exposure = exp,
dev_maturity = dev_val,
audit = audit_val))
# Highest Density Interval
ci_val <- ci(res, method = "HDI")
ret_tib <- tibble(mean_est = mean(res),
ci_low = ci_val$CI_low,
ci_high = ci_val$CI_high)
return(ret_tib)
}
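#' Illustrative example (not run): posterior mean and HDI for a prediction at
#' one policy setting, assuming `fit` is a hypothetical rstanarm model
#' (e.g. stan_glm) with severity, exposure, dev_maturity and audit predictors.
# policy_pred(fit, sev = "extreme", exp = "external", dev_val = 3, audit_val = 1)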