Created
April 19, 2022 01:46
-
-
Save ribsy/8a9667859e65b0e3dfa6c4af0e8a939a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
source("manifesto_functions.R") | |
############################################## | |
### Introduction: Random Bombs and Horse Kicks | |
# Bomb Narrative | |
bombs <- tibble(hits = c(0,1,2,3,4,5), | |
frequency = c(229, 211, 93, 35, 7, 1), | |
ratio = frequency/sum(frequency)) | |
bombs | |
table(rpois(576,.932)) | |
# Horse kick Narrative | |
kicks <- tibble(victims = c(0,1,2,3,4,5), | |
frequency = c(109,65,22,3,1,0), | |
ratio = frequency/sum(frequency)) | |
kicks | |
kick_amount <- c(rep(0,109), rep(1,65), rep(2,22), rep(3,3), rep(4,1)) | |
length(kick_amount) | |
kick_amount | |
sum(kick_amount) / length(kick_amount) | |
var(kick_amount)/mean(kick_amount) | |
# Create 200 calvary-years of simulated data and add to kicks data frame | |
kicks$predicted <- round(200 * dpois(0:5,.61)) | |
####################### | |
### Simulating Arrivals | |
# Make Vuln Data Table For Product Team 1 | |
dtGroupOne <- GetAdvSecResults(c(.02,.03,.08),2,4) | |
dtGroupOne$group <- 1 | |
#Make Vuln Data Table For Product Team 2 | |
dtGroupTwo <- GetAdvSecResults(c(.03,.05,.10),1,1) | |
dtGroupTwo$group <- 2 | |
#Make Vuln Data Table For Product Team 3 | |
dtGroupThree <- GetAdvSecResults(c(.05,.07,.12),1,1) | |
dtGroupThree$group <- 3 | |
# Merge Data Tables | |
dt <- bind_rows(dtGroupOne, dtGroupTwo, dtGroupThree) | |
View(dt) | |
vuln_wks <- GetVulnsOpenedByWks(dt) | |
vuln_wks | |
##################################### | |
### Quick Exploration of Arrival Data | |
table(vuln_wks$extreme) | |
# Barplot | |
barplot(table(vuln_wks$extreme), | |
ylab="Number of Weeks",xlab="Number of Vulnerabilities") | |
plot(vuln_wks$week.fseen, vuln_wks$extreme, ylab="Vulnerability Count", | |
xlab="Weeks", type="l") | |
plot(vuln_wks$week.fseen, vuln_wks$all, ylab="Vulnerability Count", | |
xlab="Weeks", type="l") | |
########################## | |
### Simulating Rare Events | |
# Make Data For 3 Prod Teams Using Function | |
dtGroupOne <- GetAdvSecResults(c(.005,.03,.08),1,4) | |
dtGroupOne$group <- 1 | |
dtGroupTwo <- GetAdvSecResults(c(.01,.06,.10),1,3) | |
dtGroupTwo$group <- 2 | |
dtGroupThree <- GetAdvSecResults(c(.02,.08,.12),1,2) | |
dtGroupThree$group <- 3 | |
# Load into one data frame. | |
dt <- bind_rows(dtGroupOne, dtGroupTwo, dtGroupThree) | |
# Get Aggregates for Critical Assets | |
vuln_wks <- GetVulnsOpenedByWks(dt,audit_list = c(0), exposure_list = c(1)) | |
vuln_wks | |
table(vuln_wks$extreme) | |
###################### | |
### Arrival Prediction | |
hits <- 1 - ppois(0:2, lambda=mean(vuln_wks$extreme)) | |
round(hits,3) | |
mean(vuln_wks$extreme) | |
# Run 3 times to see variation | |
ext_wk <- rpois(52, lambda=mean(vuln_wks$extreme)) | |
mean(ext_wk) | |
data_ratio <- var(vuln_wks$extreme)/mean(vuln_wks$extreme) | |
data_ratio | |
############################# | |
### Bayes Meets Arrival Rates | |
# Create Two Additional Data Tables | |
dtGroupFour <- GetAdvSecResults(c(.03,.09,.15),1,3) | |
dtGroupFour$group <- 4 | |
dtGroupFive <- GetAdvSecResults(c(.008,.04,.12),1,4) | |
dtGroupFive$group <- 5 | |
# Load into one data table. | |
dt <- bind_rows(dtGroupOne, dtGroupTwo, dtGroupThree, dtGroupFour, | |
dtGroupFive) | |
# Get Assets Under Audit With External Exposure | |
vuln_wks <- GetVulnsOpenedByWks(dt,audit_list = c(0), exposure_list = c(1), | |
group_list = 1:10, week_list = 1:52) | |
table(vuln_wks$extreme) | |
data_ratio <- var(vuln_wks$extreme)/mean(vuln_wks$extreme) | |
data_ratio | |
################################### | |
### Subject Matter Expert Interview | |
# First quarter data | |
cnt <- vuln_wks$extreme[vuln_wks$week.fseen <= 13] | |
cnt | |
# Get hits in 13 weeks for input below | |
sum(cnt) | |
# Add beliefs for each eng, and 20 hits in 13 weeks | |
gscore <- ScoreGammaBeliefs( | |
bias = c(0.5, 0.5), | |
first_belief = c(40,52), | |
second_belief = c(30,52), | |
data = c(sum(cnt),13)) | |
gscore | |
############################################ | |
### Building A Simple Bayesian Arrival Model | |
# Scaled Location Of Rate | |
arrival_shape <- 3 | |
arrival_rate <- 4 | |
# Plausible Rates | |
plausible_arrivals = seq(0, 6, 0.001) | |
#Get Distribution of Beliefs | |
arrival_beliefs <- dgamma(plausible_arrivals, arrival_shape, arrival_rate) | |
# Plausible Rates Graphed | |
plotGammaAdv(plausible_arrivals,arrival_beliefs, ytop = 4, | |
tlab = "Weekly Vuln Arrival Rate", | |
slab = "Rates More Focused on .75") | |
#################################################### | |
### Updating Our Beliefs About Extreme Arrival Rates | |
# Scaled Location Of Rate | |
arrival_shape <- 9 #Updated | |
arrival_rate <- 12 #Updated | |
# Plausible Arrival Rates | |
plausible_arrivals = seq(0, 2.5, 0.001) | |
#Get Arrival Beliefs | |
arrival_beliefs <- dgamma(plausible_arrivals, arrival_shape, arrival_rate) | |
# Plausible Rates Scored | |
plotGammaAdv(plausible_arrivals,arrival_beliefs,ytop = 4, | |
tlab = "Weekly Vuln Arrival Rate", | |
slab = "Rates Closer To .75 Most Likely") | |
### First Quarter Data | |
#Data | |
total_weeks <- 13 #Q1 | |
# Subset vuln_wks to get the first 13 weeks of data | |
total_arrivals <- sum(vuln_wks$extreme[vuln_wks$week.fseen <= total_weeks]) | |
mean_arrivals <- total_arrivals/total_weeks #optional | |
# likelihood of the data - FOR GRAPHING ONLY | |
arrival_likelihood = dpois(total_arrivals, total_weeks * | |
plausible_arrivals) | |
# posterior beliefs - mesh priors with data | |
posterior_beliefs = dgamma(plausible_arrivals, | |
arrival_shape + total_arrivals, | |
arrival_rate + total_weeks) | |
# Mesh Prior Beliefs with New Data To Get Updated Beliefs | |
plot_posterior(plausible_arrivals, arrival_beliefs, arrival_likelihood, | |
posterior_beliefs) | |
# Add a 95% Credible Interval To The Graph | |
abline(v = qgamma(c(0.025, 0.975), arrival_shape + total_weeks * | |
mean_arrivals, arrival_rate + total_weeks), | |
col = "seagreen", lty = 2) | |
### Full Code | |
# Scaled Location Of The Prior Rate | |
arrival_shape <- 9 | |
arrival_rate <- 12 | |
# Plausible Rates | |
plausible_arrivals = seq(0, 2.5, 0.001) | |
#Get Prior Density | |
arrival_beliefs <- dgamma(plausible_arrivals, arrival_shape, arrival_rate) | |
# Plot Prior | |
plotGammaAdv(plausible_arrivals,arrival_beliefs,ytop = 4, | |
tlab = "Weekly Vuln Arrival Rate", | |
slab = "Rates More Focused on 1") | |
#Get One Year’s Worth of Data | |
total_weeks <- 52 | |
total_arrivals <- sum(vuln_wks$extreme[vuln_wks$week.fseen <= total_weeks]) | |
mean_arrivals <- total_arrivals/total_weeks | |
# Get likelihood – for graphing | |
arrival_likelihood = dpois(total_arrivals, total_weeks * | |
plausible_arrivals) | |
# Get updated posterior | |
posterior_beliefs = dgamma(plausible_arrivals, arrival_shape + | |
total_arrivals, arrival_rate + total_weeks) | |
# Plot Posterior | |
plot_posterior(plausible_arrivals, arrival_beliefs, arrival_likelihood, | |
posterior_beliefs) | |
# Add 95% Credible Interval | |
abline(v = qgamma(c(0.025, 0.975), arrival_shape + total_weeks * | |
mean_arrivals, arrival_rate + total_weeks), | |
col = "seagreen", lty = 2) | |
# Sample From Posterior | |
samples <- sample( plausible_arrivals , prob=posterior_beliefs, | |
size = 10000, replace = TRUE ) | |
# Graph Posterior Alone | |
posteriorGraph(samples) | |
ci_hdi <- ci(samples, method = "HDI", verbose = FALSE) | |
ci_hdi | |
ci_hdi_small <- ci(samples, method = "HDI", ci = .50, verbose = FALSE) | |
ci_hdi_small | |
# Run a few times to see varation | |
rp <- rpois(total_weeks,samples) | |
mean(rp) | |
reps = 10000 | |
sims = rpois(reps, samples) | |
plot(table(sims) / reps, type = "h", | |
xlab = "Number Of Extreme Vulnerabilities", | |
ylab = "Simulated frequency", | |
main = "Posterior predictive distribution", | |
sub = paste0("Out of ",reps," simulations")) | |
quantile(sims, 0.95) | |
sum(sims >= 1)/reps | |
sum(sims >= 2)/reps | |
sum(sims >= 3)/reps | |
####################### | |
### Advanced Prediction | |
# Get 8 weeks of data | |
# Functions used to get optimized gamma values | |
boot_mean <- function(original_vector, resample_vector) { | |
mean(original_vector[resample_vector]) | |
} | |
obj = function(x){ | |
sum(abs(pgamma(q=qbs, shape=x[1], rate=x[2]) - c(.05, .95))) | |
} | |
prior_weeks <- 8 | |
v_open <- c(0, 1, 0, 0, 3, 0, 1, 2) | |
# Simulate 2000 weekly vuln rates | |
mean_results <- boot(v_open, boot_mean, R = 2000) | |
# Print out a few weekly vulnerability rates | |
head(mean_results$t,10) | |
# Get 95% interval from rates | |
qbs = quantile(mean_results$t, prob=c(.05, .95)) | |
# Print Rates | |
qbs | |
# Get best gamma() priors. | |
# Note obj takes in qbs values (95% CI) as a parameter | |
theta = optim(par = c(1, 1), obj)$par | |
# Print optimized shape and rate values for gamma function | |
theta | |
# Get Rate | |
theta[1]/theta[2] | |
### Model the first two quarters of data | |
weeks <- 26 | |
# Use clear names for priors | |
prior_shape <- theta[1] | |
prior_rate <- theta[2] | |
# Get count of extreme vulns for the first 26 weeks. | |
vulns <- sum(vuln_wks$extreme[vuln_wks$week.fseen <= weeks]) | |
#BAYES CHART | |
# Posterior | |
curve(dgamma(x, shape=vulns + prior_shape, rate=weeks + prior_rate), | |
from=0,to=2, xlab="Vulns Per Week", | |
ylab="Density", col=1, lwd=4, las=1) | |
# Likelihood | |
curve(dgamma(x, shape=vulns, rate=weeks), add=TRUE, col=2, lwd=4) | |
# Prior | |
curve(dgamma(x, shape=prior_shape, rate=prior_rate), add=TRUE, | |
col=3, lwd=4) | |
# Legend | |
legend("topright", c("Prior", "Likelihood", | |
"Posterior"), col=c(3, 2, 1), lwd=c(3, 3, 3)) | |
### 52 Weeks Prediction | |
# PREDICTIVE CHART | |
new_weeks = 52 #weeks | |
# Mean | |
gamma_mean = new_weeks * (vulns + prior_shape)/(weeks + prior_rate) | |
# Variance | |
gamma_var = new_weeks * gamma_mean * (new_weeks + weeks + | |
prior_rate)/(weeks + prior_rate) | |
# Rates along the x axis - the 4 and 1.8 help set the width | |
future_rates = round(gamma_mean/4):round(gamma_mean*1.8) | |
# Score (Density) for each of the future rates. | |
future_vulns = dnbinom(future_rates, mu=gamma_mean, size=gamma_var) | |
# Graph Histogram | |
par(las=1, mar=c(5, 4, 2, 4)) | |
plot(future_rates, future_vulns, type="h", | |
xlab="Number of Vulnerabilities", | |
ylab="Probability", col="gray", lwd=4) | |
par(new=TRUE) | |
# Cumulative Probability Curve | |
p = pnbinom(future_rates, mu=gamma_mean, size=gamma_var) | |
plot(future_rates, p, type="l", col="red", xaxt="n", | |
yaxt="n",xlab="", ylab="", lwd=2) | |
axis(4) | |
mtext("Probability of N% or less vulnerabilities",side=4, | |
line=2.5, las=0) | |
par(new=TRUE) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment