Skip to content

Instantly share code, notes, and snippets.

@ribsy
Created April 19, 2022 01:46
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save ribsy/8a9667859e65b0e3dfa6c4af0e8a939a to your computer and use it in GitHub Desktop.
Save ribsy/8a9667859e65b0e3dfa6c4af0e8a939a to your computer and use it in GitHub Desktop.
source("manifesto_functions.R")
##############################################
### Introduction: Random Bombs and Horse Kicks
# Bomb Narrative
bombs <- tibble(hits = c(0,1,2,3,4,5),
frequency = c(229, 211, 93, 35, 7, 1),
ratio = frequency/sum(frequency))
bombs
table(rpois(576,.932))
# Horse kick Narrative
kicks <- tibble(victims = c(0,1,2,3,4,5),
frequency = c(109,65,22,3,1,0),
ratio = frequency/sum(frequency))
kicks
kick_amount <- c(rep(0,109), rep(1,65), rep(2,22), rep(3,3), rep(4,1))
length(kick_amount)
kick_amount
sum(kick_amount) / length(kick_amount)
var(kick_amount)/mean(kick_amount)
# Create 200 calvary-years of simulated data and add to kicks data frame
kicks$predicted <- round(200 * dpois(0:5,.61))
#######################
### Simulating Arrivals
# Make Vuln Data Table For Product Team 1
dtGroupOne <- GetAdvSecResults(c(.02,.03,.08),2,4)
dtGroupOne$group <- 1
#Make Vuln Data Table For Product Team 2
dtGroupTwo <- GetAdvSecResults(c(.03,.05,.10),1,1)
dtGroupTwo$group <- 2
#Make Vuln Data Table For Product Team 3
dtGroupThree <- GetAdvSecResults(c(.05,.07,.12),1,1)
dtGroupThree$group <- 3
# Merge Data Tables
dt <- bind_rows(dtGroupOne, dtGroupTwo, dtGroupThree)
View(dt)
vuln_wks <- GetVulnsOpenedByWks(dt)
vuln_wks
#####################################
### Quick Exploration of Arrival Data
table(vuln_wks$extreme)
# Barplot
barplot(table(vuln_wks$extreme),
ylab="Number of Weeks",xlab="Number of Vulnerabilities")
plot(vuln_wks$week.fseen, vuln_wks$extreme, ylab="Vulnerability Count",
xlab="Weeks", type="l")
plot(vuln_wks$week.fseen, vuln_wks$all, ylab="Vulnerability Count",
xlab="Weeks", type="l")
##########################
### Simulating Rare Events
# Make Data For 3 Prod Teams Using Function
dtGroupOne <- GetAdvSecResults(c(.005,.03,.08),1,4)
dtGroupOne$group <- 1
dtGroupTwo <- GetAdvSecResults(c(.01,.06,.10),1,3)
dtGroupTwo$group <- 2
dtGroupThree <- GetAdvSecResults(c(.02,.08,.12),1,2)
dtGroupThree$group <- 3
# Load into one data frame.
dt <- bind_rows(dtGroupOne, dtGroupTwo, dtGroupThree)
# Get Aggregates for Critical Assets
vuln_wks <- GetVulnsOpenedByWks(dt,audit_list = c(0), exposure_list = c(1))
vuln_wks
table(vuln_wks$extreme)
######################
### Arrival Prediction
hits <- 1 - ppois(0:2, lambda=mean(vuln_wks$extreme))
round(hits,3)
mean(vuln_wks$extreme)
# Run 3 times to see variation
ext_wk <- rpois(52, lambda=mean(vuln_wks$extreme))
mean(ext_wk)
data_ratio <- var(vuln_wks$extreme)/mean(vuln_wks$extreme)
data_ratio
#############################
### Bayes Meets Arrival Rates
# Create Two Additional Data Tables
dtGroupFour <- GetAdvSecResults(c(.03,.09,.15),1,3)
dtGroupFour$group <- 4
dtGroupFive <- GetAdvSecResults(c(.008,.04,.12),1,4)
dtGroupFive$group <- 5
# Load into one data table.
dt <- bind_rows(dtGroupOne, dtGroupTwo, dtGroupThree, dtGroupFour,
dtGroupFive)
# Get Assets Under Audit With External Exposure
vuln_wks <- GetVulnsOpenedByWks(dt,audit_list = c(0), exposure_list = c(1),
group_list = 1:10, week_list = 1:52)
table(vuln_wks$extreme)
data_ratio <- var(vuln_wks$extreme)/mean(vuln_wks$extreme)
data_ratio
###################################
### Subject Matter Expert Interview
# First quarter data
cnt <- vuln_wks$extreme[vuln_wks$week.fseen <= 13]
cnt
# Get hits in 13 weeks for input below
sum(cnt)
# Add beliefs for each eng, and 20 hits in 13 weeks
gscore <- ScoreGammaBeliefs(
bias = c(0.5, 0.5),
first_belief = c(40,52),
second_belief = c(30,52),
data = c(sum(cnt),13))
gscore
############################################
### Building A Simple Bayesian Arrival Model
# Scaled Location Of Rate
arrival_shape <- 3
arrival_rate <- 4
# Plausible Rates
plausible_arrivals = seq(0, 6, 0.001)
#Get Distribution of Beliefs
arrival_beliefs <- dgamma(plausible_arrivals, arrival_shape, arrival_rate)
# Plausible Rates Graphed
plotGammaAdv(plausible_arrivals,arrival_beliefs, ytop = 4,
tlab = "Weekly Vuln Arrival Rate",
slab = "Rates More Focused on .75")
####################################################
### Updating Our Beliefs About Extreme Arrival Rates
# Scaled Location Of Rate
arrival_shape <- 9 #Updated
arrival_rate <- 12 #Updated
# Plausible Arrival Rates
plausible_arrivals = seq(0, 2.5, 0.001)
#Get Arrival Beliefs
arrival_beliefs <- dgamma(plausible_arrivals, arrival_shape, arrival_rate)
# Plausible Rates Scored
plotGammaAdv(plausible_arrivals,arrival_beliefs,ytop = 4,
tlab = "Weekly Vuln Arrival Rate",
slab = "Rates Closer To .75 Most Likely")
### First Quarter Data
#Data
total_weeks <- 13 #Q1
# Subset vuln_wks to get the first 13 weeks of data
total_arrivals <- sum(vuln_wks$extreme[vuln_wks$week.fseen <= total_weeks])
mean_arrivals <- total_arrivals/total_weeks #optional
# likelihood of the data - FOR GRAPHING ONLY
arrival_likelihood = dpois(total_arrivals, total_weeks *
plausible_arrivals)
# posterior beliefs - mesh priors with data
posterior_beliefs = dgamma(plausible_arrivals,
arrival_shape + total_arrivals,
arrival_rate + total_weeks)
# Mesh Prior Beliefs with New Data To Get Updated Beliefs
plot_posterior(plausible_arrivals, arrival_beliefs, arrival_likelihood,
posterior_beliefs)
# Add a 95% Credible Interval To The Graph
abline(v = qgamma(c(0.025, 0.975), arrival_shape + total_weeks *
mean_arrivals, arrival_rate + total_weeks),
col = "seagreen", lty = 2)
### Full Code
# Scaled Location Of The Prior Rate
arrival_shape <- 9
arrival_rate <- 12
# Plausible Rates
plausible_arrivals = seq(0, 2.5, 0.001)
#Get Prior Density
arrival_beliefs <- dgamma(plausible_arrivals, arrival_shape, arrival_rate)
# Plot Prior
plotGammaAdv(plausible_arrivals,arrival_beliefs,ytop = 4,
tlab = "Weekly Vuln Arrival Rate",
slab = "Rates More Focused on 1")
#Get One Year’s Worth of Data
total_weeks <- 52
total_arrivals <- sum(vuln_wks$extreme[vuln_wks$week.fseen <= total_weeks])
mean_arrivals <- total_arrivals/total_weeks
# Get likelihood – for graphing
arrival_likelihood = dpois(total_arrivals, total_weeks *
plausible_arrivals)
# Get updated posterior
posterior_beliefs = dgamma(plausible_arrivals, arrival_shape +
total_arrivals, arrival_rate + total_weeks)
# Plot Posterior
plot_posterior(plausible_arrivals, arrival_beliefs, arrival_likelihood,
posterior_beliefs)
# Add 95% Credible Interval
abline(v = qgamma(c(0.025, 0.975), arrival_shape + total_weeks *
mean_arrivals, arrival_rate + total_weeks),
col = "seagreen", lty = 2)
# Sample From Posterior
samples <- sample( plausible_arrivals , prob=posterior_beliefs,
size = 10000, replace = TRUE )
# Graph Posterior Alone
posteriorGraph(samples)
ci_hdi <- ci(samples, method = "HDI", verbose = FALSE)
ci_hdi
ci_hdi_small <- ci(samples, method = "HDI", ci = .50, verbose = FALSE)
ci_hdi_small
# Run a few times to see varation
rp <- rpois(total_weeks,samples)
mean(rp)
reps = 10000
sims = rpois(reps, samples)
plot(table(sims) / reps, type = "h",
xlab = "Number Of Extreme Vulnerabilities",
ylab = "Simulated frequency",
main = "Posterior predictive distribution",
sub = paste0("Out of ",reps," simulations"))
quantile(sims, 0.95)
sum(sims >= 1)/reps
sum(sims >= 2)/reps
sum(sims >= 3)/reps
#######################
### Advanced Prediction
# Get 8 weeks of data
# Functions used to get optimized gamma values
boot_mean <- function(original_vector, resample_vector) {
mean(original_vector[resample_vector])
}
obj = function(x){
sum(abs(pgamma(q=qbs, shape=x[1], rate=x[2]) - c(.05, .95)))
}
prior_weeks <- 8
v_open <- c(0, 1, 0, 0, 3, 0, 1, 2)
# Simulate 2000 weekly vuln rates
mean_results <- boot(v_open, boot_mean, R = 2000)
# Print out a few weekly vulnerability rates
head(mean_results$t,10)
# Get 95% interval from rates
qbs = quantile(mean_results$t, prob=c(.05, .95))
# Print Rates
qbs
# Get best gamma() priors.
# Note obj takes in qbs values (95% CI) as a parameter
theta = optim(par = c(1, 1), obj)$par
# Print optimized shape and rate values for gamma function
theta
# Get Rate
theta[1]/theta[2]
### Model the first two quarters of data
weeks <- 26
# Use clear names for priors
prior_shape <- theta[1]
prior_rate <- theta[2]
# Get count of extreme vulns for the first 26 weeks.
vulns <- sum(vuln_wks$extreme[vuln_wks$week.fseen <= weeks])
#BAYES CHART
# Posterior
curve(dgamma(x, shape=vulns + prior_shape, rate=weeks + prior_rate),
from=0,to=2, xlab="Vulns Per Week",
ylab="Density", col=1, lwd=4, las=1)
# Likelihood
curve(dgamma(x, shape=vulns, rate=weeks), add=TRUE, col=2, lwd=4)
# Prior
curve(dgamma(x, shape=prior_shape, rate=prior_rate), add=TRUE,
col=3, lwd=4)
# Legend
legend("topright", c("Prior", "Likelihood",
"Posterior"), col=c(3, 2, 1), lwd=c(3, 3, 3))
### 52 Weeks Prediction
# PREDICTIVE CHART
new_weeks = 52 #weeks
# Mean
gamma_mean = new_weeks * (vulns + prior_shape)/(weeks + prior_rate)
# Variance
gamma_var = new_weeks * gamma_mean * (new_weeks + weeks +
prior_rate)/(weeks + prior_rate)
# Rates along the x axis - the 4 and 1.8 help set the width
future_rates = round(gamma_mean/4):round(gamma_mean*1.8)
# Score (Density) for each of the future rates.
future_vulns = dnbinom(future_rates, mu=gamma_mean, size=gamma_var)
# Graph Histogram
par(las=1, mar=c(5, 4, 2, 4))
plot(future_rates, future_vulns, type="h",
xlab="Number of Vulnerabilities",
ylab="Probability", col="gray", lwd=4)
par(new=TRUE)
# Cumulative Probability Curve
p = pnbinom(future_rates, mu=gamma_mean, size=gamma_var)
plot(future_rates, p, type="l", col="red", xaxt="n",
yaxt="n",xlab="", ylab="", lwd=2)
axis(4)
mtext("Probability of N% or less vulnerabilities",side=4,
line=2.5, las=0)
par(new=TRUE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment