Created
April 5, 2023 09:15
-
-
Save ribsy/8e2ad5aa8fc5adec17ed625f031d718a to your computer and use it in GitHub Desktop.
R Code For Chapter 10 HTMA Cybersecurity Risk v 2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(bayestestR) | |
library(bayesAB) | |
# See Manifesto Functions at www.themetricsmanifesto.com | |
# Put in your same working director | |
source("manifesto_functions.R") | |
bayesPhishArrive <- function(prior_vals = c(5,1), total_vals = c(87,12)){ | |
# Subjective Priors, equals 1.5 per week i.e. 3/2 | |
prior_events <- prior_vals[1] | |
prior_weeks <-prior_vals[2] | |
prior_rate <- prior_events / prior_weeks | |
# Data, which equals 2.5 per week 25/10 | |
total_events <- total_vals[1] | |
total_weeks <- total_vals[2] | |
total_rate <- total_events / total_weeks | |
range_events <- if_else(total_rate <= prior_rate, prior_rate, total_rate) | |
range_events <- range_events * 5 | |
# Make grid of 10K sequential values between 0 to 5 | |
plausible_rates <- data.frame(possible_grid = | |
seq(from = 0, to = range_events, length = 10000)) | |
# Get Prior and Likelihood | |
plausible_rates <- plausible_rates %>% | |
mutate(prior = dgamma(possible_grid, prior_events, prior_weeks), | |
likelihood = dpois(total_events, possible_grid * total_weeks)) | |
# Get Posterior | |
plausible_rates <- plausible_rates %>% | |
mutate(unnormalized = likelihood * prior, | |
posterior = unnormalized / sum(unnormalized)) | |
# Simulate 10K samples weighted on posterior | |
post_sample <- sample_n(plausible_rates, size = 10000, | |
weight = posterior, replace = TRUE) | |
return(post_sample) | |
} | |
get_phish_rate <- function(post_sample){ | |
phish_res <- post_sample %>% filter(posterior == max(posterior)) %>% | |
select(possible_grid) %>% unique() | |
phish_rate <- phish_res[[1]] | |
return(phish_rate) | |
} | |
bayes_waits_analysis <- function(prior,waits){ | |
# Number Of Simulations | |
Nrep = 10000 | |
n <- length(waits) | |
wait_time <- sum(waits) | |
# Get avg vulns per wait time | |
vulns_per_wait_time <- n/wait_time | |
gvals <- GetGammaPriors(prior) | |
scale_val <- prior_scale | |
alpha <- gvals[1] * scale_val | |
lambda <- gvals[2] * scale_val | |
# prior vuln count + num vulns from data | |
shape = alpha + n | |
# prior time + wait time from data | |
rate = lambda + wait_time | |
# Simulate 10K vuln day rates: .21, .17, .15, .22 etc | |
theta_sim = rgamma(Nrep, shape, rate) | |
# Get interarrival values from the exponential distribution | |
y_sim = rexp(Nrep, rate = theta_sim) | |
# Get 95% and 50% quantiles - used for making vertical lines | |
val95 <- round(quantile(y_sim, 0.95),2) | |
val50 <- round(quantile(y_sim, 0.50),2) | |
# Pass interarrival data into DF | |
df <- tibble(sim=y_sim) | |
# Pass DF into ggplot | |
ggplot(df, aes(x=sim)) + | |
# Declare histogram shape | |
geom_histogram(aes(y=..density..), colour="black", | |
fill="white",bins=100)+ | |
# Curve for chart | |
geom_density(alpha=.2, fill="white")+ | |
# 95% or less vertical line | |
geom_vline(aes(xintercept=mean(val95)), | |
color="black", linetype="dashed", size=.5)+ | |
# 50% or less vertical line | |
geom_vline(aes(xintercept=mean(val50)), | |
color="black", linetype="dashed", size=.5)+ | |
# Labels for chart | |
labs(title = "Posterior Predictive Distribution", | |
subtitle = paste0("95% Chance of Event In: ", | |
val95," Days Or Less | 50% In: ", | |
val50, " Or Less")) + | |
xlab("Days") + ylab("Strength") + xlim(c(0, val95*2)) | |
} | |
bayes_waits_basic <- function(prior, waits, prior_scale = 1,x_update = 2){ | |
p_avg <- sum(prior)/length(prior) | |
w_avg <- sum(waits)/length(waits) | |
xrange <- ifelse(w_avg > p_avg, w_avg, p_avg) * x_update | |
gvals <- GetGammaPriors(prior) | |
scale_val <- prior_scale | |
alpha <- gvals[1] * scale_val | |
lambda <- gvals[2] * scale_val | |
# Create a list of the possible rates (for graphing only) | |
theta = seq(0, xrange, 0.001) | |
# Score possibilities using priors (for graphing only) | |
prior = dgamma(theta, shape = alpha, rate = lambda) | |
# likelihood distribution - for graphing purposes only | |
likelihood = dgamma(sum(waits), shape = length(waits), rate = theta) | |
# Posterior - Conjugate prior update. | |
posterior = dgamma(theta, alpha + length(waits), lambda + sum(waits)) | |
# Bayesian update graph | |
plot_posterior(theta, prior, likelihood, posterior) | |
abline(v = qgamma(c(0.025, 0.975), alpha + length(waits), lambda + sum(waits)), | |
col = "seagreen", lty = 2) | |
} | |
non_bayes_waits <- function(waits,range = 30, to_val = ""){ | |
# Determine if event rate measurement should be in days or hours. | |
ratio_val <- sum(waits)/length(waits) | |
time_vals <- pexp(1:range, ratio_val,lower.tail = TRUE) | |
index_val <- max(which(time_vals < .99)) | |
rate_val <- 1000/index_val | |
if(to_val == ""){ | |
to_val <- round(index_val/2) | |
} | |
time_type <- "time" | |
tibble(x = 0:1000 / rate_val, | |
prob = pexp(q = 0:1000 / rate_val, | |
rate = sum(waits)/length(waits), | |
lower.tail = TRUE)) %>% | |
# Add a column to the data frame | |
mutate(Interval = ifelse(x>= 0 & x <= to_val, paste0("0 to ", to_val), "other")) %>% | |
# Send the data frame to the graphing functions | |
ggplot(aes(x = x, y = prob, fill = Interval)) + | |
geom_area(alpha = 0.3) + | |
labs(title = "Probability Of Event Arriving", | |
subtitle = paste0(round(time_vals[to_val],2)*100,"% chance of event arriving in ",to_val," day or less."), | |
x = str_to_title(time_type), | |
y = "Cumulative Probability") | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment