@ribsy
Created April 5, 2023 09:15
R Code For Chapter 10 HTMA Cybersecurity Risk v 2
library(tidyverse)
library(bayestestR)
library(bayesAB)
# See Manifesto Functions at www.themetricsmanifesto.com
# Place the file in your working directory
source("manifesto_functions.R")
bayesPhishArrive <- function(prior_vals = c(5, 1), total_vals = c(87, 12)){
  # Subjective prior: events and weeks (e.g., c(3, 2) is 1.5 phish per week)
  prior_events <- prior_vals[1]
  prior_weeks <- prior_vals[2]
  prior_rate <- prior_events / prior_weeks
  # Observed data: events and weeks (e.g., c(25, 10) is 2.5 phish per week)
  total_events <- total_vals[1]
  total_weeks <- total_vals[2]
  total_rate <- total_events / total_weeks
  range_events <- if_else(total_rate <= prior_rate, prior_rate, total_rate)
  range_events <- range_events * 5
  # Make a grid of 10K sequential rate values between 0 and range_events
  plausible_rates <- data.frame(possible_grid =
    seq(from = 0, to = range_events, length = 10000))
  # Get prior and likelihood
  plausible_rates <- plausible_rates %>%
    mutate(prior = dgamma(possible_grid, prior_events, prior_weeks),
           likelihood = dpois(total_events, possible_grid * total_weeks))
  # Get posterior
  plausible_rates <- plausible_rates %>%
    mutate(unnormalized = likelihood * prior,
           posterior = unnormalized / sum(unnormalized))
  # Simulate 10K samples weighted on the posterior
  post_sample <- sample_n(plausible_rates, size = 10000,
                          weight = posterior, replace = TRUE)
  return(post_sample)
}
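# Illustrative call with hypothetical inputs: a prior of c(3, 2) is 1.5 phish
# per week and data of c(25, 10) is 2.5 per week, matching the rates noted in
# the comments above; swap in your own event and week counts.
phish_post <- bayesPhishArrive(prior_vals = c(3, 2), total_vals = c(25, 10))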
get_phish_rate <- function(post_sample){
  # Return the rate with the highest posterior weight (the most plausible rate)
  phish_res <- post_sample %>% filter(posterior == max(posterior)) %>%
    select(possible_grid) %>% unique()
  phish_rate <- phish_res[[1]]
  return(phish_rate)
}
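# Continuing the hypothetical example above: pull the most plausible weekly
# phish rate from the posterior sample.
phish_rate <- get_phish_rate(phish_post)
phish_rate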
bayes_waits_analysis <- function(prior, waits, prior_scale = 1){
  # Number of simulations
  Nrep <- 10000
  n <- length(waits)
  wait_time <- sum(waits)
  # Get avg vulns per wait time
  vulns_per_wait_time <- n / wait_time
  gvals <- GetGammaPriors(prior)
  scale_val <- prior_scale
  alpha <- gvals[1] * scale_val
  lambda <- gvals[2] * scale_val
  # Prior vuln count + num vulns from data
  shape <- alpha + n
  # Prior time + wait time from data
  rate <- lambda + wait_time
  # Simulate 10K vuln day rates: .21, .17, .15, .22, etc.
  theta_sim <- rgamma(Nrep, shape, rate)
  # Get interarrival values from the exponential distribution
  y_sim <- rexp(Nrep, rate = theta_sim)
  # Get 95% and 50% quantiles - used for making vertical lines
  val95 <- round(quantile(y_sim, 0.95), 2)
  val50 <- round(quantile(y_sim, 0.50), 2)
  # Pass interarrival data into a data frame
  df <- tibble(sim = y_sim)
  # Pass the data frame into ggplot
  ggplot(df, aes(x = sim)) +
    # Declare histogram shape
    geom_histogram(aes(y = ..density..), colour = "black",
                   fill = "white", bins = 100) +
    # Density curve for the chart
    geom_density(alpha = .2, fill = "white") +
    # 95% or less vertical line
    geom_vline(aes(xintercept = val95),
               color = "black", linetype = "dashed", size = .5) +
    # 50% or less vertical line
    geom_vline(aes(xintercept = val50),
               color = "black", linetype = "dashed", size = .5) +
    # Labels for the chart
    labs(title = "Posterior Predictive Distribution",
         subtitle = paste0("95% Chance of Event In: ",
                           val95, " Days Or Less | 50% In: ",
                           val50, " Or Less")) +
    xlab("Days") + ylab("Strength") + xlim(c(0, val95 * 2))
}
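# Illustrative call with hypothetical wait times (days between discovered
# vulnerabilities). This assumes GetGammaPriors() from manifesto_functions.R
# accepts a vector of prior-period wait times, as bayes_waits_basic() below
# also implies.
prior_waits <- c(4, 6, 9, 5)
observed_waits <- c(3, 7, 2, 5, 4)
bayes_waits_analysis(prior_waits, observed_waits)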
bayes_waits_basic <- function(prior, waits, prior_scale = 1, x_update = 2){
  p_avg <- sum(prior) / length(prior)
  w_avg <- sum(waits) / length(waits)
  xrange <- ifelse(w_avg > p_avg, w_avg, p_avg) * x_update
  gvals <- GetGammaPriors(prior)
  scale_val <- prior_scale
  alpha <- gvals[1] * scale_val
  lambda <- gvals[2] * scale_val
  # Create a list of the possible rates (for graphing only)
  theta <- seq(0, xrange, 0.001)
  # Score possibilities using priors (for graphing only)
  prior <- dgamma(theta, shape = alpha, rate = lambda)
  # Likelihood distribution - for graphing purposes only
  likelihood <- dgamma(sum(waits), shape = length(waits), rate = theta)
  # Posterior - conjugate prior update
  posterior <- dgamma(theta, alpha + length(waits), lambda + sum(waits))
  # Bayesian update graph
  plot_posterior(theta, prior, likelihood, posterior)
  abline(v = qgamma(c(0.025, 0.975), alpha + length(waits), lambda + sum(waits)),
         col = "seagreen", lty = 2)
}
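# Illustrative call reusing the same hypothetical wait-time vectors as above;
# prior_scale and x_update are left at their defaults. plot_posterior() also
# comes from manifesto_functions.R.
bayes_waits_basic(prior_waits, observed_waits)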
non_bayes_waits <- function(waits, range = 30, to_val = ""){
  # Work out how far out in time to plot: the point where the cumulative
  # probability of an event arriving approaches 99%
  ratio_val <- sum(waits) / length(waits)
  time_vals <- pexp(1:range, ratio_val, lower.tail = TRUE)
  index_val <- max(which(time_vals < .99))
  rate_val <- 1000 / index_val
  if(to_val == ""){
    to_val <- round(index_val / 2)
  }
  time_type <- "time"
  tibble(x = 0:1000 / rate_val,
         prob = pexp(q = 0:1000 / rate_val,
                     rate = sum(waits) / length(waits),
                     lower.tail = TRUE)) %>%
    # Add a column flagging the interval of interest
    mutate(Interval = ifelse(x >= 0 & x <= to_val, paste0("0 to ", to_val), "other")) %>%
    # Send the data frame to the graphing functions
    ggplot(aes(x = x, y = prob, fill = Interval)) +
    geom_area(alpha = 0.3) +
    labs(title = "Probability Of Event Arriving",
         subtitle = paste0(round(time_vals[to_val], 2) * 100,
                           "% chance of event arriving in ", to_val, " days or less."),
         x = str_to_title(time_type),
         y = "Cumulative Probability")
}
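# Illustrative call with a hypothetical input: non_bayes_waits() uses
# mean(waits) directly as the daily event rate, so the vector below is treated
# as roughly 0.2 events per day (about one event every five days).
non_bayes_waits(c(0.25, 0.15, 0.2, 0.2))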