This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
source("manifesto_functions.R") | |
#############################
### Phish In 15 Lines of Code
## Simple ABCs of Phishing
# Number of simulated phishing campaigns
sim_phish <- 1000000
# Number of phishing emails sent in each simulated campaign
campaign_size <- 100
# Uninformative prior: every click rate between 0 and 1 is equally likely
prior_rate <- runif(sim_phish, min = 0, max = 1)
hist(prior_rate)
# Simulate the click count of every campaign in one call.
# rbinom() is vectorised over `prob`, so element i is drawn with
# prior_rate[i] -- the same draws, in the same order, as looping
# rbinom(1, ...) one million times, without the interpreter overhead.
phish_chance <- rbinom(sim_phish, size = campaign_size, prob = prior_rate)
# First 20 simulated click counts
head(phish_chance, 20)
# First 20 prior click rates (the rates that generated the counts above)
head(prior_rate, 20)
# Example of random binomial draws: 20 campaigns of 100 emails at a 10% rate
rbinom(20, 100, .10)
###################
### Posterior Phish
# Keep only the prior rates whose simulated campaign produced exactly the
# 5 clicks we observed; these survivors approximate the posterior.
posterior_phish <- prior_rate[phish_chance == 5]
# Match flags (TRUE = exactly 5 clicks) for the first 20 simulations
head(phish_chance == 5, 20)
# Index of the first simulation that produced exactly 5 clicks.
# (Previously compared against 1, which picked a 1-click campaign and so
# contradicted the "associated with 5" lookup below.)
first.true <- which(phish_chance == 5)[1]
# A prior rate associated with 5 clicks
prior_rate[first.true]
# Histogram of the posterior click rates
hist(posterior_phish, xlim = c(0, .2))
# 95% interval: the 2.5% and 97.5% quantiles of the posterior
quantile(posterior_phish, c(0.025, 0.975))
########################
### Informative Phish
# CISO's informative prior.
# NOTE(review): GetBeliefsEvents() is not defined in this file (presumably it
# comes from manifesto_functions.R). Its first two elements are used below as
# the beta shape parameters -- confirm what the (.01, .04) arguments mean.
shape_vals <- GetBeliefsEvents(.01, .04)
prior_rate <- rbeta(sim_phish, shape_vals[1], shape_vals[2])
# Make highly informed phish: one vectorised rbinom() call draws a click
# count per prior rate (element i uses prior_rate[i]), replacing the
# million-iteration scalar loop.
phish_chance <- rbinom(sim_phish, size = campaign_size, prob = prior_rate)
# Extract rates based on the 5 clicks we observed
posterior_phish <- prior_rate[phish_chance == 5]
# Count the posterior draws kept after conditioning on 5 clicks
length(posterior_phish)
# Posterior mean click rate
mean(posterior_phish)
# 95% interval from the 2.5% and 97.5% quantiles
quantile(posterior_phish, c(0.025, 0.975))
# 95% Highest Density Interval.
# NOTE(review): ci() is not base R; presumably provided by a package loaded
# elsewhere (e.g. via manifesto_functions.R) -- confirm.
ci(posterior_phish, method = "HDI", ci = 0.95, verbose = FALSE)
# Histogram of the posterior click rates
hist(posterior_phish, xlim = c(0, .2))
# What's the chance of the phishing rate being over 5%, 6%, ... 9%?
# The mean of a logical vector is the proportion of TRUE values.
mean(posterior_phish > 0.05)
round(mean(posterior_phish > .06), 3)
round(mean(posterior_phish > .07), 3)
round(mean(posterior_phish > .08), 3)
round(mean(posterior_phish > .09), 3)
#####################################
### ABC Conclusions and Complete Code
######
## Simple ABC
# Number of simulated phishing campaigns
sim_phish <- 1000000
# Number of phishing emails sent in each simulated campaign
campaign_size <- 100
# Make no assumptions about click rates
# Un-comment if you want an uninformative prior
#prior_rate <- runif(sim_phish, 0, 1)
# CISO's Informative Prior
# NOTE(review): GetBeliefsEvents() is the book's helper, defined outside this
# file; its first two elements are used as the beta shape parameters.
shape_vals <- GetBeliefsEvents(.01, .04) #Book Function
prior_rate <- rbeta(sim_phish, shape_vals[1], shape_vals[2])
hist(prior_rate)
# Make Phish Data: rbinom() is vectorised over `prob`, so a single call
# simulates every campaign (element i uses prior_rate[i]) instead of
# looping one million times.
phish_chance <- rbinom(sim_phish, size = campaign_size, prob = prior_rate)
# Extract rates based on the 5 clicks we observed
posterior_phish <- prior_rate[phish_chance == 5]
# Quick Checks: number of posterior draws and their mean
length(posterior_phish)
mean(posterior_phish)
# Credible Ranges: 95% interval from the 2.5% and 97.5% quantiles
quantile(posterior_phish, c(0.025, 0.975))
# ci(posterior_phish,method="HDI", verbose = FALSE) # Optional
# CISO 5% or greater question: proportion of posterior draws above 5%
mean(posterior_phish > 0.05)
# Histogram of final results
hist(posterior_phish, xlim = c(0, .2))
################################
### From ABC To Canonical Bayes
# Observed data from the vendor POC: clicks, and emails sent
phish <- 5
attempts <- 100
# Grid of 1000 candidate click rates spanning 0 to 1
phish_grid <- seq(0, 1, length.out = 1000)
# Grid posterior: binomial likelihood times a flat uniform prior,
# then rescaled so the grid values sum to 1
posterior_phish <- local({
  unnormalized <- dbinom(phish, size = attempts, prob = phish_grid) *
    dunif(phish_grid, min = 0, max = 1)
  unnormalized / sum(unnormalized)
})
plot(phish_grid, posterior_phish, type = "l", col = "black")
#####
## Normal Bayes Compact
# Observed data: clicks on phish, and emails sent
phish <- 5
attempts <- 100
# Candidate click rates from 0.0001 to 1 in steps of 0.0001;
# the grid starts above 0 because 0 clashes with the prior
phish_grid <- seq(from = 0.0001, to = 1, by = 0.0001)
# Likelihood of the observed clicks at every candidate rate
likelihood <- dbinom(phish, size = attempts, prob = phish_grid)
# Informative prior built from the CISO's beliefs.
# NOTE(review): GetBeliefsEvents() is defined outside this file; its first
# two elements are used here as the beta shape parameters.
shape_vals <- GetBeliefsEvents(.01, .04)
prior <- dbeta(phish_grid, shape_vals[1], shape_vals[2])
# Unnormalized posterior: prior weighted by the likelihood
product <- prior * likelihood
# Posterior rescaled so the grid values sum to one
posterior <- product / sum(product)
# A table of every intermediate quantity, for pedagogical purposes
bayes_table <- tibble(
  theta = phish_grid,
  prior,
  likelihood,
  product,
  posterior
)
head(bayes_table, n = 10)
### Bayes Plot
# Overlay prior, likelihood and posterior on a shared x-range (0 to 0.2).
# Each curve is drawn by its own plot() call and therefore gets its own
# y-scale, which is why the y-axis and its label are suppressed.
# Plot Prior
plot(phish_grid, prior, type = "l", col = "orange", xlim = c(0, .2),
     ylab = "", yaxt = "n")
# Plot Likelihood
# par(new = TRUE) makes the next plot() draw over the current figure instead
# of starting a fresh one; TRUE is spelled out because T can be reassigned.
par(new = TRUE)
plot(phish_grid, likelihood, type = "l", col = "skyblue", xlim = c(0, .2),
     ylab = "", yaxt = "n")
# Plot Posterior
# Draw the normalised posterior so the curve matches its legend label; it has
# the same shape as the unnormalised product on this unlabelled y-scale.
par(new = TRUE)
plot(phish_grid, posterior, type = "l", col = "seagreen", xlim = c(0, .2),
     ylab = "", yaxt = "n")
# Legend
legend("topright", c("prior", "likelihood", "posterior"), lty = 1,
       col = c("orange", "skyblue", "seagreen"))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment