Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
source("manifesto_functions.R")
#############################
### Phish In 15 Lines of Code
## Simple ABCs of Phishing
# Make lots of phish: number of simulated campaigns
sim_phish <- 1000000
# Count of phishing emails sent in each simulated campaign
campaign_size <- 100
# Make no assumptions about click rates: one uniform draw on [0, 1]
# per simulated campaign
prior_rate <- runif(sim_phish, 0, 1)
hist(prior_rate)
# Make Phish Data - one simulated click count per prior draw.
# rbinom() is vectorized over `prob`, so a single call produces the same
# kind of result as looping 1 million times over rbinom(1, ...), without
# the interpreter overhead of the loop.
phish_chance <- rbinom(sim_phish, size = campaign_size, prob = prior_rate)
# First 20 phish
head(phish_chance, 20)
# First 20 prior_rate
head(prior_rate, 20)
# Example of random: 20 binomial draws of 100 trials at a 10% rate
rbinom(20, 100, .10)
###################
### Posterior Phish
# Keep only the prior rates whose simulated campaign produced exactly the
# 5 clicks we observed; these retained draws approximate the posterior
posterior_phish <- prior_rate[phish_chance == 5]
# Show top 20 phish_chance: TRUE where the simulated count equals 5
head(phish_chance == 5, 20)
# Get First True phish_chance. The comparison must use the same value (5)
# as the posterior filter above, otherwise the rate printed below belongs
# to a campaign with a different click count.
first.true <- which(phish_chance == 5)[1]
# A prior rate associated with 5
prior_rate[first.true]
# Histogram of final results
hist(posterior_phish, xlim = c(0, .2))
# 95% Quantile
quantile(posterior_phish, c(0.025, 0.975))
########################
### Informative Phish
# CISO's Informative Prior
# GetBeliefsEvents() is defined outside this script (see the source() call
# at the top); its first two elements are used as the beta shape parameters
shape_vals <- GetBeliefsEvents(.01, .04)
prior_rate <- rbeta(sim_phish, shape_vals[1], shape_vals[2])
# Make highly informed phish: one simulated click count per prior draw.
# rbinom() is vectorized over `prob`, so no explicit loop is needed.
phish_chance <- rbinom(sim_phish, size = campaign_size, prob = prior_rate)
# Extract rates based on the 5 we observed
posterior_phish <- prior_rate[phish_chance == 5]
# Count up posterior results – bigger result. Data is denser near 5%
length(posterior_phish)
# Get the mean
mean(posterior_phish)
# Get the 95% interval
quantile(posterior_phish, c(0.025, 0.975))
# Get the 95% Highest Density Interval
# NOTE(review): ci() is not defined in this script; presumably provided by a
# package loaded in manifesto_functions.R - confirm
ci(posterior_phish, method = "HDI", ci = 0.95, verbose = FALSE)
# Create a histogram of the results
hist(posterior_phish, xlim = c(0, .2))
# What's the chance of phishing being over 5% etc.
# The mean of a logical vector is the share of TRUE values, i.e. the
# fraction of posterior draws above each threshold.
mean(posterior_phish > 0.05)
round(mean(posterior_phish > .06), 3)
round(mean(posterior_phish > .07), 3)
round(mean(posterior_phish > .08), 3)
round(mean(posterior_phish > .09), 3)
#####################################
### ABC Conclusions and Complete Code
######
## Simple ABC
# Make lots of phish: number of simulated campaigns
sim_phish <- 1000000
# Emails sent in each simulated campaign
campaign_size <- 100
# Make no assumptions about click rates
# Un-comment if you want an uninformative prior
#prior_rate <- runif(sim_phish, 0, 1)
# CISO's Informative Prior
shape_vals <- GetBeliefsEvents(.01, .04) #Book Function
prior_rate <- rbeta(sim_phish, shape_vals[1], shape_vals[2])
hist(prior_rate)
# Make Phish Data: one simulated click count per prior draw.
# rbinom() is vectorized over `prob`, so no explicit loop is needed.
phish_chance <- rbinom(sim_phish, size = campaign_size, prob = prior_rate)
# Extract rates based on the 5 we observed
posterior_phish <- prior_rate[phish_chance == 5]
# Quick Checks
length(posterior_phish)
mean(posterior_phish)
# Credible Ranges
quantile(posterior_phish, c(0.025, 0.975))
# ci(posterior_phish,method="HDI", verbose = FALSE) # Optional
# CISO 5% or greater question: fraction of posterior draws above 5%
sum(posterior_phish > 0.05) / length(posterior_phish)
# Histogram of final results
hist(posterior_phish, xlim = c(0, .2))
################################
### From ABC To Canonical Bayes
# Results from POC with vendor: clicks observed and emails sent
phish <- 5
attempts <- 100
# Parameter Grid: 1000 evenly spaced candidate click rates on [0, 1]
phish_grid <- seq(0, 1, length.out = 1000)
# Posterior – binomial likelihood of the data at each grid value times
# the flat uniform prior density
posterior_phish <- dbinom(x = phish, size = attempts, prob = phish_grid) *
  dunif(phish_grid, min = 0, max = 1)
# Normalized Posterior - rescaled so the grid values sum to 1
posterior_phish <- prop.table(posterior_phish)
plot(x = phish_grid, y = posterior_phish, type = "l", col = "black")
#####
## Normal Bayes Compact
# Clicks on phish and emails sent
phish <- 5
attempts <- 100
# Create Phish Grid from 0.0001 to 1 in steps of 0.0001;
# avoid 0, which clashes with our prior
phish_grid <- seq(from = 0.0001, to = 1, by = 0.0001)
# Informative Prior Shape Values – CISO's beliefs
shape_vals <- GetBeliefsEvents(.01, .04)
# Informative Prior Distribution: beta density at each grid value
prior <- dbeta(phish_grid, shape1 = shape_vals[1], shape2 = shape_vals[2])
# Likelihood of the Data: binomial probability of the observed clicks
likelihood <- dbinom(x = phish, size = attempts, prob = phish_grid)
# Product of Likelihood and Prior (unnormalized posterior)
product <- likelihood * prior
# Scaled to sum to one
posterior <- prop.table(product)
# A table of data for pedagogical purposes
bayes_table <- tibble(
  theta = phish_grid,
  prior = prior,
  likelihood = likelihood,
  product = product,
  posterior = posterior
)
head(bayes_table, n = 10)
### Bayes Plot
# Overlay the three curves on one panel. Each plot() call after the first is
# preceded by par(new = TRUE) so it draws over the existing figure instead
# of starting a new one; the y axis is suppressed, so each curve is drawn
# on its own vertical scale.
# Plot Prior
plot(phish_grid, prior, type = "l", col = "orange", xlim = c(0, .2),
     ylab = "", yaxt = "n")
# Plot Likelihood
par(new = TRUE)
plot(phish_grid, likelihood, type = "l", col = "skyblue", xlim = c(0, .2),
     ylab = "", yaxt = "n")
# Plot Posterior: use the normalized posterior so the curve matches its
# legend label (it has the same shape as the unnormalized product)
par(new = TRUE)
plot(phish_grid, posterior, type = "l", col = "seagreen", xlim = c(0, .2),
     ylab = "", yaxt = "n")
# Legend
legend("topright", c("prior", "likelihood", "posterior"), lty = 1,
       col = c("orange", "skyblue", "seagreen"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment