# ribsy/chapter_three_scripts.R

## chapter_three_scripts.R
source("manifesto_functions.R")


#############################
### Phish In 15 Lines of Code

## Simple ABCs of Phishing

# Make lots of phish: number of simulated campaigns, one candidate
# click rate per simulation
sim_phish <- 1000000

# Count of phishing emails sent in each simulated campaign
campaign_size <- 100

# Make no assumptions about click rates: every candidate rate is drawn
# uniformly from [0, 1]
prior_rate <- runif(sim_phish, 0, 1)

hist(prior_rate)

# Make Phish Data - one simulated click count per candidate rate.
# rbinom() is vectorized over `prob`, so a single call draws all
# sim_phish campaigns without an R-level loop over 1 million elements.
phish_chance <- rbinom(sim_phish, size = campaign_size, prob = prior_rate)

# First 20 simulated click counts
head(phish_chance, 20)

# First 20 prior_rate values (the rates that generated the counts above)
head(prior_rate, 20)

# Example of random binomial draws: 20 campaigns of 100 emails at a 10% rate
rbinom(20, 100, .10)

###################
### Posterior Phish
# Keep only the prior rates whose simulated campaign produced exactly
# the 5 clicks that were observed
posterior_phish <- prior_rate[phish_chance == 5]

# Show, for the first 20 simulated campaigns, whether each one matched
head(phish_chance == 5, 20)

# Index of the first simulated campaign with exactly 5 clicks; the
# condition must be the same `== 5` used for the posterior above so the
# lookup below really is "a prior rate associated with 5"
first.true <- which(phish_chance == 5)[1]

# A prior rate associated with 5
prior_rate[first.true]

# Histogram of final results, zoomed in to rates between 0 and 20%
hist(posterior_phish, xlim = c(0, .2))

# Central 95% interval (2.5% and 97.5% quantiles)
quantile(posterior_phish, c(0.025, 0.975))

########################
### Informative Phish

# CISO's Informative Prior: GetBeliefsEvents() (sourced helper) supplies
# the two shape values handed to rbeta()
shape_vals <- GetBeliefsEvents(.01, .04)
prior_rate <- rbeta(sim_phish, shape_vals[1], shape_vals[2])

# Make highly informed phish - one simulated click count per prior rate.
# rbinom() is vectorized over `prob`, so no explicit loop is needed.
phish_chance <- rbinom(sim_phish, size = campaign_size, prob = prior_rate)

# Extract rates based on the 5 we observed
posterior_phish <- prior_rate[phish_chance == 5]

# Count up posterior results - bigger result. Data is denser near 5%
length(posterior_phish)

# Get the mean
mean(posterior_phish)

# Get the 95% interval
quantile(posterior_phish, c(0.025, 0.975))

# Get the 95% Highest Density Interval
ci(posterior_phish, method = "HDI", ci = 0.95, verbose = FALSE)

# Create a histogram of the results
hist(posterior_phish, xlim = c(0, .2))

# What's the chance of phishing being over 5% etc.
# The mean of a logical vector is the share of TRUE values, i.e. the
# fraction of posterior draws above the threshold.
mean(posterior_phish > 0.05)

round(mean(posterior_phish > .06), 3)
round(mean(posterior_phish > .07), 3)
round(mean(posterior_phish > .08), 3)
round(mean(posterior_phish > .09), 3)

#####################################
### ABC Conclusions and Complete Code

######
## Simple ABC

# Make lots of phish: number of simulations and emails per campaign
sim_phish <- 1000000
campaign_size <- 100

# Make no assumptions about click rates
# Un-comment if you want an uninformative prior
#prior_rate <- runif(sim_phish, 0, 1)

# CISO's Informative Prior
shape_vals <- GetBeliefsEvents(.01, .04)  #Book Function
prior_rate <- rbeta(sim_phish, shape_vals[1], shape_vals[2])

hist(prior_rate)

# Make Phish Data - one simulated click count per prior rate.
# rbinom() is vectorized over `prob`, so a single call replaces the
# element-by-element loop.
phish_chance <- rbinom(sim_phish, size = campaign_size, prob = prior_rate)

# Extract rates based on the 5 we observed
posterior_phish <- prior_rate[phish_chance == 5]

# Quick Checks
length(posterior_phish)
mean(posterior_phish)

# Credible Ranges
quantile(posterior_phish, c(0.025, 0.975))
# ci(posterior_phish,method="HDI", verbose = FALSE) # Optional

# CISO 5% or greater question: share of posterior draws above 5%
mean(posterior_phish > 0.05)

# Histogram of final results
hist(posterior_phish, xlim = c(0, .2))

################################
### From ABC To Canonical Bayes

# Results from POC with vendor: clicks observed and emails sent
phish <- 5
attempts <- 100

# Parameter Grid: 1000 evenly spaced candidate click rates on [0, 1]
phish_grid <- seq(from = 0, to = 1, length.out = 1000)

# Posterior (unnormalized): flat Uniform(0, 1) prior density times the
# binomial likelihood of the data at every grid value
posterior_phish <- dunif(phish_grid, min = 0, max = 1) *
  dbinom(x = phish, size = attempts, prob = phish_grid)

# Normalized Posterior - rescaled so the grid values sum to 1
posterior_phish <- posterior_phish / sum(posterior_phish)

plot(x = phish_grid, y = posterior_phish, type = "l", col = "black")

#####
## Normal Bayes Compact

# Clicks on Phish
phish <- 5

# Emails Sent
attempts <- 100

# Create Phish Grid in steps of 0.0001; starts above 0 because 0
# clashes with our prior
phish_grid <- seq(from = 0.0001, to = 1, by = 0.0001)

# Informative Prior Shape Values - CISO's beliefs
shape_vals <- GetBeliefsEvents(.01, .04)

# Informative Prior Distribution: beta density at every grid value
prior <- dbeta(x = phish_grid, shape1 = shape_vals[1], shape2 = shape_vals[2])

# Likelihood of the Data: binomial probability of the observed clicks
# at every grid value
likelihood <- dbinom(x = phish, size = attempts, prob = phish_grid)

# Product of Likelihood and Prior (unnormalized posterior)
product <- likelihood * prior

# Scaled to sum to one
posterior <- product / sum(product)

# A table of data for pedagogical purposes: one row per grid value
bayes_table <- tibble(
  theta = phish_grid,
  prior = prior,
  likelihood = likelihood,
  product = product,
  posterior = posterior
)

head(bayes_table, n = 10)

### Bayes Plot
# Three curves are overlaid on one device. Each is drawn with its own
# (hidden) y scale, so only the shapes are comparable, not the heights.

# Plot Prior
plot(phish_grid, prior, type = "l", col = "orange", xlim = c(0, .2),
     ylab = "", yaxt = "n")

# Plot Likelihood
# par(new = TRUE) makes the next plot() draw over the existing figure
# instead of starting a fresh one; TRUE is spelled out because `T` is
# an ordinary variable that can be reassigned.
par(new = TRUE)
plot(phish_grid, likelihood, type = "l", col = "skyblue", xlim = c(0, .2),
     ylab = "", yaxt = "n")

# Plot Posterior
# Uses the normalized `posterior` so the curve matches its legend label;
# with the y axis hidden its shape is the same as the raw product.
par(new = TRUE)
plot(phish_grid, posterior, type = "l", col = "seagreen", xlim = c(0, .2),
     ylab = "", yaxt = "n")

# Legend
legend("topright", c("prior", "likelihood", "posterior"), lty = 1,
       col = c("orange", "skyblue", "seagreen"))
	source("manifesto_functions.R")


	#############################
	### Phish In 15 Lines of Code

	## Simple ABCs of Phishing

	# NOTE(review): from here to the end of the file the script repeats the
	# sections above verbatim (tab-indented), so running the file executes
	# every simulation twice - confirm whether this copy is intentional.

	# Make lots of phish: number of simulated campaigns, one candidate
	# click rate per simulation
	sim_phish <- 1000000

	# Count of phishing emails sent in each simulated campaign
	campaign_size <- 100

	# Make no assumptions about click rates: every candidate rate is drawn
	# uniformly from [0, 1]
	prior_rate <- runif(sim_phish, 0, 1)

	hist(prior_rate)

	# Make Phish Data - one simulated click count per candidate rate.
	# rbinom() is vectorized over `prob`, so a single call draws all
	# sim_phish campaigns without an R-level loop over 1 million elements.
	phish_chance <- rbinom(sim_phish, size = campaign_size, prob = prior_rate)

	# First 20 simulated click counts
	head(phish_chance, 20)

	# First 20 prior_rate values (the rates that generated the counts above)
	head(prior_rate, 20)

	# Example of random binomial draws: 20 campaigns of 100 emails at a 10% rate
	rbinom(20, 100, .10)

	###################
	### Posterior Phish
	# Keep only the prior rates whose simulated campaign produced exactly
	# the 5 clicks that were observed
	posterior_phish <- prior_rate[phish_chance == 5]

	# Show, for the first 20 simulated campaigns, whether each one matched
	head(phish_chance == 5, 20)

	# Index of the first simulated campaign with exactly 5 clicks; the
	# condition must be the same `== 5` used for the posterior above so the
	# lookup below really is "a prior rate associated with 5"
	first.true <- which(phish_chance == 5)[1]

	# A prior rate associated with 5
	prior_rate[first.true]

	# Histogram of final results, zoomed in to rates between 0 and 20%
	hist(posterior_phish, xlim = c(0, .2))

	# Central 95% interval (2.5% and 97.5% quantiles)
	quantile(posterior_phish, c(0.025, 0.975))

	########################
	### Informative Phish

	# CISO's Informative Prior: GetBeliefsEvents() (sourced helper) supplies
	# the two shape values handed to rbeta()
	shape_vals <- GetBeliefsEvents(.01, .04)
	prior_rate <- rbeta(sim_phish, shape_vals[1], shape_vals[2])

	# Make highly informed phish - one simulated click count per prior rate.
	# rbinom() is vectorized over `prob`, so no explicit loop is needed.
	phish_chance <- rbinom(sim_phish, size = campaign_size, prob = prior_rate)

	# Extract rates based on the 5 we observed
	posterior_phish <- prior_rate[phish_chance == 5]

	# Count up posterior results - bigger result. Data is denser near 5%
	length(posterior_phish)

	# Get the mean
	mean(posterior_phish)

	# Get the 95% interval
	quantile(posterior_phish, c(0.025, 0.975))

	# Get the 95% Highest Density Interval
	ci(posterior_phish, method = "HDI", ci = 0.95, verbose = FALSE)

	# Create a histogram of the results
	hist(posterior_phish, xlim = c(0, .2))

	# What's the chance of phishing being over 5% etc.
	# The mean of a logical vector is the share of TRUE values, i.e. the
	# fraction of posterior draws above the threshold.
	mean(posterior_phish > 0.05)

	round(mean(posterior_phish > .06), 3)
	round(mean(posterior_phish > .07), 3)
	round(mean(posterior_phish > .08), 3)
	round(mean(posterior_phish > .09), 3)

	#####################################
	### ABC Conclusions and Complete Code

	######
	## Simple ABC

	# Make lots of phish: number of simulations and emails per campaign
	sim_phish <- 1000000
	campaign_size <- 100

	# Make no assumptions about click rates
	# Un-comment if you want an uninformative prior
	#prior_rate <- runif(sim_phish, 0, 1)

	# CISO's Informative Prior
	shape_vals <- GetBeliefsEvents(.01, .04) #Book Function
	prior_rate <- rbeta(sim_phish, shape_vals[1], shape_vals[2])

	hist(prior_rate)

	# Make Phish Data - one simulated click count per prior rate.
	# rbinom() is vectorized over `prob`, so a single call replaces the
	# element-by-element loop.
	phish_chance <- rbinom(sim_phish, size = campaign_size, prob = prior_rate)

	# Extract rates based on the 5 we observed
	posterior_phish <- prior_rate[phish_chance == 5]

	# Quick Checks
	length(posterior_phish)
	mean(posterior_phish)

	# Credible Ranges
	quantile(posterior_phish, c(0.025, 0.975))
	# ci(posterior_phish,method="HDI", verbose = FALSE) # Optional

	# CISO 5% or greater question: share of posterior draws above 5%
	mean(posterior_phish > 0.05)

	# Histogram of final results
	hist(posterior_phish, xlim = c(0, .2))

	################################
	### From ABC To Canonical Bayes

	# Results from POC with vendor: clicks observed and emails sent
	phish <- 5
	attempts <- 100

	# Parameter Grid: 1000 evenly spaced candidate click rates on [0, 1]
	phish_grid <- seq(from = 0, to = 1, length.out = 1000)

	# Posterior (unnormalized): flat Uniform(0, 1) prior density times the
	# binomial likelihood of the data at every grid value
	posterior_phish <- dunif(phish_grid, min = 0, max = 1) *
	  dbinom(x = phish, size = attempts, prob = phish_grid)

	# Normalized Posterior - rescaled so the grid values sum to 1
	posterior_phish <- posterior_phish / sum(posterior_phish)

	plot(x = phish_grid, y = posterior_phish, type = "l", col = "black")

	#####
	## Normal Bayes Compact

	# Clicks on Phish
	phish <- 5

	# Emails Sent
	attempts <- 100

	# Create Phish Grid in steps of 0.0001; starts above 0 because 0
	# clashes with our prior
	phish_grid <- seq(from = 0.0001, to = 1, by = 0.0001)

	# Informative Prior Shape Values - CISO's beliefs
	shape_vals <- GetBeliefsEvents(.01, .04)

	# Informative Prior Distribution: beta density at every grid value
	prior <- dbeta(x = phish_grid, shape1 = shape_vals[1], shape2 = shape_vals[2])

	# Likelihood of the Data: binomial probability of the observed clicks
	# at every grid value
	likelihood <- dbinom(x = phish, size = attempts, prob = phish_grid)

	# Product of Likelihood and Prior (unnormalized posterior)
	product <- likelihood * prior

	# Scaled to sum to one
	posterior <- product / sum(product)

	# A table of data for pedagogical purposes: one row per grid value
	bayes_table <- tibble(
	  theta = phish_grid,
	  prior = prior,
	  likelihood = likelihood,
	  product = product,
	  posterior = posterior
	)

	head(bayes_table, n = 10)

	### Bayes Plot
	# Three curves are overlaid on one device. Each is drawn with its own
	# (hidden) y scale, so only the shapes are comparable, not the heights.

	# Plot Prior
	plot(phish_grid, prior, type = "l", col = "orange", xlim = c(0, .2),
	     ylab = "", yaxt = "n")

	# Plot Likelihood
	# par(new = TRUE) makes the next plot() draw over the existing figure
	# instead of starting a fresh one; TRUE is spelled out because `T` is
	# an ordinary variable that can be reassigned.
	par(new = TRUE)
	plot(phish_grid, likelihood, type = "l", col = "skyblue", xlim = c(0, .2),
	     ylab = "", yaxt = "n")

	# Plot Posterior
	# Uses the normalized `posterior` so the curve matches its legend label;
	# with the y axis hidden its shape is the same as the raw product.
	par(new = TRUE)
	plot(phish_grid, posterior, type = "l", col = "seagreen", xlim = c(0, .2),
	     ylab = "", yaxt = "n")

	# Legend
	legend("topright", c("prior", "likelihood", "posterior"), lty = 1,
	       col = c("orange", "skyblue", "seagreen"))