Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
source("manifesto_functions.R")
###########################
### Non-Bayesian Wait Times
# Number of product teams
group_count = 10
# Empty Data Frame to hold results
dt <- tibble()
# Loop through count of groups
for(x in 1:group_count){
# Get Ext,Sev,High random rates. See “from” and “to” ranges.
extreme <- sample(seq(from = .005, to = .03, by= .001), size = 1)
severe <- sample(seq(from = .031, to = .07, by= .001), size = 1)
high <- sample(seq(from = .071, to = .15, by= .001), size = 1)
# Set Random Maturity Values - get one value between 1-4 randomly
patch_fail <- sample(1:4,size=1)
dev_maturity <- sample(1:4,size=1)
# Get Data Frame
res <- GetAdvSecResults(c(extreme, severe, high),
patch_fail, dev_maturity)
# Set Group Number
res$group <- x
# Append to master data frame – makes one large data frame
dt <- bind_rows(dt,res)
}
# Get Critical Rows And All Columns using subsetting
dte <- dt[(dt$audit == 0 & dt$exposure_id == 1 & dt$severity_id == 1 &
dt$week.fseen <= 52), ]
# Get interarrivals. I.e. the difference between weeks numbers
interarrivals <- diff(sort(dte$week.fseen))
# Print interarrivals
interarrivals
#make a histogram
hist(interarrivals, breaks=max(interarrivals))
####################
### Wait-Time By Day
# Same data frame for weeks
dte <- dt[(dt$audit == 0 & dt$exposure_id == 1 &
dt$severity_id == 1 & dt$week.fseen <= 52),]
# Get interarrivals by asking for the day number
interarrivals <- diff(sort(yday(dte$first.seen)))
# Build a histogram of the arrivals.
hist(interarrivals,breaks=max(interarrivals))
#############################
### The Basic Wait-Time Model
# Sum of individual interarrivals.
wait_time <- sum(interarrivals)
# Get count of vuln events. I.e ~60 vulns
n = nrow(dte)
# The ratio of vulnerabilities per day. I.e. 60/356 ~ .17
vulns_per_wait_time <- n/wait_time
# Average Time Between Event Days i.e. 356/60 ~6
avg_days = wait_time/n #Avg wait time in days between extreme vulns
# Exponential Dist to Simulate data. It spits out 60 simulated results
# And each value will wiggle around 1/60 ~ .0167
simulated <- rexp(n, rate=n)
# Multiply simulated rates by 365 (in my case) to get 60 wait-times
sim_interarrivals <- simulated * wait_time
# Create a histogram (one of many since this is a random process)
hist(sim_interarrivals, breaks = max(interarrivals))
# Avg next arrival
avg <- mean(simulated) * wait_time
avg
# Chance of a vulnerability showing up in 0 to 7 days.
pexp(q = 7, rate = vulns_per_wait_time)
# Simulated for 7 days
mean(rexp(n = 10000, rate = vulns_per_wait_time) <= 7)
# Simulate 10 boolean results, note how all are TRUE but one..its random
res <- rexp(10,.17) <= 7
res
# Get ratio of true to false results
mean(res)
# Create Dataframe
tibble(x = 0:1000 / 100,
prob = pexp(q = 0:1000 / 100,
rate = vulns_per_wait_time,
lower.tail = TRUE)) %>%
# Add a column to the data frame
mutate(Interval = ifelse(x >= 0 & x <= 7, "0 to 7", "other")) %>%
# Send the data frame to the graphing functions
ggplot(aes(x = x, y = prob, fill = Interval)) +
geom_area(alpha = 0.3) +
labs(title = "Probability Of Vulnerabilities Arriving",
subtitle = "From 0 to 7 Days",
x = "Days",
y = "Cumulative Probability")
pexp(.01,vulns_per_wait_time)
#######################
### Bayesian Wait-Times
### See GetVulnsGroups() in manifesto_functions.R
# Get 10 product teams
dt <- GetVulnGroups()
# Retrieve first half years worth of data
total_weeks <- 26
dte <- dt[(dt$audit == 0 & dt$exposure_id == 1 & dt$severity_id == 1 &
dt$week.fseen <= total_weeks),]
# Get a count of vulns – key variable
n = nrow(dte)
# Get Interarrivals
interarrivals <- diff(sort(yday(dte$first.seen)))
# Get Total Wait Time – key variable
wait_time <- sum(interarrivals)
# Get avg time per vuln
avg_wait_time = wait_time/n
# Get avg vulns per wait time
vulns_per_wait_time <- n/wait_time
#########################
### Empirical Bayes Prior
# Get new data as if it came from the previous year
dtPrev <- GetVulnGroups()
# Pull only the last quarter’s data. Week 40-52
dtePrev <- dtPrev[(dtPrev$audit == 0 & dtPrev$exposure_id == 1 &
dtPrev$severity_id == 1 &
(dtPrev$week.fseen > 39 & dtPrev$week.fseen <= 52)),]
# Get a list of all the day numbers with events..see example data
day_vals <- sort(yday(dtePrev$first.seen))
# Reformat the data for prior analysis
prior_int <- GetFirstSeenHits(day_vals, min = 275, max = 365)
# Get optimal gamma priors - MLE
gvals <- GetGammaPriors(prior_int)
alpha = gvals[1]
lambda = gvals[2]
# Create a list of the possible rates (for graphing only)
theta = seq(0, .5, 0.001)
# Score possibilities using priors (for graphing only)
prior = dgamma(theta, shape = alpha, rate = lambda)
#####################
### Bayesian Updating
# likelihood distribution - for graphing purposes only
likelihood = dgamma(wait_time, shape = n, rate = theta)
# Posterior - Conjugate prior update.
posterior = dgamma(theta, alpha + n, lambda + wait_time)
# Bayesian update graph
plot_posterior(theta, prior, likelihood, posterior)
abline(v = qgamma(c(0.025, 0.975), alpha + n, lambda + wait_time),
col = "seagreen", lty = 2)
qgamma(c(0.025, 0.975), alpha + n, lambda + wait_time)
################################################
### Making Data and Forecasts from the Posterior
# Number Of Simulations
Nrep = 10000
# prior vuln count + num vulns from data
shape = alpha + n
# prior time + wait time from data
rate = lambda + wait_time
# Simulate 10K vuln day rates: .21, .17, .15, .22 etc
theta_sim = rgamma(Nrep, shape, rate)
# 1/theta_sim is the mean time between events 1/.21 = 4.76 days etc
scaled_sim <- 1/theta_sim
# Get 89% Highest Density Intervals for graph
ci <- ci(scaled_sim, method="HDI")
# Get Mean for graph
mean_sim <- round(mean(scaled_sim),2)
# Put scaled_sim into DF for graph
df <- tibble(sim=scaled_sim)
# Plot Mean interArrivals
ggplot(df, aes(x=sim)) +
# Declare a histogram chart
geom_histogram(aes(y=..density..), colour="black",
fill="white",bins=100)+
# Add a density curve
geom_density(alpha=.2, fill="white")+
# Add a vertical line for the lower CI value
geom_vline(aes(xintercept=ci$CI_low),
color="black", linetype="dashed", size=.5)+
# Add a vertical line for the higher CI value
geom_vline(aes(xintercept=ci$CI_high),
color="black", linetype="dashed", size=.5)+
# Add a mean line
geom_vline(aes(xintercept=mean_sim),
color="black", linetype="dashed", size=.5)+
# Decorate the chart with labels
labs(title = "Posterior Distribution of Mean interArrivals",
subtitle = paste0("Mean Rate: ", mean_sim," | 89% HDI: ",
round(ci$CI_low,2), " To: ", round(ci$CI_high,2))) +
xlab("Mean interArrival Rates") +
ylab("Strength")
### Posterior Predictive Distribution
# Get interarrival values from the exponential distribution
y_sim = rexp(Nrep, rate = theta_sim)
# Get 95% and 50% quantiles - used for making vertical lines
val95 <- round(quantile(y_sim, 0.95),2)
val50 <- round(quantile(y_sim, 0.50),2)
# Pass interarrival data into DF
df <- tibble(sim=y_sim)
# Pass DF into ggplot
ggplot(df, aes(x=sim)) +
# Declare histogram shape
geom_histogram(aes(y=..density..), colour="black",
fill="white",bins=100)+
# Curve for chart
geom_density(alpha=.2, fill="white")+
# 95% or less vertical line
geom_vline(aes(xintercept=mean(val95)),
color="black", linetype="dashed", size=.5)+
# 50% or less vertical line
geom_vline(aes(xintercept=mean(val50)),
color="black", linetype="dashed", size=.5)+
# Labels for chart
labs(title = "Posterior Predictive Distribution",
subtitle = paste0("95% Chance of Vulnerability In: ",
val95," Days Or Less | 50% In: ",
val50, " Or Less")) +
xlab("Days") + ylab("Strength") + xlim(c(0, 50))
########################
### Mitigatable Surprise
GetNVDAnalysis(c(2018:2020), new = TRUE, save = TRUE, from_file = TRUE)
GetNVDAnalysis(c(2018:2020))
####
# Note that the supporting functions are all detailed in
# manifesto_functions.R
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment