Skip to content

Instantly share code, notes, and snippets.

@smartinsightsfromdata
Forked from davidski/openFairSim.R
Last active August 29, 2015 14:07
Show Gist options
  • Save smartinsightsfromdata/16f8e069e6f0ad89b5d8 to your computer and use it in GitHub Desktop.
Save smartinsightsfromdata/16f8e069e6f0ad89b5d8 to your computer and use it in GitHub Desktop.
# for *pert functons
library('mc2d')
# given the number of loss events, calculate the sum of losses for the number of events
sample_LM <- function(N, L, ML, H, CONF){
# Calculate the aggregate loss
#
# ARGS:
# N - number of threat events to evaluate
# L - low boundary
# ML - most likely
# H - high boundary
# CONF - confidence
#
# RETURNS:
# sum of loss events
result <- sum(rpert(N, L, ML, H, shape = CONF) )
return(result)
}
select_events <- function (N, TSestimate, RSestimate) {
# Calculate the threat strength and whether the asset resists the attack
#
# ARGS:
# N - number of threat events to evaluate
# TSestimate - threat strength estimate
# RSestimate - threat strength estimate
#
# RETURNS:
# Number of succesfull attacks (i.e. loss events)
# sample threat and resistive strength
TSsamples <- rpert(N, TSestimate$L, TSestimate$ML, TSestimate$H, shape = TSestimate$CONF)
RSsamples <- rpert(N, RSestimate$L, RSestimate$ML, RSestimate$H, shape = RSestimate$CONF)
# loss events occur whenever TS > RS
VULNsamples <- TSsamples > RSsamples
return(length(VULNsamples[VULNsamples == TRUE]))
}
# calculate the ALE for a given scenario
calculate_ALE <- function(scenario, N = 10^4, title = 'Untitled', verbose = F) {
# run an OpenFAIR simulation
#
# ARGS:
# scenario - list of l/ml/h/conf pairs for tef, rs, and lm
# N - number of simulations to run
# title - optional name of scenario
# verbose - whether to print progress indicators
#
# RETURNS:
# list of scenario name and ALE samples
# make samples repeatable (and l33t)
set.seed(31337)
if (verbose) cat(paste0('Working on scenario ', title, '\n'))
# calibrated estimates
# extending to UI framework of your choice is left as an exercise for the reader
TEFestimate <- data.frame(L = scenario[['tef_l']], ML = scenario[['tef_ml']], H = scenario[['tef_h']], CONF = scenario[['tef_conf']])
TSestimate <- data.frame(L = 20, ML = 30, H = 70, CONF = 1)
RSestimate <- data.frame(L = scenario[['rs_l']], ML = scenario[['rs_ml']], H = scenario[['rs_h']], CONF = scenario[['rs_conf']])
LMestimate <- data.frame(L = scenario[['lm_l']], ML = scenario[['lm_ml']], H = scenario[['lm_h']], CONF = scenario[['lm_conf']])
# TEF - how many contacts do we have in each year
TEFsamples <- rpert(N, TEFestimate$L, TEFestimate$ML, TEFestimate$H, shape = TEFestimate$CONF)
# fractional events per year is nonsensical, so round to the nearest integer
TEFsamples <- round(TEFsamples)
# for each number of contacts (TEF), calculate how many of those succeed (become LEF counts)
LEF <- sapply(TEFsamples, function(x) select_events(x, TSestimate, RSestimate))
# for the range of loss events, calculate the annual sum of losses across the range of possible LMs
ALEsamples <- sapply(LEF, function(x) sample_LM(x, L = LMestimate$L, ML = LMestimate$ML, H = LMestimate$H, CONF = LMestimate$CONF))
# summary stats for ALE
if (verbose) {
print(summary(ALEsamples))
VaR <- quantile(ALEsamples, probs=(0.95))
cat(paste0("Losses at 95th percentile are $", format(VaR, nsmall = 2, big.mark = ","), "\n"))
}
simulation_results <- list(title = as.character(title), ALEsamples = ALEsamples)
return (simulation_results)
}
# plotting, commas, and binning libraries
library('ggplot2')
library('scales')
library('hexbin')
convert_qual_to_quant <- function(app) {
# Convert qualitative ratings to quant estimate ranges
#
# ARGS:
# app - list of scenarios, including TEF, RS, and LM qualitative labels
#
# RETURNS:
# list of tef_estimate, rs_estimate, lm_estimate, and scenario name
if (app['TEF'] == "Critical") {
tef_estimate <- data.frame(tef_l=1, tef_ml=24, tef_h=365, tef_conf=1)}
if (app['TEF'] == "High") {
tef_estimate <- data.frame(tef_l=1, tef_ml=12, tef_h=50, tef_conf=1)}
if (app['TEF'] == "Medium") {
tef_estimate <- data.frame(tef_l=0, tef_ml=6, tef_h=50, tef_conf=1)}
if (app['TEF'] == "Low") {
tef_estimate <- data.frame(tef_l=0, tef_ml=6, tef_h=24, tef_conf=1)}
if (app['TEF'] == "Very Low") {
tef_estimate <- data.frame(tef_l=0, tef_ml=2, tef_h=12, tef_conf=1)}
if (app['RS'] == "Critical") {
rs_estimate <- data.frame(rs_l=1, rs_ml=24, rs_h=365, rs_conf=1)}
if (app['RS'] == "High") {
rs_estimate <- data.frame(rs_l=1, rs_ml=12, rs_h=50, rs_conf=1)}
if (app['RS'] == "Medium") {
rs_estimate <- data.frame(rs_l=40, rs_ml=50, rs_h=60, rs_conf=1)}
if (app['RS'] == "Low") {
rs_estimate <- data.frame(rs_l=20, rs_ml=40, rs_h=60, rs_conf=1)}
if (app['RS'] == "Very Low") {
rs_estimate <- data.frame(rs_l=10, rs_ml=30, rs_h=50, rs_conf=1)}
if (app['PLM'] == "Critical") {
lm_estimate <- data.frame(lm_l=1, lm_ml=24, lm_h=365, lm_conf=1)}
if (app['PLM'] == "High") {
lm_estimate <- data.frame(lm_l=10^4, lm_ml=5*10^4, lm_h=10^6, lm_conf=1)}
if (app['PLM'] == "Medium") {
lm_estimate <- data.frame(lm_l=5*10^3, lm_ml=10^4, lm_h=5*10^4, lm_conf=1)}
if (app['PLM'] == "Low") {
lm_estimate <- data.frame(lm_l=10^3, lm_ml=2*10^3, lm_h=10^4, lm_conf=1)}
if (app['PLM'] == "Very Low") {
lm_estimate <- data.frame(lm_l=0, lm_ml=2, h=12, conf=1)}
estimate <- cbind(tef_estimate, rs_estimate, lm_estimate, title = as.character(app[['Application.Name']]))
#print(paste("TEF is ", TEF))
return(estimate)
}
scenario_data <- read.csv('app_data.tsv', sep='\t', stringsAsFactors = F)
scenario_data <- apply(scenario_data[,1:4], 1, convert_qual_to_quant)
results <- lapply(scenario_data, function(x) calculate_ALE(scenario = x[1:12], title = x$title, verbose = TRUE))
summary_data <- do.call(rbind, lapply(results, function(x) {
data.frame(title = x[['title']],
ALEmedian = median(x[['ALEsamples']]),
ALEmax = max(x[['ALEsamples']])
)
}
))
# scatter plot of median vs. max ALEs
gg <- ggplot(summary_data, aes(x = ALEmedian, y = ALEmax))
gg <- gg + geom_point(postion = "jitter", alpha = 0.1)
#gg <- gg + stat_bin2d(bins = 50) + scale_fill_gradient(low = 'lightblue', high = 'red')
gg <- gg + stat_binhex(bins = 50) + scale_fill_gradient(low = 'lightblue', high = 'red')
#gg <- gg + geom_text(aes(label = title), size = 4)
gg <- gg + scale_x_log10(labels = comma) + scale_y_log10(labels = comma)
gg <- gg + theme_bw()
print(gg)
# histogram of median ALEs
gg <- ggplot(summary_data, aes(x = ALEmedian))
gg <- gg + geom_histogram(binwidth = diff(range(summary_data$ALEmedian) / 50), aes(y = ..density..), color = "black", fill = "white")
gg <- gg + geom_density(fill = "steelblue", alpha = 1/3)
gg <- gg + scale_x_continuous(labels = comma) + scale_y_continuous(labels = comma)
gg <- gg + theme_bw()
print(gg)
# create histogram
#gg <- ggplot(ALEsamples, aes(x = ALEsamples))
#gg <- gg + geom_histogram(binwidth = diff(range(ALEsamples$ALEsamples)/50), aes(y = ..density..), color = "black", fill = "white")
#gg <- gg + geom_density(fill = "steelblue", alpha = 1/3)
#gg <- gg + scale_x_continuous(labels = comma)
#gg <- gg + theme_bw()
#gg <- gg + geom_point(aes(y=cumloss)) + geom_path(aes(y=cumloss, group=1))
#print(gg)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment