Instantly share code, notes, and snippets.

Embed
What would you like to do?
Sample OpenFAIR model
# for *pert functons
library('mc2d')
# given the number of loss events, calculate the sum of losses for the number of events
sample_LM <- function(N, L, ML, H, CONF){
# Calculate the aggregate loss
#
# ARGS:
# N - number of threat events to evaluate
# L - low boundary
# ML - most likely
# H - high boundary
# CONF - confidence
#
# RETURNS:
# sum of loss events
result <- sum(rpert(N, L, ML, H, shape = CONF) )
return(result)
}
select_events <- function (N, TSestimate, RSestimate) {
# Calculate the threat strength and whether the asset resists the attack
#
# ARGS:
# N - number of threat events to evaluate
# TSestimate - threat strength estimate
# RSestimate - threat strength estimate
#
# RETURNS:
# Number of succesfull attacks (i.e. loss events)
# sample threat and resistive strength
TSsamples <- rpert(N, TSestimate$L, TSestimate$ML, TSestimate$H, shape = TSestimate$CONF)
RSsamples <- rpert(N, RSestimate$L, RSestimate$ML, RSestimate$H, shape = RSestimate$CONF)
# loss events occur whenever TS > RS
VULNsamples <- TSsamples > RSsamples
return(length(VULNsamples[VULNsamples == TRUE]))
}
# calculate the ALE for a given scenario
calculate_ALE <- function(scenario, N = 10^4, title = 'Untitled', verbose = F) {
# run an OpenFAIR simulation
#
# ARGS:
# scenario - list of l/ml/h/conf pairs for tef, rs, and lm
# N - number of simulations to run
# title - optional name of scenario
# verbose - whether to print progress indicators
#
# RETURNS:
# list of scenario name and ALE samples
# make samples repeatable (and l33t)
set.seed(31337)
if (verbose) cat(paste0('Working on scenario ', title, '\n'))
# calibrated estimates
# extending to UI framework of your choice is left as an exercise for the reader
TEFestimate <- data.frame(L = scenario[['tef_l']], ML = scenario[['tef_ml']], H = scenario[['tef_h']], CONF = scenario[['tef_conf']])
TSestimate <- data.frame(L = 20, ML = 30, H = 70, CONF = 1)
RSestimate <- data.frame(L = scenario[['rs_l']], ML = scenario[['rs_ml']], H = scenario[['rs_h']], CONF = scenario[['rs_conf']])
LMestimate <- data.frame(L = scenario[['lm_l']], ML = scenario[['lm_ml']], H = scenario[['lm_h']], CONF = scenario[['lm_conf']])
# TEF - how many contacts do we have in each year
TEFsamples <- rpert(N, TEFestimate$L, TEFestimate$ML, TEFestimate$H, shape = TEFestimate$CONF)
# fractional events per year is nonsensical, so round to the nearest integer
TEFsamples <- round(TEFsamples)
# for each number of contacts (TEF), calculate how many of those succeed (become LEF counts)
LEF <- sapply(TEFsamples, function(x) select_events(x, TSestimate, RSestimate))
# for the range of loss events, calculate the annual sum of losses across the range of possible LMs
ALEsamples <- sapply(LEF, function(x) sample_LM(x, L = LMestimate$L, ML = LMestimate$ML, H = LMestimate$H, CONF = LMestimate$CONF))
# summary stats for ALE
if (verbose) {
print(summary(ALEsamples))
VaR <- quantile(ALEsamples, probs=(0.95))
cat(paste0("Losses at 95th percentile are $", format(VaR, nsmall = 2, big.mark = ","), "\n"))
}
simulation_results <- list(title = as.character(title), ALEsamples = ALEsamples)
return (simulation_results)
}
# plotting, commas, and binning libraries
library('ggplot2')
library('scales')
library('hexbin')
convert_qual_to_quant <- function(app) {
# Convert qualitative ratings to quant estimate ranges
#
# ARGS:
# app - list of scenarios, including TEF, RS, and LM qualitative labels
#
# RETURNS:
# list of tef_estimate, rs_estimate, lm_estimate, and scenario name
if (app['TEF'] == "Critical") {
tef_estimate <- data.frame(tef_l=1, tef_ml=24, tef_h=365, tef_conf=1)}
if (app['TEF'] == "High") {
tef_estimate <- data.frame(tef_l=1, tef_ml=12, tef_h=50, tef_conf=1)}
if (app['TEF'] == "Medium") {
tef_estimate <- data.frame(tef_l=0, tef_ml=6, tef_h=50, tef_conf=1)}
if (app['TEF'] == "Low") {
tef_estimate <- data.frame(tef_l=0, tef_ml=6, tef_h=24, tef_conf=1)}
if (app['TEF'] == "Very Low") {
tef_estimate <- data.frame(tef_l=0, tef_ml=2, tef_h=12, tef_conf=1)}
if (app['RS'] == "Critical") {
rs_estimate <- data.frame(rs_l=1, rs_ml=24, rs_h=365, rs_conf=1)}
if (app['RS'] == "High") {
rs_estimate <- data.frame(rs_l=1, rs_ml=12, rs_h=50, rs_conf=1)}
if (app['RS'] == "Medium") {
rs_estimate <- data.frame(rs_l=40, rs_ml=50, rs_h=60, rs_conf=1)}
if (app['RS'] == "Low") {
rs_estimate <- data.frame(rs_l=20, rs_ml=40, rs_h=60, rs_conf=1)}
if (app['RS'] == "Very Low") {
rs_estimate <- data.frame(rs_l=10, rs_ml=30, rs_h=50, rs_conf=1)}
if (app['PLM'] == "Critical") {
lm_estimate <- data.frame(lm_l=1, lm_ml=24, lm_h=365, lm_conf=1)}
if (app['PLM'] == "High") {
lm_estimate <- data.frame(lm_l=10^4, lm_ml=5*10^4, lm_h=10^6, lm_conf=1)}
if (app['PLM'] == "Medium") {
lm_estimate <- data.frame(lm_l=5*10^3, lm_ml=10^4, lm_h=5*10^4, lm_conf=1)}
if (app['PLM'] == "Low") {
lm_estimate <- data.frame(lm_l=10^3, lm_ml=2*10^3, lm_h=10^4, lm_conf=1)}
if (app['PLM'] == "Very Low") {
lm_estimate <- data.frame(lm_l=0, lm_ml=2, h=12, conf=1)}
estimate <- cbind(tef_estimate, rs_estimate, lm_estimate, title = as.character(app[['Application.Name']]))
#print(paste("TEF is ", TEF))
return(estimate)
}
scenario_data <- read.csv('app_data.tsv', sep='\t', stringsAsFactors = F)
scenario_data <- apply(scenario_data[,1:4], 1, convert_qual_to_quant)
results <- lapply(scenario_data, function(x) calculate_ALE(scenario = x[1:12], title = x$title, verbose = TRUE))
summary_data <- do.call(rbind, lapply(results, function(x) {
data.frame(title = x[['title']],
ALEmedian = median(x[['ALEsamples']]),
ALEmax = max(x[['ALEsamples']])
)
}
))
# scatter plot of median vs. max ALEs
gg <- ggplot(summary_data, aes(x = ALEmedian, y = ALEmax))
gg <- gg + geom_point(postion = "jitter", alpha = 0.1)
#gg <- gg + stat_bin2d(bins = 50) + scale_fill_gradient(low = 'lightblue', high = 'red')
gg <- gg + stat_binhex(bins = 50) + scale_fill_gradient(low = 'lightblue', high = 'red')
#gg <- gg + geom_text(aes(label = title), size = 4)
gg <- gg + scale_x_log10(labels = comma) + scale_y_log10(labels = comma)
gg <- gg + theme_bw()
print(gg)
# histogram of median ALEs
gg <- ggplot(summary_data, aes(x = ALEmedian))
gg <- gg + geom_histogram(binwidth = diff(range(summary_data$ALEmedian) / 50), aes(y = ..density..), color = "black", fill = "white")
gg <- gg + geom_density(fill = "steelblue", alpha = 1/3)
gg <- gg + scale_x_continuous(labels = comma) + scale_y_continuous(labels = comma)
gg <- gg + theme_bw()
print(gg)
# create histogram
#gg <- ggplot(ALEsamples, aes(x = ALEsamples))
#gg <- gg + geom_histogram(binwidth = diff(range(ALEsamples$ALEsamples)/50), aes(y = ..density..), color = "black", fill = "white")
#gg <- gg + geom_density(fill = "steelblue", alpha = 1/3)
#gg <- gg + scale_x_continuous(labels = comma)
#gg <- gg + theme_bw()
#gg <- gg + geom_point(aes(y=cumloss)) + geom_path(aes(y=cumloss, group=1))
#print(gg)
@davidski

This comment has been minimized.

Copy link
Owner

davidski commented Jan 20, 2014

This would probably be better if re-written to use the native model capabilities of mc2d (already loaded for use of the rpert function). When using mc2d, the provided sensitivity analysis items would be available for use. I'd like to be generate nice tornado diagrams to show what's most important in a given scenario.

@davidski

This comment has been minimized.

Copy link
Owner

davidski commented Feb 13, 2015

For a shiny version of this code, see http://shiny.dds.ec/solvomediocris/

@OSUso

This comment has been minimized.

Copy link

OSUso commented Mar 19, 2018

Hi David,
I'm an absolute beginner in R and trying to understand the code.
I'm trying to understand why you use sapply() for example in calculating ALE instead of multiplying the two distributions like in the example Jay was giving (http://datadrivensecurity.info/blog/posts/2014/Jan/severski/).

thanks
Osama S.

@davidski

This comment has been minimized.

Copy link
Owner

davidski commented Mar 21, 2018

Hi, @OSUso! Had to go back and reacquaint myself with this old code. This is a quick 5 minute review. 😄

Jay's version multiples the #er of losses in a given year by the size of those losses. The vectorization technique Jay used is fantastic and super easy to use, but assumes that each loss in a given period is the exact same size (i.e. 5 losses in a given year would all have $1,000 sizes , resulting in a total of 5 * 1000 = $5,000 losses). The above code, and the sampleL assumes that losses can vary in a given year (5 losses in a year may have loss sizes of 900, 1000, 1.1K, 1050, and 950).

I strongly encourage you to take a look at Evaluator (https://evaluator.severski.net) which is the descendant of this original code. Much more robust modelling, data input, reporting and more in there!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment