This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
source("manifesto_functions.R")

################
### Introduction

# Poisson probability of 0..20 emergency patches when the expected count
# is 23 / 4 (= 5.75).
patches <- 0:20
average <- 23 / 4

# Fix: the x values were given as `p`, which this script never defines.
# Plot against `patches`, the same vector that is handed to dpois().
plot(
  patches, dpois(patches, average),
  type = "h", lwd = 3,
  ylab = "Probability Of Emergency Patch",
  xlab = "Count Of Emergency Patches"
)
################################
### Let's Make Some Data: Part 1

# Let's Code It
# The first two elements returned by GetBeliefsHits() are passed on to the
# plotting helper (presumably the two beta shape parameters - confirm in
# manifesto_functions.R).
hits <- GetBeliefsHits(500, 900, 1000)
ggplotBeta(
  hits[1], hits[2],
  "Possibility", "Plausibility",
  "Knowing Nothing", "All Possibility Equally Plausible"
)

# Ask The Expert
expert <- GetBeliefsHits(240, 290, 1000)
MakeBetaGraph(
  expert[1], expert[2],
  "Possibility", "Plausibility",
  "Knowing Something", "Not All Possibilities Equally Plausible"
)

# 1000 values from a beta distribution using the expert's two values,
# followed by its HDI interval and a density estimate.
posterior <- distribution_beta(1000, expert[1], expert[2])
head(posterior)

ci_hdi <- ci(posterior, method = "HDI")
ci_hdi

estimate_density(posterior, extend = TRUE)
################################
### Let's Make Some Data: Part 2

dt <- GetSecResults()
dt

# Default Parameters for GetSecResults()
# (assigned with `<-` rather than `=`, like the rest of the script)
events <- c(.05, .10, .2)
event_corr <- 0.3
patch_fail <- 1

# Basic Team Structure
team_count <- 6
days <- 365
total <- days * team_count

# Definition table: a constant `team` column and an `nDays` column holding
# the number of days, keyed by `idTeam`.
team <- defData(
  varname = "team", dist = "nonrandom",
  formula = 0, id = "idTeam"
)
team <- defData(team,
  varname = "nDays", dist = "nonrandom",
  formula = days
)
team <- genData(team_count, team)
team

# Expand Team Rows
# One row per team-day; `id` is the new row-level identifier.
team <- genCluster(team,
  cLevelVar = "idTeam",
  numIndsVar = "nDays",
  level1ID = "id"
)
team

# Add days
# Turn the running row id into a within-team day number. Uses `days`
# instead of a second hard-coded 365 so the two cannot drift apart.
team$nDays <- team$id - (days * (team$idTeam - 1))
team
# Creating The Vulnerability Data Structure
# `total` rows, each with three correlated binary event flags.
# NOTE(review): params1 here is c(0.05, 0.15, 0.20) while the `events`
# defaults listed earlier are c(.05, .10, .2) - confirm which is intended.
dt <- genCorGen(
  n = total,
  nvars = 3,
  rho = 0.30,
  dist = "binary",
  params1 = c(0.05, 0.15, 0.20),
  cnames = "extreme_event,critical_event,high_event",
  corstr = "cs",
  wide = TRUE,
  method = "ep",
  id = "id"
)
dt

# Vulnerability Counts
# One Poisson count column per severity, using the matching event flag
# column as the distribution parameter.
# Fix: the argument was written `corst`, which only worked through partial
# argument matching; it is spelled out as `corstr` here.
# NOTE(review): the value "id" is kept as-is; the later addCorGen() calls
# in this script use "cs" - confirm which structure is intended.
dt <- addCorGen(
  dtOld = dt, idvar = "id", dist = "poisson", nvars = 1,
  corstr = "id", param1 = "extreme_event", cnames = "extreme",
  rho = 0.40
)
dt <- addCorGen(
  dtOld = dt, idvar = "id", dist = "poisson", nvars = 1,
  corstr = "id", param1 = "critical_event", cnames = "critical",
  rho = 0.40
)
dt <- addCorGen(
  dtOld = dt, idvar = "id", dist = "poisson", nvars = 1,
  corstr = "id", param1 = "high_event", cnames = "high",
  rho = 0.40
)

# Event showed 1 but poisson returned 0 - Make it at least 1
dt$extreme[dt$extreme_event == 1 & dt$extreme == 0] <- 1
dt$critical[dt$critical_event == 1 & dt$critical == 0] <- 1
dt$high[dt$high_event == 1 & dt$high == 0] <- 1
head(dt, 10)

# Merge Team and Vulnerability
dt <- mergeData(team, dt, "id")
#########################
### Enriching And Tidying

# Add Risk Dimensions: Audit
# Rows in the second half of the id range get 1, the rest get 0.
dt$audit <- if_else(dt$id > total / 2, 1, 0)

# Set Exposure Type
# Teams 1/4 are external, 2/5 partner, 3/6 internal; any other team is NA.
dt$exposure <- case_when(
  dt$idTeam %in% c(1, 4) ~ "external",
  dt$idTeam %in% c(2, 5) ~ "partner",
  dt$idTeam %in% c(3, 6) ~ "internal"
)

# Add Exposure ID Field
# Numeric code for the exposure label, used later as formula input.
dt$exposure_id <- case_when(
  dt$exposure == "external" ~ 1,
  dt$exposure == "partner" ~ 2,
  dt$exposure == "internal" ~ 3
)

# View Combinations
distinct(select(dt, idTeam, exposure, exposure_id, audit))

# Add Identifiers: Create Names
# genFactor() adds a factor version of idTeam (read back below as
# `fidTeam`) carrying these labels.
dt <- genFactor(
  dt, "idTeam",
  labels = c(
    "external audit", "partner audit", "internal audit",
    "external no audit", "partner no audit", "internal no audit"
  )
)

# Copy the team name over and get a clean dataframe.
dt$team <- dt$fidTeam
dt
# Tidy Data: Melting Data
# Keep only the columns needed downstream.
dt <- dt %>%
  select(
    idTeam, team, nDays, id, extreme, critical, high,
    audit, exposure, exposure_id
  )

# Need To move melt to later as it messes with defCondition
# Wide -> long: one row per (team-day, severity) with its count in `hits`.
dt <- melt(dt,
  id.vars = c(
    "idTeam", "team", "nDays", "id", "audit",
    "exposure", "exposure_id"
  ),
  measure.vars = c("extreme", "critical", "high"),
  variable.name = "severity", value.name = "hits"
)
dt

# Tidy Data: One Vulnerability Per Row
# Repeat each row `hits` times, which also drops the zero-count rows.
# seq_len() replaces 1:.N so an empty table does not produce c(1, 0).
dt <- dt[rep(seq_len(.N), hits)]
dt$hits <- 1

# Reset ID field
# seq_len(nrow(dt)) is the empty-safe form of 1:length(dt$idTeam).
dt$id <- seq_len(nrow(dt))
setkey(dt, id)
# Remediation Timing: Severity Levels
# Make numeric for formula input
dt$severity_id[dt$severity == "extreme"] <- 1
dt$severity_id[dt$severity == "critical"] <- 2
dt$severity_id[dt$severity == "high"] <- 3

# Remediation Timing: Create Time Formulas
# Set Fix Time Based On Vuln, Asset and Audit
# Fix: the formula text was split across two source lines, which put a
# line break inside the expression string. It is kept on a single line so
# the string is one well-formed expression.
defFix <- defDataAdd(
  varname = "fix_time",
  formula = "((audit * 3 + severity_id * 3 + exposure_id * 3) * (severity_id*1.43))",
  dist = "noZeroPoisson"
)
dt <- addColumns(defFix, dt)

# Plot example
# 8.58 is the formula value for audit = 0, severity_id = 1,
# exposure_id = 1: (0 + 3 + 3) * 1.43.
plot(
  0:20, dpois(0:20, 8.58),
  type = "h", lwd = 3,
  ylab = "Probability", xlab = "Remediation Days"
)

# View subset
dt %>% select(id, idTeam, team, audit, exposure, severity, fix_time)
# Remediation Time: Additional Time Values
# Fix Date: Day Number for Fix Date
# Fix: the formula string was broken across two source lines; it is
# written on one line so it contains only the expression itself.
defFixDate <- defDataAdd(
  varname = "fix_date",
  formula = "fix_time + nDays",
  dist = "nonrandom"
)
dt <- addColumns(defFixDate, dt)

# Chance of Something NOT getting fixed goes up with more time.
defStatus <- defDataAdd(
  varname = "status",
  formula = "(fix_time/1.25)/365",
  dist = "binary"
)
dt <- addColumns(defStatus, dt)

# Add date stamps for first seen and last seen
# Day numbers are converted to dates by offsetting from the origin date.
# NOTE(review): nDays is built above as a within-year day number, so the
# 2021 branch looks unreachable - confirm.
dt$first.seen <- if_else(
  dt$nDays <= 365,
  as.Date(dt$nDays, origin = "2020-01-01"),
  as.Date(dt$nDays, origin = "2021-01-01")
)
dt$last.seen <- if_else(
  dt$nDays <= 365,
  as.Date(dt$fix_date, origin = "2020-01-01"),
  as.Date(dt$fix_date, origin = "2021-01-01")
)

# Add week number for simplicity
# Weeks outside the first year are shifted by 52 so they keep counting up.
dt$week.fseen <- if_else(
  dt$nDays <= 365,
  week(dt$first.seen),
  as.integer((week(dt$first.seen) + 52))
)
dt$week.lseen <- if_else(
  year(dt$last.seen) == 2020,
  week(dt$last.seen),
  as.integer(week(dt$last.seen) + 52)
)
#######################
### Advanced Simulation

# Three simulated product groups, each tagged with a numeric `group` id
# and then stacked into a single table.

# Product Team With Default Settings
dtGroupOne <- GetSecResults()
dtGroupOne$group <- 1

# Product Team With Lots of Vulns, Low Remediation
dtGroupTwo <- GetSecResults(c(.10, .15, .30), .4, .5)
dtGroupTwo$group <- 2

# Product Team With Less vulns, high remediation rate
dtGroupThree <- GetSecResults(c(.05, .08, .12), .4, 3)
dtGroupThree$group <- 3

dt <- bind_rows(dtGroupOne, dtGroupTwo, dtGroupThree)

# View
dt %>%
  select(group, idTeam, team, exposure, exposure_id, audit) %>%
  distinct()

# Subset
# Drop the group-3 rows that have audit == 0 (De Morgan form of the
# original negated condition).
dt3 <- dt[dt$group != 3 | audit != 0]
dt3 %>%
  select(group, idTeam, team, exposure, exposure_id, audit) %>%
  filter(group == 3) %>%
  distinct()

# Quick Data Exploration
# Range for all closed vulnerabilities (status == 0)
range(dt[status == 0, fix_time])

# Quantiles for all closed vulnerabilities.
quantile(dt[status == 0, fix_time])

# Range for most critical, exposed and under audit vulns that are closed.
range(
  dt[
    status == 0 & exposure_id == 1 & severity_id == 1 & audit == 0,
    fix_time
  ]
)

# Quantiles for the same as above.
quantile(
  dt[
    status == 0 & exposure_id == 1 & severity_id == 1 & audit == 0,
    fix_time
  ]
)
############################
### Advanced Data Generation

# Spot checks of betaFormula(). Judging by how it is called later in this
# script, the positional arguments are presumably audit, severity_id,
# exposure_id, time_adj, nDays and dev_maturity - confirm against its
# definition. Only the last two arguments vary across these calls.

# High Risk, Early In Year, High Dev Maturity
betaFormula(0, 1, 1, 0.3, 5, 4)

# High Risk, Late In Year, High Dev Maturity
betaFormula(0, 1, 1, 0.3, 300, 4)

# High Risk, Middle Of year, High Maturity
betaFormula(0, 1, 1, 0.3, 150, 4)

# High Risk, Early Days, Lowest Dev Maturity
betaFormula(0, 1, 1, 0.3, 5, 1)

# High Risk, High Days, Lowest Maturity
betaFormula(0, 1, 1, 0.3, 300, 1)

# High Risk, Medium Days, Lowest Maturity
betaFormula(0, 1, 1, 0.3, 150, 1)
###################################
### Calls within GetAdvSecResults()

dt <- GetAdvSecResults()

# Defaults within GetAdvSecResults()
# (assigned with `<-` rather than `=`, like the rest of the script)
events <- c(.02, .10, .2)
patch_fail <- 1
dev_maturity <- 4

team_count <- 6
days <- 365
total <- days * team_count
eventSum <- sum(events)

# TEAM SECTION
# Create 6 Groups of 365 Each.
team <- defData(
  varname = "team", dist = "nonrandom",
  formula = 0, id = "idTeam"
)
team <- defData(team,
  varname = "nDays", dist = "nonrandom",
  formula = days
)
# "..dev_maturity" makes the formula read the `dev_maturity` variable set
# above rather than a column of the data.
team <- defData(team,
  varname = "dev_maturity", dist = "nonrandom",
  formula = "..dev_maturity"
)
team <- genData(team_count, team)
team <- genCluster(team,
  cLevelVar = "idTeam", numIndsVar = "nDays",
  level1ID = "id"
)

# Put days into the nDays column
# Uses `days` instead of a second hard-coded 365 so the two stay in sync.
team$nDays <- team$id - (days * (team$idTeam - 1))
########################
## Vulnerability Section

# Probability each day will have one or more extreme, critical or high
# risk events.
# Mild correlation between the variables
dt <- genData(n = team_count)
dt <- addPeriods(dt, days, perName = "nDays")
dt$nDays <- dt$nDays + 1

# Each event probability starts at events[k] and is reduced by a term that
# grows with nDays and with dev_maturity; if the result would be negative,
# .005 is used instead. The three formulas differ only in the events index
# and in the k/2 multiplier.
defExtEvent <- defDataAdd(
  varname = "extreme_event",
  formula = "if_else((..events[1] - ((round(nDays/(90 * (4/..dev_maturity)),1)/100) * 1/2)) < 0, .005,..events[1] - ((round(nDays/(90 * (4/..dev_maturity)),1)/100) * 1/2))",
  dist = "binary"
)
dt <- addColumns(defExtEvent, dt)

defCritEvent <- defDataAdd(
  varname = "critical_event",
  formula = "if_else((..events[2] - ((round(nDays/(90 * (4/..dev_maturity)),1)/100) * 2/2)) < 0, .005,..events[2] - ((round(nDays/(90 * (4/..dev_maturity)),1)/100) * 2/2))",
  dist = "binary"
)
dt <- addColumns(defCritEvent, dt)

defHighEvent <- defDataAdd(
  varname = "high_event",
  formula = "if_else((..events[3] - ((round(nDays/(90 * (4/..dev_maturity)),1)/100) * 3/2)) < 0, .005,..events[3] - ((round(nDays/(90 * (4/..dev_maturity)),1)/100) * 3/2))",
  dist = "binary"
)
dt <- addColumns(defHighEvent, dt)

# Fix: `dev_maturity` used to be listed in this select(), but this table
# has no such column (it lives on `team` and arrives with the later
# merge). The bare name therefore resolved to the global scalar and was
# treated as a column position, so the selected columns depended on the
# maturity value. Only the columns that actually exist are selected now.
dt <- dt %>%
  mutate(id = timeID) %>%
  select(id, extreme_event, critical_event, high_event)

# Reset ID column
setkey(dt, id)

# For each day with an Ext,Crit,High events randomly draw a small number
# of hits for each
# These are slightly more correlated.
dt <- addCorGen(
  dtOld = dt, idvar = "id", nvars = 1, rho = .4, corstr = "cs",
  dist = "poisson", param1 = "extreme_event", cnames = "extreme"
)
dt <- addCorGen(
  dtOld = dt, idvar = "id", nvars = 1, rho = .4, corstr = "cs",
  dist = "poisson", param1 = "critical_event", cnames = "critical"
)
dt <- addCorGen(
  dtOld = dt, idvar = "id", nvars = 1, rho = .4, corstr = "cs",
  dist = "poisson", param1 = "high_event", cnames = "high"
)
# view(dt)

# Event showed 1 but poisson returned 0 - Make it at least 1
dt$extreme[dt$extreme_event == 1 & dt$extreme == 0] <- 1
dt$critical[dt$critical_event == 1 & dt$critical == 0] <- 1
dt$high[dt$high_event == 1 & dt$high == 0] <- 1
## Asset Section
# Merge the Team/Days data with the hits/events data
dt <- mergeData(team, dt, "id")

# Add Exposure Categories for Audit and Exposure Type
# Rows in the second half of the id range get 1, the rest get 0.
dt$audit <- if_else(dt$id > total / 2, 1, 0)

# Set Exposure Type
dt$exposure[dt$idTeam %in% c(1, 4)] <- "external"
dt$exposure[dt$idTeam %in% c(2, 5)] <- "partner"
dt$exposure[dt$idTeam %in% c(3, 6)] <- "internal"

# Add Exposure ID Field
dt$exposure_id[dt$exposure == "external"] <- 1
dt$exposure_id[dt$exposure == "partner"] <- 2
dt$exposure_id[dt$exposure == "internal"] <- 3

# Add Names based on audit and exposure type - note that this is just
# text, idTeam is most important.
dt <- genFactor(
  dt, "idTeam",
  labels = c(
    "external audit", "partner audit", "internal audit",
    "external no audit", "partner no audit", "internal no audit"
  )
)

# Copy team name over and get a clean dataframe.
dt$team <- dt$fidTeam

## Select important columns
dt <- dt %>%
  select(
    idTeam, team, nDays, id, dev_maturity, extreme, critical, high,
    audit, exposure, exposure_id
  )

# Need To move melt to later as it messes with defCondition
# Wide -> long: one row per (team-day, severity) with its count in `hits`.
dt <- melt(dt,
  id.vars = c(
    "idTeam", "team", "nDays", "id", "dev_maturity",
    "audit", "exposure", "exposure_id"
  ),
  measure.vars = c("extreme", "critical", "high"),
  variable.name = "severity", value.name = "hits"
)

# Catch records with missing counts and treat them as zero
dt$hits <- if_else(is.na(dt$hits), 0, dt$hits)

# Get One Vuln Per Row i.e. it drops zeros...
# seq_len() replaces 1:.N so an empty table does not produce c(1, 0).
# NOTE(review): `hits` keeps its original count on every repeated row
# here - confirm whether it should be reset to 1 after the expansion.
dt <- dt[rep(seq_len(.N), hits)]

# Reset ID field
# seq_len(nrow(dt)) is the empty-safe form of 1:length(dt$idTeam).
dt$id <- seq_len(nrow(dt))
setkey(dt, id)
######
## Remediation Time Section

# Make numeric for formula input
dt$severity_id <- case_when(
  dt$severity == "extreme" ~ 1,
  dt$severity == "critical" ~ 2,
  dt$severity == "high" ~ 3
)

# Random time adjustment roughly based on event volume. Used for Fix Time
# Generator. Normal draw centred on eventSum, with non-positive draws
# replaced by .01.
defTimeAdj <- defDataAdd(
  varname = "time_adj",
  formula = eventSum,
  variance = .1,
  dist = "normal"
)
dt <- addColumns(defTimeAdj, dt)
dt$time_adj <- if_else(dt$time_adj <= 0, .01, dt$time_adj)

# Get Probability Associated With Time from Beta Dist
betaDef <- defDataAdd(
  varname = "beta_prob",
  formula = "betaFormula(audit,severity_id, exposure_id,time_adj, nDays, dev_maturity)",
  variance = "betaVariance(audit,severity_id, exposure_id,time_adj)",
  dist = "beta"
)

# Add column with list of probabilities
dt <- addColumns(betaDef, dt)

# Use beta probabilities as input into binomial for time to fix
binDef <- defDataAdd(
  varname = "fix_time",
  formula = "beta_prob",
  variance = "binomVariance(audit,severity_id, exposure_id,time_adj)",
  dist = "binomial"
)

# Create column for time to fix.
dt <- addColumns(binDef, dt)

# Update any time to fix zero values with a 1
dt$fix_time[dt$fix_time == 0] <- 1

# Fix Date
defFixDate <- defDataAdd(
  varname = "fix_date",
  formula = "fix_time + nDays",
  dist = "nonrandom"
)
dt <- addColumns(defFixDate, dt)

# Chance of Something NOT getting fixed goes up with more time.
# The probability is capped at 1 inside the formula.
defStatus <- defDataAdd(
  varname = "status",
  formula = "if_else((fix_time/..patch_fail)/365 > 1,1,(fix_time/..patch_fail)/365)",
  dist = "binary"
)
dt <- addColumns(defStatus, dt)
# Add date stamps for first seen and last seen
# Day numbers become dates by offsetting from the origin date; rows with
# nDays above 365 would use the 2021 origin instead.
dt$first.seen <- if_else(
  dt$nDays <= 365,
  as.Date(dt$nDays, origin = "2020-01-01"),
  as.Date(dt$nDays, origin = "2021-01-01")
)
dt$last.seen <- if_else(
  dt$nDays <= 365,
  as.Date(dt$fix_date, origin = "2020-01-01"),
  as.Date(dt$fix_date, origin = "2021-01-01")
)

# Add week number for simplicity
# Weeks outside the first year are shifted by 52 so they keep counting up.
dt$week.fseen <- if_else(
  dt$nDays <= 365,
  week(dt$first.seen),
  as.integer((week(dt$first.seen) + 52))
)
dt$week.lseen <- if_else(
  year(dt$last.seen) == 2020,
  week(dt$last.seen),
  as.integer(week(dt$last.seen) + 52)
)

View(dt)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment