Skip to content

Instantly share code, notes, and snippets.

@ribsy
Created April 19, 2022 02:04
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
Save ribsy/2f2b8cf30dd79c6ea6e6a6383ad5e02c to your computer and use it in GitHub Desktop.
source("manifesto_functions.R")
################
### Introduction
# Poisson PMF of emergency-patch counts: 23 patches observed over 4
# periods gives an average rate of 5.75 per period.
patches <- 0:20
average <- 23 / 4
# BUG FIX: the original called plot(p, ...) but `p` is never defined;
# the x-axis is the support vector `patches`.
plot(patches, dpois(patches, average), type = "h", lwd = 3,
     ylab = "Probability Of Emergency Patch",
     xlab = "Count Of Emergency Patches")
################################
### Let’s Make Some Data: Part 1
# Let's Code It
# GetBeliefsHits() comes from manifesto_functions.R; given 500/900 out
# of 1000 it returns a two-element vector used below as beta shape
# parameters — presumably (alpha, beta); confirm in manifesto_functions.R.
hits <- GetBeliefsHits(500, 900, 1000)
ggplotBeta(hits[1], hits[2], "Possibility", "Plausibility", "Knowing Nothing","All Possibility Equally Plausible")
# Ask The Expert
# Tighter bounds (240/290 of 1000) encode a more informed prior.
expert <- GetBeliefsHits(240,290,1000)
# NOTE(review): this uses MakeBetaGraph() while the call above uses
# ggplotBeta() — confirm whether both helpers are intended.
MakeBetaGraph(expert[1], expert[2],"Possibility","Plausibility","Knowing Something","Not All Possibilities Equally Plausible")
# Draw 1000 values from the expert's beta distribution (bayestestR).
posterior <- distribution_beta(1000, expert[1], expert[2])
head(posterior)
# Highest-density credible interval for the posterior draws.
ci_hdi <- ci(posterior, method = "HDI")
ci_hdi
posterior %>% estimate_density(extend = TRUE)
################################
### Let’s Make Some Data: Part 2
# Build the simulated vulnerability dataset with default settings.
# GetSecResults() is defined in manifesto_functions.R; the statements
# that follow replay its internals step by step.
dt <- GetSecResults()
dt
# Default Parameters for GetSecResults()
# (echoed here for reference; `<-` used for assignment per R convention)
events <- c(.05, .10, .2)   # daily P(extreme / critical / high event)
event_corr <- 0.3           # correlation between the event indicators
patch_fail <- 1             # scales P(a vuln is never remediated)
# Basic Team Structure
team_count <- 6
days <- 365
total <- days * team_count  # one row per team-day
# Define one record per team, each carrying the period length.
team <- defData(varname = "team", dist = "nonrandom",
                formula = 0, id = "idTeam")
team <- defData(team, varname = "nDays", dist = "nonrandom",
                formula = days)
team <- genData(team_count, team)
team
# Expand Team Rows
# One row per team-day (team_count * days rows in total).
team <- genCluster(team,
                   cLevelVar = "idTeam",
                   numIndsVar = "nDays",
                   level1ID = "id")
team
# Add days
# CONSISTENCY FIX: use `days` rather than the hard-coded 365 so the
# per-team day counter stays correct if the period length changes.
team$nDays <- (team$id - (days * (team$idTeam - 1)))
team
# Creating The Vulnerability Data Structure
# One row per team-day with three correlated binary event indicators.
dt <- genCorGen(n = total,
                nvars = 3,
                # CONSISTENCY FIX: reuse event_corr (0.3) instead of
                # re-hard-coding the correlation.
                rho = event_corr,
                dist = "binary",
                # NOTE(review): 0.15 for critical disagrees with the
                # documented default events = c(.05, .10, .2) above —
                # confirm which set is intended.
                params1 = c(0.05, 0.15, 0.20),
                cnames = "extreme_event,critical_event,high_event",
                corstr = "cs",
                wide = TRUE,
                method = "ep",
                id = "id")
dt
# Vulnerability Counts
# For each day, draw a Poisson hit count using the binary event column
# as the rate parameter.
# FIX: the argument is `corstr` (the original wrote `corst`), and "id"
# is not a valid correlation structure; "cs" matches the equivalent
# calls inside GetAdvSecResults() below (with nvars = 1 the structure
# has no effect anyway).
dt <- addCorGen(dtOld = dt, idvar = "id", dist = "poisson", nvars = 1,
                corstr = "cs", param1 = "extreme_event", cnames = "extreme",
                rho = 0.40)
dt <- addCorGen(dtOld = dt, idvar = "id", dist = "poisson", nvars = 1,
                corstr = "cs", param1 = "critical_event",
                cnames = "critical", rho = 0.40)
dt <- addCorGen(dtOld = dt, idvar = "id", dist = "poisson", nvars = 1,
                corstr = "cs", param1 = "high_event", cnames = "high",
                rho = 0.40)
# Event showed 1 but poisson returned 0 - Make it at least 1
dt$extreme[dt$extreme_event == 1 & dt$extreme == 0] <- 1
dt$critical[dt$critical_event == 1 & dt$critical == 0] <- 1
dt$high[dt$high_event == 1 & dt$high == 0] <- 1
head(dt, 10)
# Merge Team and Vulnerability
dt <- mergeData(team, dt, "id")
#########################
### Enriching And Tidying
# Add Risk Dimensions: Audit
# The second half of the id range (teams 4-6) is flagged as audited.
dt$audit <- if_else(dt$id > total / 2, 1, 0)
# Set Exposure Type and its numeric ID.
# Teams 1 & 4 are external, 2 & 5 partner, 3 & 6 internal, so both
# columns follow the same 1/2/3 cycle over idTeam.
exposure_levels <- c("external", "partner", "internal")
exposure_cycle <- ((dt$idTeam - 1) %% 3) + 1
dt$exposure <- exposure_levels[exposure_cycle]
dt$exposure_id <- exposure_cycle
# View Combinations
dt %>% select(idTeam, exposure, exposure_id, audit) %>% distinct()
# Add Identifiers: Create Names
dt <- genFactor(dt, "idTeam",
                labels = c("external audit", "partner audit", "internal audit",
                           "external no audit", "partner no audit",
                           "internal no audit"))
# Copy the team name over and get a clean dataframe.
dt$team <- dt$fidTeam
dt
# Tidy Data: Melting Data
dt <- dt %>% select(idTeam, team, nDays, id, extreme, critical, high,
                    audit, exposure, exposure_id)
# Need to move melt to later as it messes with defCondition
dt <- melt(dt, id.vars = c("idTeam","team","nDays","id","audit",
                           "exposure","exposure_id" ),
           measure.vars = c("extreme", "critical", "high"),
           variable.name = "severity", value.name = "hits")
dt
# Tidy Data: One Vulnerability Per Row
# ROBUSTNESS: rep() errors on NA counts, so zero out any NA hit counts
# first (mirrors the guard used in GetAdvSecResults() below).
dt$hits[is.na(dt$hits)] <- 0
# Get One Vuln Per Row i.e. it drops zeros...
# seq_len(.N) instead of 1:.N guards the empty-table case.
dt <- dt[rep(seq_len(.N), hits)]
dt$hits <- 1
# Reset ID field
dt$id <- seq_len(nrow(dt))
setkey(dt, id)
# Remediation Timing: Severity Levels
# Make numeric for formula input
# (extreme = 1, critical = 2, high = 3; rows with other severities are NA)
dt$severity_id[dt$severity == "extreme"] <- 1
dt$severity_id[dt$severity == "critical"] <- 2
dt$severity_id[dt$severity == "high"] <- 3
# Remediation Timing: Create Time Formulas
# Set Fix Time Based On Vuln, Asset and Audit
# The formula is a Poisson mean: audits, higher severity and greater
# exposure each add remediation days, then severity scales the whole
# thing; noZeroPoisson guarantees at least one day.
defFix <- defDataAdd(varname = "fix_time",
formula = "((audit * 3 + severity_id * 3 + exposure_id * 3)
* (severity_id*1.43))",
dist = "noZeroPoisson")
dt <- addColumns(defFix, dt)
# Plot example
# 8.58 = (0*3 + 1*3 + 1*3) * 1.43, i.e. the mean for an unaudited,
# extreme-severity, external vulnerability.
plot(0:20,dpois(0:20,8.58), type = "h", lwd=3,
ylab="Probability",xlab="Remediation Days")
# View subset
dt %>% select(id, idTeam, team,audit, exposure, severity,fix_time)
# Remediation Time: Additional Time Values
# Fix Date: day number on which the vuln gets fixed.
# (formula string collapsed to one line; R ignores the whitespace)
defFixDate <- defDataAdd(varname = "fix_date", formula = "fix_time + nDays",
                         dist = "nonrandom")
dt <- addColumns(defFixDate, dt)
# Chance of Something NOT getting fixed goes up with more time.
# ROBUSTNESS FIX: cap the probability at 1, as GetAdvSecResults() does
# below — fix_time > 456 would otherwise yield an invalid binary
# probability.
# NOTE(review): the 1.25 divisor disagrees with the documented default
# patch_fail = 1 above — confirm which is intended.
defStatus <- defDataAdd(varname = "status",
                        formula = "if_else((fix_time/1.25)/365 > 1, 1, (fix_time/1.25)/365)",
                        dist = "binary")
dt <- addColumns(defStatus, dt)
# Add date stamps for first seen and last seen (year-1 rows use a 2020
# origin, year-2 rows a 2021 origin).
dt$first.seen <- if_else(dt$nDays <= 365, as.Date(dt$nDays,
                                                  origin = "2020-01-01"),
                         as.Date(dt$nDays, origin = "2021-01-01"))
dt$last.seen <- if_else(dt$nDays <= 365, as.Date(dt$fix_date,
                                                 origin = "2020-01-01"),
                        as.Date(dt$fix_date, origin = "2021-01-01"))
# Add week number for simplicity
# TYPE FIX: dplyr::if_else requires both branches to share a type;
# lubridate::week() returns a double while the original false branch was
# integer, so coerce both branches to integer.
dt$week.fseen <- if_else(dt$nDays <= 365, as.integer(week(dt$first.seen)),
                         as.integer(week(dt$first.seen) + 52))
dt$week.lseen <- if_else(year(dt$last.seen) == 2020,
                         as.integer(week(dt$last.seen)),
                         as.integer(week(dt$last.seen) + 52))
#######################
### Advanced Simulation
# Simulate three product teams with different risk profiles and tag
# each result set with a group id before stacking them.
# Product Team With Default Settings
dtGroupOne <- GetSecResults()
dtGroupOne$group <- 1
# Product Team With Lots of Vulns, Low Remediation
dtGroupTwo <- GetSecResults(c(.10, .15, .30), .4, .5)
dtGroupTwo$group <- 2
# Product Team With Less vulns, high remediation rate
dtGroupThree <- GetSecResults(c(.05, .08, .12), .4, 3)
dtGroupThree$group <- 3
dt <- bind_rows(dtGroupOne, dtGroupTwo, dtGroupThree)
# View
dt %>% select(group,idTeam,team,exposure, exposure_id, audit) %>% distinct()
# Subset: drop group 3's non-audited teams.
# CONSISTENCY FIX: use bare column names inside data.table's i — the
# original mixed dt$group with bare audit; both resolve identically.
dt3 <- dt[!(group == 3 & audit == 0)]
dt3 %>% select(group,idTeam,team,exposure, exposure_id, audit) %>%
  filter(group == 3) %>% distinct()
# Quick Data Exploration
# Range for all closed vulnerabilities (status == 0 means fixed).
range(dt[status == 0, fix_time])
# Quantiles for all closed vulnerabilities.
quantile(dt[status == 0, fix_time])
# Range for most critical, externally exposed vulns that are closed and
# NOT under audit.
# COMMENT FIX: the original said "under audit", but the filter is
# audit == 0 (no audit). Bare data.table column names used throughout
# for consistency (the original mixed dt$status with bare names).
range(dt[status == 0 & exposure_id == 1 & severity_id == 1 & audit == 0,
         fix_time])
# Quantiles for the same subset as above.
quantile(dt[status == 0 & exposure_id == 1 & severity_id == 1 & audit == 0,
            fix_time])
############################
### Advanced Data Generation
# Demos of betaFormula(); argument order (from its use in the betaDef
# definition further below) appears to be (audit, severity_id,
# exposure_id, time_adj, nDays, dev_maturity) — confirm in
# manifesto_functions.R. Here: unaudited, extreme severity, external
# exposure, time_adj 0.3, varying day-of-year and dev maturity.
#High Risk, Early In Year, High Dev Maturity
betaFormula(0,1,1,.3,5,4)
#High Risk, Late In Year, High Dev Maturity
betaFormula(0,1,1,.3,300,4)
#High Risk, Middle Of year, High Maturity
betaFormula(0,1,1,.3,150,4)
#High Risk, Early Days, Lowest Dev Maturity
betaFormula(0,1,1,.3,5,1)
#High Risk, High Days, Lowest Maturity
betaFormula(0,1,1,.3,300,1)
#High Risk, Medium Days, Lowest Maturity
betaFormula(0,1,1,.3,150,1)
###################################
### Calls within GetAdvSecResults()
# Run the advanced generator once; the statements that follow replay
# its internals step by step with the default arguments.
dt <- GetAdvSecResults()
# Defaults within GetAdvSecResults()
# (typo "wtihing" fixed; `<-` used for assignment per R convention)
events <- c(.02, .10, .2)   # daily P(extreme / critical / high event)
patch_fail <- 1             # scales P(a vuln is never remediated)
dev_maturity <- 4           # development maturity (4 = highest used here)
team_count <- 6
days <- 365
total <- days * team_count  # one row per team-day
eventSum <- sum(events)     # later used as the mean of time_adj
# TEAM SECTION
# Create 6 Groups of 365 Each.
team <- defData(varname = "team", dist = "nonrandom",formula = 0, id = "idTeam")
team <- defData(team,varname = "nDays",dist = "nonrandom", formula = days)
team <- defData(team, varname = "dev_maturity", dist = "nonrandom", formula = "..dev_maturity")
team <- genData(team_count, team)
team <- genCluster(team, cLevelVar = "idTeam", numIndsVar = "nDays", level1ID = "id")
# Put days into the nDays column
# CONSISTENCY FIX: use `days` rather than the hard-coded 365 so the
# per-team day counter stays correct if the period length changes.
team$nDays <- (team$id - (days * (team$idTeam - 1)))
########################
## Vulnerability Section
# Probability each day will have one or more extreme, critical or high risk events.
# Mild correlation between the variables
dt <- genData(n=team_count)
# One period (day) per team-day; periods start at 0, so shift to 1-based.
dt <- addPeriods(dt, days,perName = "nDays")
dt$nDays <- dt$nDays + 1
# Each event probability starts at events[k] and declines linearly with
# the day number — faster for higher dev_maturity and higher severity
# index — and is floored at 0.005 so it never goes non-positive.
defExtEvent <- defDataAdd(varname = "extreme_event", formula = "if_else((..events[1] - ((round(nDays/(90 * (4/..dev_maturity)),1)/100) * 1/2)) < 0, .005,..events[1] - ((round(nDays/(90 * (4/..dev_maturity)),1)/100) * 1/2))", dist = "binary")
dt <- addColumns(defExtEvent, dt)
defCritEvent <- defDataAdd(varname = "critical_event", formula = "if_else((..events[2] - ((round(nDays/(90 * (4/..dev_maturity)),1)/100) * 2/2)) < 0, .005,..events[2] - ((round(nDays/(90 * (4/..dev_maturity)),1)/100) * 2/2))", dist = "binary")
dt <- addColumns(defCritEvent, dt)
defHighEvent <- defDataAdd(varname = "high_event", formula = "if_else((..events[3] - ((round(nDays/(90 * (4/..dev_maturity)),1)/100) * 3/2)) < 0, .005,..events[3] - ((round(nDays/(90 * (4/..dev_maturity)),1)/100) * 3/2))", dist = "binary")
dt <- addColumns(defHighEvent, dt)
# Keep only the event flags, keyed by the sequential timeID.
dt <- dt %>% mutate(id = timeID) %>% select(id, extreme_event, critical_event, high_event, dev_maturity)
# Reset ID column
setkey(dt,id)
# For each day with an extreme/critical/high event, randomly draw a
# small Poisson count of hits; counts are slightly correlated.
dt <- addCorGen(dtOld = dt, idvar = "id", nvars = 1, rho = .4,
                corstr = "cs", dist = "poisson",
                param1 = "extreme_event", cnames = "extreme")
dt <- addCorGen(dtOld = dt, idvar = "id", nvars = 1, rho = .4,
                corstr = "cs", dist = "poisson",
                param1 = "critical_event", cnames = "critical")
dt <- addCorGen(dtOld = dt, idvar = "id", nvars = 1, rho = .4,
                corstr = "cs", dist = "poisson",
                param1 = "high_event", cnames = "high")
#view(dt)
# An event flag of 1 with a Poisson draw of 0 still means at least one
# hit occurred, so floor those counts at 1.
for (sev in c("extreme", "critical", "high")) {
  flag <- paste0(sev, "_event")
  dt[[sev]][dt[[flag]] == 1 & dt[[sev]] == 0] <- 1
}
## Asset Section
# Merge the Team/Days data with the hits/events data
dt <- mergeData(team, dt, "id")
# Audit flag: the second half of the id range (teams 4-6) is audited.
dt$audit <- if_else(dt$id > total / 2, 1, 0)
# Exposure type cycles external/partner/internal over idTeam
# (1 & 4 external, 2 & 5 partner, 3 & 6 internal); the numeric
# exposure_id follows the same 1/2/3 cycle.
exposure_levels <- c("external", "partner", "internal")
exposure_cycle <- ((dt$idTeam - 1) %% 3) + 1
dt$exposure <- exposure_levels[exposure_cycle]
dt$exposure_id <- exposure_cycle
# Add Names based on audit and exposure type - note that this is just text, idTeam is most important.
dt <- genFactor(dt, "idTeam",
                labels = c("external audit", "partner audit", "internal audit",
                           "external no audit", "partner no audit", "internal no audit"))
# Copy team name over and get a clean dataframe.
dt$team <- dt$fidTeam
## Select important columns
dt <- dt %>% select(idTeam, team, nDays, id, dev_maturity, extreme,
                    critical, high, audit, exposure, exposure_id)
# Need to move melt to later as it messes with defCondition
dt <- melt(dt, id.vars = c("idTeam","team","nDays","id","dev_maturity","audit","exposure","exposure_id" ),
           measure.vars = c("extreme", "critical", "high"),
           variable.name = "severity", value.name = "hits")
# Catch records with NA hit counts before rep() — rep errors on NA times.
dt$hits <- if_else(is.na(dt$hits),0,dt$hits)
# Get One Vuln Per Row i.e. it drops zeros...
# seq_len(.N) instead of 1:.N guards the empty-table case.
dt <- dt[rep(seq_len(.N), hits)]
# Reset ID field
dt$id <- seq_len(nrow(dt))
setkey(dt, id)
######
## Remediation Time Section
# Map the severity factor to a numeric code for formula input
# (extreme = 1, critical = 2, high = 3).
dt$severity_id <- as.numeric(match(dt$severity,
                                   c("extreme", "critical", "high")))
# Random time adjustment, roughly based on event volume; feeds the
# fix-time generator below.
defTimeAdj <- defDataAdd(varname = "time_adj", formula = eventSum,
                         variance = .1, dist = "normal")
dt <- addColumns(defTimeAdj, dt)
# Normal draws can go non-positive; clamp those to a small positive value.
dt$time_adj <- if_else(dt$time_adj <= 0, .01, dt$time_adj)
# Get probability associated with time from a beta distribution.
# betaFormula()/betaVariance() come from manifesto_functions.R and
# supply the beta mean and dispersion from the risk dimensions.
betaDef <- defDataAdd(varname = "beta_prob",
formula = "betaFormula(audit,severity_id, exposure_id,time_adj, nDays, dev_maturity)",
variance = "betaVariance(audit,severity_id, exposure_id,time_adj)",
dist = "beta")
#Add column with list of probabilities
dt <- addColumns(betaDef, dt)
# Use beta probabilities as input into a binomial for time to fix;
# binomVariance() (manifesto_functions.R) sets the trial count.
binDef <- defDataAdd(varname = "fix_time",
formula = "beta_prob",
variance = "binomVariance(audit,severity_id, exposure_id,time_adj)",
dist = "binomial"
)
#Create column for time to fix.
dt <- addColumns(binDef, dt)
# A zero-day fix time is not meaningful; floor fix_time at 1 day.
dt$fix_time[dt$fix_time == 0] <- 1
# Fix Date: day number on which the vuln gets fixed.
defFixDate <- defDataAdd(varname = "fix_date", formula = "fix_time + nDays", dist = "nonrandom")
dt <- addColumns(defFixDate, dt)
# Chance of something NOT getting fixed goes up with more time;
# the probability is capped at 1 for the binary distribution.
defStatus <- defDataAdd(varname = "status", formula = "if_else((fix_time/..patch_fail)/365 > 1,1,(fix_time/..patch_fail)/365)", dist = "binary")
dt <- addColumns(defStatus, dt)
# Add date stamps for first seen and last seen (year-1 rows use a 2020
# origin, year-2 rows a 2021 origin).
dt$first.seen <- if_else(dt$nDays <= 365, as.Date(dt$nDays, origin = "2020-01-01"),
                         as.Date(dt$nDays, origin = "2021-01-01"))
dt$last.seen <- if_else(dt$nDays <= 365, as.Date(dt$fix_date, origin = "2020-01-01"),
                        as.Date(dt$fix_date, origin = "2021-01-01"))
# Add week number for simplicity
# TYPE FIX: dplyr::if_else requires both branches to share a type;
# lubridate::week() returns a double while the original false branch was
# integer, so coerce both branches to integer.
dt$week.fseen <- if_else(dt$nDays <= 365, as.integer(week(dt$first.seen)),
                         as.integer(week(dt$first.seen) + 52))
dt$week.lseen <- if_else(year(dt$last.seen) == 2020, as.integer(week(dt$last.seen)),
                         as.integer(week(dt$last.seen) + 52))
View(dt)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment