# mbrownnycnyc/get-chokepointtechniques.ps1

## get-chokepointtechniques.ps1
# summary:
# Process ATT&CK and D3FEND data to indicate likelihood an ATT&CK technique is a choke point as defined in this article (https://medium.com/mitre-engenuity/where-to-begin-prioritizing-att-ck-techniques-c535b50983f4).  This theory considers the following conjecture: MITRE will assign research hours to align with prioritization of offensive technique (resulting severity, frequency of observation, etc).  This theory considers the following facts: ATT&CK procedure examples are well documented, D3FEND offensive blast radius is well documented.
# Remember to see "Limitations and future work" section of the above blog post/article.
# also see  "BIASES WHEN MAPPING TO MITRE ATT&CK" section of (https://www.cisa.gov/uscert/sites/default/files/publications/Best%20Practices%20for%20MITRE%20ATTCK%20Mapping.pdf)
 # this covers the following biases: Novelty bias, Visibility bias, Producer bias, Victim bias, Availability bias...
  # you won't ever be able to eliminate these, either within yourself, on your team, or from a vendor or organization.  You would hope that the zen masters at MITRE would be a well trained authority on how to tame their own biases.

# I think it's valuable to review the D3FEND offensive technique count to understand blast radius as a separate metric, not combined with ATT&CK procedure example counts as well as a combined score.


<# theory failures:
* does the noise level of an offensive technique mean that it's more likely to cause successful traversal of the attack lifecycle?  The more thiiinnggss the techniques results in, the more successful it will be?
  * Answer: No.
* does every D3FEND offensive technique label have the same ability to increase success of traversing the attack lifecycle?  For example, if an attacker can modify an object file, does it lead to the same level of success as when the attacker modifies a shared library file?
  * Answer: No.

* is our goal to use ATT&CK and D3FEND frameworks to improve, by way of best effort, but not perfect our defensive capabilities so that these theory failures are tolerable?
 * Answer: Yes.
* Can discovering the techniques that are most "blasty" help prioritize protections and detections?
 * Answer: Yes.
* Will controlling and observing use of these techniques protect all the data! from getting in the hands of the evil intergalactic empire who uses pencil tip sized chips to hack your netwerkz?
 * Answer: No.

... but will anything really?

¯\_(ツ)_/¯

#>

#to do
# use multithreaded foreach-object with a concurrent dict instead for att&ck data
# use better enum strategy for d3fend data


### process att&ck data
# Loads the ATT&CK JSON from .\cti\enterprise-attack\enterprise-attack.json (relative to the
# current directory) and builds $technique_data: one object per technique/sub-technique ID with
#   id                 - the external ID (anything matching "T*")
#   procedure_examples - array of "uses" relationship objects whose target_ref is that technique
#   capec_id           - external_id value(s) of the technique's "capec" references, if any
# -raw hands the parser one string instead of one string per line.
$attackmatrix = get-content .\cti\enterprise-attack\enterprise-attack.json -raw | convertfrom-json

# scan the object list for attack-patterns once, instead of once per technique inside the loop
$attackpatterns = @($attackmatrix.objects | ? {$_.type -eq "attack-pattern"})

$alltechniquesandsubs = ($attackpatterns.external_references).external_id | ? {$_ -like "T*"} | sort

# external ID -> attack-pattern objects carrying that ID
# (hashtable keys compare case-insensitively, the same way -eq does for strings)
$patternsbyexternalid = @{}
foreach ($pattern in $attackpatterns) {
  $seenids = @{}
  foreach ($externalid in $pattern.external_references.external_id) {
    # skips references without an ID, non-technique IDs, and an ID repeated on the same pattern
    if ($externalid -notlike "T*" -or $seenids.ContainsKey($externalid)) { continue }
    $seenids[$externalid] = $true

    if (-not $patternsbyexternalid.ContainsKey($externalid)) {
      $patternsbyexternalid[$externalid] = New-Object 'System.Collections.Generic.List[object]'
    }
    $patternsbyexternalid[$externalid].Add($pattern)
  }
}

# target_ref -> "uses" relationship objects pointing at it
$usesbytarget = @{}
foreach ($object in $attackmatrix.objects) {
  if ($object.type -ne "relationship" -or $object.relationship_type -ne "uses" -or -not $object.target_ref) { continue }

  if (-not $usesbytarget.ContainsKey($object.target_ref)) {
    $usesbytarget[$object.target_ref] = New-Object 'System.Collections.Generic.List[object]'
  }
  $usesbytarget[$object.target_ref].Add($object)
}

$total = $alltechniquesandsubs.count
$enumerator = 0

# collecting the loop output avoids re-allocating the array on every "+="
$technique_data = @(foreach ($technique in $alltechniquesandsubs) {
  # keep the percentage numeric: a culture-formatted string (e.g. "12,34") does not convert
  # back to the integer that -percentcomplete expects
  $percentagecompleted = [math]::Round(($enumerator / $total) * 100, 2)
  write-progress -activity "Extracting technique data from object" -status "$percentagecompleted% completed ($enumerator out of $total)" -percentcomplete ([int]$percentagecompleted)

  $tempobj = "" | select id, procedure_examples, capec_id
  $tempobj.id = $technique

  $matchingpatterns = $patternsbyexternalid[$technique]

  # always an array, so .count is reliable for zero, one or many examples
  $tempobj.procedure_examples = @(foreach ($pattern in $matchingpatterns) {
    if ($pattern.id -and $usesbytarget.ContainsKey($pattern.id)) { $usesbytarget[$pattern.id] }
  })

  $tempobj.capec_id = ($matchingpatterns.external_references | ? {$_.source_name -eq "capec"}).external_id

  $tempobj
  $enumerator++
})


### process d3fend data
# For every technique in $technique_data, reads the matching D3FEND JSON file (if one exists under
# .\d3fend-gh-pages\api\offensive-technique\attack\) and records the distinct
# (offensive artifact relationship, offensive artifact) pairs found in off_to_def.results.bindings.
# The pairs are attached to the technique as d3fend_off_artifact_data and also collected in $d3fend_data.
$d3fend_data = @()

foreach ($technique in $technique_data) {
  $d3fendpath = ".\d3fend-gh-pages\api\offensive-technique\attack\$($technique.id).json"

  # a missing file is the expected "no coverage" case; report it without going through the error path
  if (-not (test-path $d3fendpath)) {
    write-host "no d3fend analysis for sub/technique: $($technique.id)"
    continue
  }

  try {
    $techniquerelationships = get-content $d3fendpath -ea stop | convertfrom-json
  } catch {
    # unreadable or malformed file: say why instead of reporting it as "no analysis"
    write-warning "failed to load d3fend data for sub/technique: $($technique.id): $($_.Exception.Message)"
    continue
  }
  # Since we're considering pure quantity: for blast radius we care about: `off_artifact_rel_label`, `off_tactic_rel_label` and `off_artifact_label`
  # for defenses we care about: `def_tactic_label` and `def_tactic_rel_label`

  write-host "yes d3fend analysis for sub/technique: $($technique.id)"

  $tempobj = "" | select "technique", "d3fend_off_artifact_data"
  $tempobj.technique = $technique.id

  # One pass over the bindings: keep the first occurrence of every (relationship, artifact) pair.
  # Hashtable keys compare case-insensitively, matching the -eq comparisons this replaces.
  $seenpairs = @{}
  $technique_rel_data = @(
    @(foreach ($binding in $techniquerelationships.off_to_def.results.bindings) {
      $relationship = $binding.off_artifact_rel_label.value
      $artifact = $binding.off_artifact_label.value

      # a pair needs both halves
      if (-not $relationship -or -not $artifact) { continue }

      $pairkey = "$relationship`n$artifact"
      if ($seenpairs.ContainsKey($pairkey)) { continue }
      $seenpairs[$pairkey] = $true

      $tempobj2 = "" | select "off_artifact_relationship","off_artifact"
      $tempobj2.off_artifact_relationship = $relationship
      $tempobj2.off_artifact = $artifact
      $tempobj2
    }) | sort off_artifact_relationship, off_artifact
  )

  $tempobj.d3fend_off_artifact_data = $technique_rel_data
  $d3fend_data += , $tempobj

  $technique | add-member -force -membertype noteproperty -name "d3fend_off_artifact_data" -value $tempobj.d3fend_off_artifact_data
}


#create choke point likelihood score given ATT&CK and D3FEND data
# Adds a chokepoint_score note property to every object in $technique_data:
#   1 point per procedure example, 3 per CAPEC ID,
#   1 per "may-" D3FEND relationship, 3 per any other D3FEND relationship.
$total = $technique_data.count
$enumerator = 0

foreach ($technique in $technique_data) {

  #progress bars are nice
  # keep the percentage numeric: a culture-formatted string (e.g. "12,34") does not convert
  # back to the integer that -percentcomplete expects
  $percentagecompleted = [math]::Round(($enumerator / $total) * 100, 2)
  write-progress -activity "Extracting technique data from object" -status "$percentagecompleted% completed ($enumerator out of $total)" -percentcomplete ([int]$percentagecompleted)

  # @(... | ? {$null -ne $_}) always yields an array, so .count is a true element count for
  # zero, one or many items. Taking .count on a single custom object is not dependable on every
  # PowerShell version - NOTE(review): confirm against the versions this must run on.
  # The null filter matters: @($null).count would be 1.
  $procedureexamples = @($technique.procedure_examples | ? {$null -ne $_})
  $capecids = @($technique.capec_id | ? {$null -ne $_})
  $d3fendrelationships = @($technique.d3fend_off_artifact_data | ? {$null -ne $_})

  #1 point for each procedure examples
  $tempscore = $procedureexamples.count

  #i guess here's 3 points for each CAPEC
  $tempscore += $capecids.count * 3

  #grant 1 point for each "may-" D3FEND relationship
  $tempscore += @($d3fendrelationships | ? {$_.off_artifact_relationship -match "may-.*"}).count

  #grant 3 points for each definitive D3FEND relationship
  $tempscore += @($d3fendrelationships | ? {$_.off_artifact_relationship -notmatch "may-.*"}).count * 3

  $technique | add-member -force -membertype noteproperty -name "chokepoint_score" -value $tempscore

  $enumerator++
}

<#
#calculate distribution of thhhhhiiiinnggggsss coral
Install-Module -Name Statistics -scope currentuser -allowclobber
#https://dille.name/blog/2017/03/21/data-analysis-using-powershell/
#the params need to be corrected, but we'll work with it
$histogramall = Get-Histogram $technique_data -Property chokepoint_score -BucketWidth 1 -BucketCount ($technique_data.count)
$percentile = Get-Histogram $technique_data -Property chokepoint_score -BucketWidth ($technique_data.count/100) -BucketCount 100
$histogram | add-bar | ? {$_.count -ne "0"}
#>


# Lists every technique whose chokepoint_score is at least that of the baseline technique,
# sorted ascending by score.
# Plain ASCII quotes: typographic quotes only parse when the file is read with the right encoding.
$technique = "T1055" #score is 89
#take this technique as a baseline given the example given in the MITRE article
$baseline = ($technique_data | select id, chokepoint_score, capec_id | ? {$_.id -eq $technique }).chokepoint_score

# without a baseline the -ge comparison below is against $null and every technique passes
if ($null -eq $baseline) {
  write-warning "baseline technique $technique has no chokepoint_score; the list below is unfiltered"
}

#score
$technique_data | select id, chokepoint_score, capec_id | ? {$_.chokepoint_score -ge $baseline} | sort chokepoint_score


<#
PS C:\Users\mattb\repos [09:19:11]> $technique_data | select id, chokepoint_score | ? {$_.chokepoint_score -ge $baseline} | sort -desc chokepoint_score

id        chokepoint_score
--        ----------------
T1105                  410
T1027                  343
T1082                  341
T1071.001              316
T1059.003              309
T1083                  291
T1057                  246
T1070.004              239
T1016                  233
T1140                  228
T1547.001              226
T1033                  195
T1005                  175
T1059.001              161
T1036.005              160
T1106                  154
T1204.002              153
T1113                  141
T1112                  141
T1573.001              141
T1053.005              139
T1041                  138
T1056.001              137
T1566.001              127
T1543.003              126
T1518.001              111
T1059.005              109
T1012                  104
T1047                   99
T1132.001               93
T1074.001               92
T1027.002               91
T1055                   89
#>
	# summary:
	# Process ATT&CK and D3FEND data to indicate likelihood an ATT&CK technique is a choke point as defined in this article (https://medium.com/mitre-engenuity/where-to-begin-prioritizing-att-ck-techniques-c535b50983f4). This theory considers the following conjecture: MITRE will assign research hours to align with prioritization of offensive technique (resulting severity, frequency of observation, etc). This theory considers the following facts: ATT&CK procedure examples are well documented, D3FEND offensive blast radius is well documented.
	# Remember to see "Limitations and future work" section of the above blog post/article.
	# also see "BIASES WHEN MAPPING TO MITRE ATT&CK" section of (https://www.cisa.gov/uscert/sites/default/files/publications/Best%20Practices%20for%20MITRE%20ATTCK%20Mapping.pdf)
	# this covers the following biases: Novelty bias, Visibility bias, Producer bias, Victim bias, Availability bias...
	# you won't ever be able to eliminate these, either within yourself, on your team, or from a vendor or organization. You would hope that the zen masters at MITRE would be a well trained authority on how to tame their own biases.

	# I think it's valuable to review the D3FEND offensive technique count to understand blast radius as a separate metric, not combined with ATT&CK procedure example counts as well as a combined score.


	<# theory failures:
	* does the noise level of an offensive technique mean that it's more likely to cause successful traversal of the attack lifecycle? The more thiiinnggss the techniques results in, the more successful it will be?
	* Answer: No.
	* does every D3FEND offensive technique label have the same ability to increase success of traversing the attack lifecycle? For example, if an attacker can modify an object file, does it lead to the same level of success as when the attacker modifies a shared library file?
	* Answer: No.

	* is our goal to use ATT&CK and D3FEND frameworks to improve, by way of best effort, but not perfect our defensive capabilities so that these theory failures are tolerable?
	* Answer: Yes.
	* Can discovering the techniques that are most "blasty" help prioritize protections and detections?
	* Answer: Yes.
	* Will controlling and observing use of these techniques protect all the data! from getting in the hands of the evil intergalactic empire who uses pencil tip sized chips to hack your netwerkz?
	* Answer: No.

	... but will anything really?

	¯\_(ツ)_/¯

	#>

	#to do
	# use multithreaded foreach-object with a concurrent dict instead for att&ck data
	# use better enum strategy for d3fend data




	### process att&ck data
	# Loads the ATT&CK JSON from .\cti\enterprise-attack\enterprise-attack.json (relative to the
	# current directory) and builds $technique_data: one object per technique/sub-technique ID with
	#   id                 - the external ID (anything matching "T*")
	#   procedure_examples - array of "uses" relationship objects whose target_ref is that technique
	#   capec_id           - external_id value(s) of the technique's "capec" references, if any
	# Pipes are written as plain "|" (a backslash-escaped pipe is not valid PowerShell).
	# -raw hands the parser one string instead of one string per line.
	$attackmatrix = get-content .\cti\enterprise-attack\enterprise-attack.json -raw | convertfrom-json

	# scan the object list for attack-patterns once, instead of once per technique inside the loop
	$attackpatterns = @($attackmatrix.objects | ? {$_.type -eq "attack-pattern"})

	$alltechniquesandsubs = ($attackpatterns.external_references).external_id | ? {$_ -like "T*"} | sort

	# external ID -> attack-pattern objects carrying that ID
	# (hashtable keys compare case-insensitively, the same way -eq does for strings)
	$patternsbyexternalid = @{}
	foreach ($pattern in $attackpatterns) {
	  $seenids = @{}
	  foreach ($externalid in $pattern.external_references.external_id) {
	    # skips references without an ID, non-technique IDs, and an ID repeated on the same pattern
	    if ($externalid -notlike "T*" -or $seenids.ContainsKey($externalid)) { continue }
	    $seenids[$externalid] = $true

	    if (-not $patternsbyexternalid.ContainsKey($externalid)) {
	      $patternsbyexternalid[$externalid] = New-Object 'System.Collections.Generic.List[object]'
	    }
	    $patternsbyexternalid[$externalid].Add($pattern)
	  }
	}

	# target_ref -> "uses" relationship objects pointing at it
	$usesbytarget = @{}
	foreach ($object in $attackmatrix.objects) {
	  if ($object.type -ne "relationship" -or $object.relationship_type -ne "uses" -or -not $object.target_ref) { continue }

	  if (-not $usesbytarget.ContainsKey($object.target_ref)) {
	    $usesbytarget[$object.target_ref] = New-Object 'System.Collections.Generic.List[object]'
	  }
	  $usesbytarget[$object.target_ref].Add($object)
	}

	$total = $alltechniquesandsubs.count
	$enumerator = 0

	# collecting the loop output avoids re-allocating the array on every "+="
	$technique_data = @(foreach ($technique in $alltechniquesandsubs) {
	  # keep the percentage numeric: a culture-formatted string (e.g. "12,34") does not convert
	  # back to the integer that -percentcomplete expects
	  $percentagecompleted = [math]::Round(($enumerator / $total) * 100, 2)
	  write-progress -activity "Extracting technique data from object" -status "$percentagecompleted% completed ($enumerator out of $total)" -percentcomplete ([int]$percentagecompleted)

	  $tempobj = "" | select id, procedure_examples, capec_id
	  $tempobj.id = $technique

	  $matchingpatterns = $patternsbyexternalid[$technique]

	  # always an array, so .count is reliable for zero, one or many examples
	  $tempobj.procedure_examples = @(foreach ($pattern in $matchingpatterns) {
	    if ($pattern.id -and $usesbytarget.ContainsKey($pattern.id)) { $usesbytarget[$pattern.id] }
	  })

	  $tempobj.capec_id = ($matchingpatterns.external_references | ? {$_.source_name -eq "capec"}).external_id

	  $tempobj
	  $enumerator++
	})





	### process d3fend data
	# For every technique in $technique_data, reads the matching D3FEND JSON file (if one exists under
	# .\d3fend-gh-pages\api\offensive-technique\attack\) and records the distinct
	# (offensive artifact relationship, offensive artifact) pairs found in off_to_def.results.bindings.
	# The pairs are attached to the technique as d3fend_off_artifact_data and also collected in $d3fend_data.
	# Pipes are written as plain "|" (a backslash-escaped pipe is not valid PowerShell).
	$d3fend_data = @()

	foreach ($technique in $technique_data) {
	  $d3fendpath = ".\d3fend-gh-pages\api\offensive-technique\attack\$($technique.id).json"

	  # a missing file is the expected "no coverage" case; report it without going through the error path
	  if (-not (test-path $d3fendpath)) {
	    write-host "no d3fend analysis for sub/technique: $($technique.id)"
	    continue
	  }

	  try {
	    $techniquerelationships = get-content $d3fendpath -ea stop | convertfrom-json
	  } catch {
	    # unreadable or malformed file: say why instead of reporting it as "no analysis"
	    write-warning "failed to load d3fend data for sub/technique: $($technique.id): $($_.Exception.Message)"
	    continue
	  }
	  # Since we're considering pure quantity: for blast radius we care about: `off_artifact_rel_label`, `off_tactic_rel_label` and `off_artifact_label`
	  # for defenses we care about: `def_tactic_label` and `def_tactic_rel_label`

	  write-host "yes d3fend analysis for sub/technique: $($technique.id)"

	  $tempobj = "" | select "technique", "d3fend_off_artifact_data"
	  $tempobj.technique = $technique.id

	  # One pass over the bindings: keep the first occurrence of every (relationship, artifact) pair.
	  # Hashtable keys compare case-insensitively, matching the -eq comparisons this replaces.
	  $seenpairs = @{}
	  $technique_rel_data = @(
	    @(foreach ($binding in $techniquerelationships.off_to_def.results.bindings) {
	      $relationship = $binding.off_artifact_rel_label.value
	      $artifact = $binding.off_artifact_label.value

	      # a pair needs both halves
	      if (-not $relationship -or -not $artifact) { continue }

	      $pairkey = "$relationship`n$artifact"
	      if ($seenpairs.ContainsKey($pairkey)) { continue }
	      $seenpairs[$pairkey] = $true

	      $tempobj2 = "" | select "off_artifact_relationship","off_artifact"
	      $tempobj2.off_artifact_relationship = $relationship
	      $tempobj2.off_artifact = $artifact
	      $tempobj2
	    }) | sort off_artifact_relationship, off_artifact
	  )

	  $tempobj.d3fend_off_artifact_data = $technique_rel_data
	  $d3fend_data += , $tempobj

	  $technique | add-member -force -membertype noteproperty -name "d3fend_off_artifact_data" -value $tempobj.d3fend_off_artifact_data
	}





	#create choke point likelihood score given ATT&CK and D3FEND data
	# Adds a chokepoint_score note property to every object in $technique_data:
	#   1 point per procedure example, 3 per CAPEC ID,
	#   1 per "may-" D3FEND relationship, 3 per any other D3FEND relationship.
	# Pipes are written as plain "|" and the "* 3" multiplier is spelled out
	# (a backslash-escaped pipe and a bare ".count 3" are not valid PowerShell).
	$total = $technique_data.count
	$enumerator = 0

	foreach ($technique in $technique_data) {

	  #progress bars are nice
	  # keep the percentage numeric: a culture-formatted string (e.g. "12,34") does not convert
	  # back to the integer that -percentcomplete expects
	  $percentagecompleted = [math]::Round(($enumerator / $total) * 100, 2)
	  write-progress -activity "Extracting technique data from object" -status "$percentagecompleted% completed ($enumerator out of $total)" -percentcomplete ([int]$percentagecompleted)

	  # @(... | ? {$null -ne $_}) always yields an array, so .count is a true element count for
	  # zero, one or many items. Taking .count on a single custom object is not dependable on every
	  # PowerShell version - NOTE(review): confirm against the versions this must run on.
	  # The null filter matters: @($null).count would be 1.
	  $procedureexamples = @($technique.procedure_examples | ? {$null -ne $_})
	  $capecids = @($technique.capec_id | ? {$null -ne $_})
	  $d3fendrelationships = @($technique.d3fend_off_artifact_data | ? {$null -ne $_})

	  #1 point for each procedure examples
	  $tempscore = $procedureexamples.count

	  #i guess here's 3 points for each CAPEC
	  $tempscore += $capecids.count * 3

	  #grant 1 point for each "may-" D3FEND relationship
	  $tempscore += @($d3fendrelationships | ? {$_.off_artifact_relationship -match "may-.*"}).count

	  #grant 3 points for each definitive D3FEND relationship
	  $tempscore += @($d3fendrelationships | ? {$_.off_artifact_relationship -notmatch "may-.*"}).count * 3

	  $technique | add-member -force -membertype noteproperty -name "chokepoint_score" -value $tempscore

	  $enumerator++
	}

	<#
	#calculate distribution of thhhhhiiiinnggggsss coral
	Install-Module -Name Statistics -scope currentuser -allowclobber
	#https://dille.name/blog/2017/03/21/data-analysis-using-powershell/
	#the params need to be corrected, but we'll work with it
	$histogramall = Get-Histogram $technique_data -Property chokepoint_score -BucketWidth 1 -BucketCount ($technique_data.count)
	$percentile = Get-Histogram $technique_data -Property chokepoint_score -BucketWidth ($technique_data.count/100) -BucketCount 100
	$histogram | add-bar | ? {$_.count -ne "0"}
	#>



	# Lists every technique whose chokepoint_score is at least that of the baseline technique,
	# sorted ascending by score.
	# Plain ASCII quotes and plain "|" pipes: typographic quotes only parse when the file is read
	# with the right encoding, and a backslash-escaped pipe is not valid PowerShell.
	$technique = "T1055" #score is 89
	#take this technique as a baseline given the example given in the MITRE article
	$baseline = ($technique_data | select id, chokepoint_score, capec_id | ? {$_.id -eq $technique }).chokepoint_score

	# without a baseline the -ge comparison below is against $null and every technique passes
	if ($null -eq $baseline) {
	  write-warning "baseline technique $technique has no chokepoint_score; the list below is unfiltered"
	}

	#score
	$technique_data | select id, chokepoint_score, capec_id | ? {$_.chokepoint_score -ge $baseline} | sort chokepoint_score


	<#
	PS C:\Users\mattb\repos [09:19:11]> $technique_data | select id, chokepoint_score | ? {$_.chokepoint_score -ge $baseline} | sort -desc chokepoint_score

	id chokepoint_score
	-- ----------------
	T1105 410
	T1027 343
	T1082 341
	T1071.001 316
	T1059.003 309
	T1083 291
	T1057 246
	T1070.004 239
	T1016 233
	T1140 228
	T1547.001 226
	T1033 195
	T1005 175
	T1059.001 161
	T1036.005 160
	T1106 154
	T1204.002 153
	T1113 141
	T1112 141
	T1573.001 141
	T1053.005 139
	T1041 138
	T1056.001 137
	T1566.001 127
	T1543.003 126
	T1518.001 111
	T1059.005 109
	T1012 104
	T1047 99
	T1132.001 93
	T1074.001 92
	T1027.002 91
	T1055 89
	#>