Skip to content

Instantly share code, notes, and snippets.

@9b
Created February 8, 2024 13:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save 9b/ed1b9eb4f8eb30eafad4d04c77377ded to your computer and use it in GitHub Desktop.
Save 9b/ed1b9eb4f8eb30eafad4d04c77377ded to your computer and use it in GitHub Desktop.
Extract MITRE ATT&CK techniques into a file.
"""Extract MITRE ATT&CK techniques into a file."""
import bs4 as bs
import requests
# Site root that get_urls() prepends to the hrefs scraped from the index page.
root_url = "https://attack.mitre.org"
# Path of the text file the script entry point opens for writing its output.
file_name = "mitre.txt"
def get_urls():
    """Collect the URL of every technique on the enterprise index page.

    Downloads the enterprise techniques index, looks at each ``<tr>`` with
    class ``technique``, takes the first element matching ``:nth-child(2)``
    inside the row and joins the ``href`` of its first link onto
    ``root_url``. Rows without such a link are skipped.

    Returns:
        list[str]: technique page URLs in the order they appear on the page.

    Raises:
        requests.RequestException: if the index page cannot be fetched
            (including timeouts) or the server answers with an HTTP error
            status.
    """
    index_url = root_url + "/techniques/enterprise/"
    # requests never times out by default; bound the wait so a stalled
    # connection cannot hang the script forever.
    response = requests.get(index_url, timeout=30)
    # Fail loudly on 4xx/5xx instead of parsing an error page into an
    # empty result.
    response.raise_for_status()
    soup = bs.BeautifulSoup(response.content, 'html.parser')

    urls = []
    for row in soup.find_all('tr', {'class': 'technique'}):
        cell = row.select_one(":nth-child(2)")
        link = cell.find('a') if cell is not None else None
        # Skip malformed rows rather than crashing on a missing anchor/href.
        if link is None or not link.get('href'):
            continue
        urls.append(root_url + link['href'])
    return urls
def derive_id(url):
    """Derive the dotted technique ID from a technique URL.

    The URL is split on ``/``; the non-empty segments that follow the last
    ``techniques`` segment are joined with dots, so
    ``.../techniques/T1548/001/`` yields ``T1548.001``.

    Args:
        url: URL string to inspect.

    Returns:
        str: the dotted ID, or an empty string when the URL contains no
        ``techniques`` segment or nothing follows it.
    """
    parts = url.split('/')
    # Start from an empty list (not None) so a URL without a 'techniques'
    # segment produces '' instead of raising TypeError in the join below.
    tail = []
    for idx, item in enumerate(parts):
        if item == 'techniques':
            tail = parts[idx + 1:]
    # Empty segments come from trailing or doubled slashes; drop them.
    return '.'.join(segment for segment in tail if segment)
def extract_content(url):
    """Fetch one technique page and extract its fields into a dict.

    Args:
        url: address of the technique page to download.

    Returns:
        dict: with the keys

        * ``url`` - the URL that was passed in.
        * ``id`` - result of ``derive_id(url)``.
        * ``label`` - text of the first ``<h1>`` inside the
          ``container-fluid`` div with runs of whitespace collapsed to
          single spaces, or ``None`` if it is not found.
        * ``description`` - text of every ``<p>`` inside the
          ``description-body`` div joined with spaces, or ``None`` if that
          div is not found.
        * ``examples`` - list of strings, one per data row of the table
          following the ``<h2 id="examples">`` heading (text of the first
          ``<p>`` in the row's ``:nth-child(3)`` element); empty when the
          page has no such heading or table.

    Raises:
        requests.RequestException: if the page cannot be fetched
            (including timeouts) or the server answers with an HTTP error
            status.
    """
    tmp = {'url': url, 'id': derive_id(url), 'label': None,
           'description': None, 'examples': []}

    # requests never times out by default; bound the wait per page.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = bs.BeautifulSoup(response.content, 'html.parser')

    container = soup.find('div', {'class': 'container-fluid'})
    if container is not None:
        heading = container.find('h1')
        if heading is not None:
            # split()/join trims the newlines and padding around the heading
            # while keeping the single spaces between words intact.
            tmp['label'] = ' '.join(heading.text.split())
        body = container.find('div', {'class': 'description-body'})
        if body is not None:
            tmp['description'] = ' '.join(p.text for p in body.find_all('p'))

    examples_heading = soup.find('h2', {'id': 'examples'})
    if examples_heading is None:
        return tmp
    table = examples_heading.find_next('table')
    if table is None:
        return tmp

    # The first row is skipped as the table header.
    for row in table.find_all('tr')[1:]:
        cell = row.select_one(":nth-child(3)")
        paragraph = cell.find('p') if cell is not None else None
        # Ignore rows lacking the expected cell/paragraph instead of crashing.
        if paragraph is not None:
            tmp['examples'].append(paragraph.text)
    return tmp
def main():
    """Scrape every enterprise technique and write it to ``file_name``.

    For each URL returned by ``get_urls()`` the page is processed with
    ``extract_content()`` and a "Technique / Description / Examples"
    section is appended to the output file.
    """
    urls = get_urls()
    # The context manager closes the file even when a fetch or parse fails
    # part-way through; UTF-8 is explicit so scraped non-ASCII text does not
    # depend on the platform's default encoding.
    with open(file_name, "w", encoding="utf-8") as f:
        for url in urls:
            item = extract_content(url)
            f.write("Technique:\n %s (%s)\n" % (item['label'], item['id']))
            f.write("\n")
            f.write("Description:\n %s\n" % item['description'])
            f.write("\n")
            f.write("Examples: \n%s\n" % '\n'.join(item['examples']))
            f.write("\n")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment