Extract MITRE ATT&CK techniques into a file.
"""Extract MITRE ATT&CK techniques into a file.""" | |
import bs4 as bs | |
import requests | |
root_url = "https://attack.mitre.org" | |
file_name = "mitre.txt" | |
def get_urls():
    """Get MITRE ATT&CK URLs for processing."""
    urls = list()
    url = "https://attack.mitre.org/techniques/enterprise/"
    response = requests.get(url)
    soup = bs.BeautifulSoup(response.content, 'html.parser')
    # Each technique is a table row with class "technique"; the second
    # cell holds the relative link to the technique's page.
    rows = soup.find_all('tr', {'class': 'technique'})
    for row in rows:
        select = row.select_one(":nth-child(2)")
        full_url = root_url + select.find('a')['href']
        urls.append(full_url)
    return urls

def derive_id(url):
    """Use the URL to derive the technique ID.

    e.g. "https://attack.mitre.org/techniques/T1548/001/" -> "T1548.001"
    """
    parts = url.split('/')
    save = None
    for idx, item in enumerate(parts):
        if item != 'techniques':
            continue
        save = parts[idx + 1:]
    return '.'.join([x for x in save if x != ''])

def extract_content(url):
    """Extract content from the HTML to form our structure."""
    tmp = {'url': url, 'id': derive_id(url), 'label': None,
           'description': None, 'examples': list()}
    response = requests.get(url)
    soup = bs.BeautifulSoup(response.content, 'html.parser')
    container = soup.find('div', {'class': 'container-fluid'})
    # The page <h1> holds the technique name; collapse stray whitespace.
    label = ' '.join(container.find('h1').text.split())
    tmp['label'] = label
    content = container.find('div', {'class': 'description-body'})
    description = ' '.join([x.text for x in content.find_all('p')])
    tmp['description'] = description
    # Not every technique page has a "Procedure Examples" section.
    content = soup.find('h2', {'id': 'examples'})
    if not content:
        return tmp
    content = content.find_next('table')
    examples = list()
    rows = content.find_all('tr')
    for row in rows[1:]:
        # The third cell of each example row holds the description paragraph.
        select = row.select_one(":nth-child(3)").find('p')
        examples.append(select.text)
    tmp['examples'] = examples
    return tmp

if __name__ == "__main__": | |
"""I process, therefore I am.""" | |
mapping = list() | |
urls = get_urls() | |
f = open(file_name, "w") | |
for url in urls: | |
item = extract_content(url) | |
f.write("Technique:\n %s (%s)\n" % (item['label'], item['id'])) | |
f.write("\n") | |
f.write("Description:\n %s\n" % item['description']) | |
f.write("\n") | |
f.write("Examples: \n%s\n" % '\n'.join(item['examples'])) | |
f.write("\n") | |
f.close() |
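
The script writes one plain-text record per technique to mitre.txt, each record starting with a "Technique:" header followed by "Description:" and "Examples:" blocks. Below is a minimal sketch, assuming the script above has already been run and produced mitre.txt in the working directory, that reads those records back:

"""Sketch: read back the mitre.txt produced by the script above."""

file_name = "mitre.txt"

with open(file_name) as handle:
    text = handle.read()

# Each record written by the script begins with a "Technique:" header,
# so splitting on that header yields one chunk per technique.
records = [chunk.strip() for chunk in text.split("Technique:") if chunk.strip()]
print("Techniques extracted:", len(records))
if records:
    print("First record preview:\n", records[0][:200])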