Skip to content

Instantly share code, notes, and snippets.

@jedmitten
Created August 18, 2023 16:39
Show Gist options
  • Save jedmitten/b81152eb7485f1544a7efcda30a2e9da to your computer and use it in GitHub Desktop.
Save jedmitten/b81152eb7485f1544a7efcda30a2e9da to your computer and use it in GitHub Desktop.
Parse MITRE ATT&CK Group data
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 15,
"id": "e5f273c8-fea7-4977-a21f-59af91acdfc1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The autoreload extension is already loaded. To reload it, use:\n",
" %reload_ext autoreload\n"
]
}
],
"source": [
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "1e2ee2e8-36c5-4f2f-ada9-5cdd70135805",
"metadata": {},
"outputs": [],
"source": [
"import logging\n",
"\n",
"logging.getLogger(\"requests\").setLevel(logging.WARNING)\n",
"logging.getLogger(\"urllib3\").setLevel(logging.WARNING)\n",
"logging.basicConfig(level=logging.INFO)\n",
"LOGGER = logging.getLogger(\"parse_mitre_groups\")\n",
"\n",
"from urllib.parse import urljoin\n",
"import requests\n",
"from pydantic import BaseModel\n",
"from bs4 import BeautifulSoup"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "c55bca76-8968-4d07-a24f-e9ab51ed94ff",
"metadata": {},
"outputs": [],
"source": [
"# Fetch the ATT&CK groups index page; later cells parse `resp`.\n",
"MITRE_BASE_URL = \"https://attack.mitre.org\"\n",
"MITRE_URL = MITRE_BASE_URL + \"/groups/\"\n",
"# Without a timeout, requests waits indefinitely on a stalled connection.\n",
"resp = requests.get(MITRE_URL, timeout=30)\n",
"if resp.status_code != 200:\n",
"    raise RuntimeError(\"Could not reach the url: \" + MITRE_URL)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "761d2354-644e-40df-8262-6844a0c89387",
"metadata": {},
"outputs": [],
"source": [
"soup = BeautifulSoup(resp.text, \"html.parser\")"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "0823b2ae-ed4d-43f6-9a67-6acde1495dab",
"metadata": {},
"outputs": [],
"source": [
"# side_navs = soup.find_all(\"div\", {\"class\": \"sidenav-head\"})\n",
"table = soup.find_all(\"table\")[0]"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "a9cc5d10-e45f-43d7-8a53-fddfc3c99561",
"metadata": {},
"outputs": [],
"source": [
"tbody = table.find(\"tbody\")\n",
"trows = tbody.find_all(\"tr\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "222962ff-339e-45a8-90fb-753de9e36759",
"metadata": {},
"outputs": [],
"source": [
"# Turn each row of the groups table into a dict keyed by column name.\n",
"cols = [\"id\", \"name\", \"associated-groups\", \"description\"]\n",
"data = []\n",
"for row in trows:\n",
"    d = {}\n",
"    # zip pairs every cell with its column name, replacing the manual\n",
"    # counter; cells beyond the known columns are ignored.\n",
"    for col, cell in zip(cols, row.find_all(\"td\")):\n",
"        d[col] = cell.get_text().strip()\n",
"        LOGGER.debug(\"d[%s] = %s\", col, d[col])\n",
"        if col == \"id\":\n",
"            # the id cell links to the group's own page\n",
"            d[\"url\"] = urljoin(MITRE_BASE_URL, cell.a[\"href\"])\n",
"            LOGGER.debug(\"d['url'] = %s\", d[\"url\"])\n",
"    data.append(d)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "532bd849-17b9-4797-8a50-658b6bd7f0c2",
"metadata": {},
"outputs": [],
"source": [
"def parse_associated_groups(records):\n",
"    \"\"\"Return copies of ``records`` with 'associated-groups' split into a list.\n",
"\n",
"    The comma-separated string stored under 'associated-groups' is split,\n",
"    each name is stripped, and empty names are dropped, so a blank cell\n",
"    becomes ``[]``. Every input dict is shallow-copied and left unmodified.\n",
"    \"\"\"\n",
"    key = \"associated-groups\"\n",
"    out = []\n",
"    for record in records:\n",
"        parsed = dict(record)  # don't modify source data\n",
"        names = (part.strip() for part in parsed[key].split(\",\"))\n",
"        parsed[key] = [name for name in names if name]\n",
"        out.append(parsed)\n",
"    return out"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "2fab1f3c",
"metadata": {},
"outputs": [],
"source": [
"# NOTE(review): EXCLUDE_IDS is compared against the \"name\" field - confirm whether \"id\" was intended.\n",
"EXCLUDE_IDS = [\"0-0\"]\n",
"final_data = [x for x in data if x[\"name\"] not in EXCLUDE_IDS]\n",
"# Parse the filtered rows; passing `data` here would discard the exclusion above.\n",
"final_data = parse_associated_groups(final_data)\n"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "a1f1c07d-b225-4490-9a71-21828d869360",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'id': 'G0018',\n",
" 'url': 'https://attack.mitre.org/groups/G0018',\n",
" 'name': 'admin@338',\n",
" 'associated-groups': [],\n",
" 'description': 'admin@338 is a China-based cyber threat group. It has previously used newsworthy events as lures to deliver malware and has primarily targeted organizations involved in financial, economic, and trade policy, typically using publicly available RATs such as PoisonIvy, as well as some non-public backdoors.'},\n",
" {'id': 'G0130',\n",
" 'url': 'https://attack.mitre.org/groups/G0130',\n",
" 'name': 'Ajax Security Team',\n",
" 'associated-groups': ['Operation Woolen-Goldfish',\n",
" 'AjaxTM',\n",
" 'Rocket Kitten',\n",
" 'Flying Kitten',\n",
" 'Operation Saffron Rose'],\n",
" 'description': 'Ajax Security Team is a group that has been active since at least 2010 and believed to be operating out of Iran. By 2014 Ajax Security Team transitioned from website defacement operations to malware-based cyber espionage campaigns targeting the US defense industrial base and Iranian users of anti-censorship technologies.'}]"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"final_data[:2]"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "de0d2d2b",
"metadata": {},
"outputs": [],
"source": [
"def parse_group_page(record):\n",
"    \"\"\"Download the TTP JSON linked from a group's ATT&CK page.\n",
"\n",
"    Requests ``record['url']`` and scans the page's ``dropdown-item`` anchors\n",
"    for an href that starts with '/group' and ends with '.json'. The decoded\n",
"    JSON of the first such link that downloads successfully is returned.\n",
"\n",
"    Returns ``None`` when the group page itself cannot be fetched and an\n",
"    empty dict when no matching link could be downloaded.\n",
"    \"\"\"\n",
"    LOGGER.info(\"Requesting TTPs data for group: \" + record[\"id\"])\n",
"    # Timeouts keep one stalled connection from hanging the whole crawl.\n",
"    resp = requests.get(record[\"url\"], timeout=30)\n",
"    if resp.status_code != 200:\n",
"        LOGGER.warning(\"Could not access url: \" + record[\"url\"])\n",
"        return None\n",
"    soup = BeautifulSoup(resp.text, \"html.parser\")\n",
"    ttps_data = {}\n",
"    for a in soup.find_all(\"a\", {\"class\": \"dropdown-item\"}):\n",
"        href = a.get(\"href\", \"\")  # anchors without an href are skipped\n",
"        if not (href.startswith(\"/group\") and href.endswith(\".json\")):\n",
"            continue\n",
"        ttps_resp = requests.get(urljoin(MITRE_BASE_URL, href), timeout=30)\n",
"        if ttps_resp.status_code != 200:\n",
"            LOGGER.warning(\"Could not download TTP data for: \" + record[\"id\"])\n",
"            continue\n",
"        LOGGER.debug(\"Found TTPs data for: \" + record[\"id\"])\n",
"        ttps_data = ttps_resp.json()\n",
"        break  # one successful download is enough\n",
"    return ttps_data"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0d71d437-abba-4f04-8e84-f08a4a0f1db5",
"metadata": {},
"outputs": [],
"source": [
"# Attach the downloaded TTP data to every group record (one request per group).\n",
"for record in final_data:\n",
"    record[\"ttps_data\"] = parse_group_page(record)\n"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "1bbcd524-f060-4989-9e58-faedf2eb5ada",
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"\n",
"# Write-only mode is enough: the handle is never read back.\n",
"with open(\"mitre_ttps_data_2023-08-18.json\", \"w\", encoding=\"utf-8\") as f:\n",
"    json.dump(final_data, f)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2bb0e9e5",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment