Skip to content

Instantly share code, notes, and snippets.

@shellcromancer
Last active November 17, 2022 15:39
Show Gist options
  • Save shellcromancer/4fdcfc942f46e4375fe3190c15779452 to your computer and use it in GitHub Desktop.
Save shellcromancer/4fdcfc942f46e4375fe3190c15779452 to your computer and use it in GitHub Desktop.
Pull all malware samples from https://objective-see.com/malware.html into a nice tree structure by sample type and unzip them.
#!/usr/bin/env python3
import io
import os
import shutil
import sys
import zipfile
from collections import defaultdict
from typing import Dict, List

import requests
# JSON index of the samples; the records live under its 'malware' key.
SAMPLES_URL = r"https://objective-see.com/malware.json"
# Password set on every sample archive before it is extracted.
SAMPLES_PASSWORD = b"infect3d"
# Seconds to wait for the index request. requests applies no timeout by
# default, so without this a stalled server would hang the script forever.
INDEX_TIMEOUT_SECONDS = 30

# Fetch and parse the index. Any HTTP/network failure or an unparsable body
# terminates the script with the error text (exit status 1).
try:
    response = requests.get(SAMPLES_URL, timeout=INDEX_TIMEOUT_SECONDS)
    response.raise_for_status()
    # Parsing happens inside the try so a non-JSON body exits cleanly instead
    # of surfacing as an unhandled traceback. ValueError covers the JSON
    # decode errors of every requests version.
    data = response.json().get('malware', [])
except (requests.RequestException, ValueError) as e:
    sys.exit(e)

sample_count = len(data)
print(f'Got {sample_count} macOS malware samples.')
# Group the sample records (dicts from the index) by their 'type' field.
# Records that carry no 'type' are collected under the 'general' bucket.
bucketed_samples: Dict[str, List[Dict[str, str]]] = defaultdict(list)
for sample in data:
    stype = sample.get('type')
    bucket_name = 'general' if stype is None else stype
    # defaultdict(list) creates the bucket on first access, so the lookup
    # cannot raise KeyError and needs no exception handling.
    bucketed_samples[bucket_name].append(sample)
# Translation table for cannonicalize_name: parentheses and commas are
# deleted, spaces become underscores. Built once rather than per call.
_NAME_TRANSLATION = str.maketrans({"(": None, ")": None, ",": None, " ": "_"})


def cannonicalize_name(name: str) -> str:
    """Return *name* lower-cased and simplified for use as a path component.

    Parentheses and commas are removed and every space is replaced with an
    underscore. All other characters (including path separators) are kept
    unchanged.

    Args:
        name: The bucket or sample name to convert.

    Returns:
        The converted name.
    """
    # One translate pass replaces the chain of str.replace calls (one of
    # which was an accidental duplicate) and yields the same result.
    return name.lower().translate(_NAME_TRANSLATION)
# Seconds allowed for each archive request. requests applies no timeout by
# default, so a stalled download would otherwise block the whole run.
DOWNLOAD_TIMEOUT_SECONDS = 120

# For every bucket, create macOS/<bucket>/ and extract each sample archive
# into macOS/<bucket>/<sample>/. A failure on one sample is reported and the
# loop moves on to the next one.
for bucket, samples in bucketed_samples.items():
    print(f"got samples {len(samples)} of type {bucket}")
    base_path = 'macOS/' + cannonicalize_name(bucket)
    try:
        os.makedirs(base_path)
    except FileExistsError:
        print(f"folder {base_path} exists so skipped creation")

    for i, s in enumerate(samples):
        dl_url, name = s.get('download'), s.get('name')
        if dl_url is None or dl_url == '#':
            print(f'{bucket} - {name} has no download url so skipping')
            continue
        if name is None:
            # cannonicalize_name() needs a string; a record without a name
            # would otherwise crash the run with AttributeError.
            print(f'{bucket} - sample {i+1} has no name so skipping')
            continue

        dl_path = base_path + '/' + cannonicalize_name(name)
        # An existing directory is treated as "already downloaded".
        if os.path.isdir(dl_path):
            print(f'{bucket} - {name} has already been downloaded so skipping')
            continue

        try:
            # stream=True defers the body download until r.content is read,
            # i.e. until after the status check has passed.
            with requests.get(dl_url, stream=True,
                              timeout=DOWNLOAD_TIMEOUT_SECONDS) as r:
                r.raise_for_status()
                # The context manager closes the archive even on failure.
                with zipfile.ZipFile(io.BytesIO(r.content)) as z:
                    z.setpassword(SAMPLES_PASSWORD)
                    z.extractall(dl_path)
        except (requests.RequestException, zipfile.BadZipFile,
                RuntimeError, OSError) as e:
            # RuntimeError: zipfile rejects the password or the archive's
            # compression/encryption; OSError: writing the files failed.
            print(f"[! Error] failed to download {bucket} - {name}: {e}")
            # Drop any partially extracted directory; otherwise the isdir()
            # check above would skip this sample on every later run.
            shutil.rmtree(dl_path, ignore_errors=True)
        else:
            print(f"downloaded {name}. {i+1}/{len(samples)} of {bucket}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment