Skip to content

Instantly share code, notes, and snippets.

@MSAdministrator
Created January 7, 2022 02:33
Show Gist options
Save MSAdministrator/0b7f33b785320de093b70861bfd7b1e0 to your computer and use it in GitHub Desktop.
Download Malicious Files from malware-traffic-analysis.net
import os
import requests
from bs4 import BeautifulSoup
import time
import re
import io
from zipfile import ZipFile
# Default file-name suffixes (written without a leading dot) that select
# which archive members are of interest.
EXTENSION_LIST = [
    'doc', 'xls', 'ppt',
    'cab',
    'exe', 'pe',
    'apk',
    'macos', 'elf',
    'dylib', 'bundle',  # both Mach-O/Unibin
    'dmg', 'jar', 'pdf', 'eicar', 'swf',
]
class MaliciousSamples:
    """Download and unpack sample archives from malware-traffic-analysis.net.

    The archives are expected to contain malicious files, so this should only
    be run inside an isolated analysis environment.
    """

    # Link text of a daily post on a yearly index page, e.g. ``2021-01-05``.
    # Raw string: ``\d`` in a plain literal is an invalid escape sequence.
    DATE_RE = re.compile(r'^\d{4}-\d{2}-\d{2}$')
    MALWARE_TRAFFIC_ANALYSIS_URL = 'https://malware-traffic-analysis.net'
    # Seconds ``requests`` waits on an unresponsive server before raising;
    # without a timeout a stalled connection would block the run forever.
    REQUEST_TIMEOUT = 30
    # Password used to decrypt the members of every downloaded archive.
    ZIP_PASSWORD = b'infected'

    def malware_traffic_analysis(self,
            years=['2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020', '2021'],
            extensions=EXTENSION_LIST,
            save_path=os.path.dirname(os.path.abspath(__file__))
        ):
        """Fetch every "malware" zip linked from the daily posts of ``years``.

        A folder mirroring the post's URL path is created under ``save_path``
        for every daily post found, whether or not the post yields an archive.
        Archives are unpacked in memory straight into that folder.

        Args:
            years: Year strings whose ``<year>/index.html`` page is scanned
                for daily posts. The default list is only read, never mutated.
            extensions: File-name suffixes to extract. If the collection
                contains ``'all'``, every archive member is extracted.
                The value is also stored on ``self.extensions``.
            save_path: Directory under which the per-post folders are created.
                Defaults to the directory containing this file.

        Returns:
            A list with one entry per successfully downloaded archive: the
            folder that archive was extracted into (folders repeat when a
            post links several archives).

        Raises:
            requests.exceptions.RequestException: On connection failures or
                when a server stays silent for ``REQUEST_TIMEOUT`` seconds.
            zipfile.BadZipFile, RuntimeError: Propagated from ``zipfile`` when
                a download is not a valid archive or cannot be decrypted.
        """
        self.extensions = extensions
        malware_zip_links = []
        for date_url in self._collect_date_links(years):
            # Pause between posts to keep the request rate low.
            time.sleep(1)
            # Everything before the final path component is the post's folder.
            date_folder = date_url.rsplit('/', 1)[0]
            # Reuse the URL path below the site root as the local sub-folder.
            folder_path = date_folder.split(self.MALWARE_TRAFFIC_ANALYSIS_URL)[-1].strip()
            folder_path = save_path + folder_path
            os.makedirs(folder_path, exist_ok=True)

            date_response = requests.get(date_url, timeout=self.REQUEST_TIMEOUT)
            if date_response.status_code != 200:
                continue
            soup = BeautifulSoup(date_response.text, 'lxml')
            for link in soup.find_all('a'):
                text = link.get_text().upper()
                href = link.get('href')
                # Only anchors labelled as a malware zip are downloaded; an
                # anchor without an href cannot be turned into a URL.
                if not href or 'MALWARE' not in text or '.ZIP' not in text:
                    continue
                malware_link = '{}/{}'.format(date_folder, href)
                malware_response = requests.get(malware_link, timeout=self.REQUEST_TIMEOUT)
                if malware_response.status_code != 200:
                    continue
                self._extract_samples(malware_response.content, folder_path)
                malware_zip_links.append(folder_path)
        return malware_zip_links

    def _collect_date_links(self, years):
        """Return the URLs of all daily posts linked from the yearly index pages."""
        date_links = []
        for year in years:
            year_url = '{}/{}/index.html'.format(self.MALWARE_TRAFFIC_ANALYSIS_URL, year)
            year_response = requests.get(year_url, timeout=self.REQUEST_TIMEOUT)
            if year_response.status_code != 200:
                continue
            soup = BeautifulSoup(year_response.text, 'lxml')
            for year_link in soup.find_all('a'):
                href = year_link.get('href')
                # A daily post is recognised by link text of the form YYYY-MM-DD.
                if href and self.DATE_RE.match(year_link.get_text()):
                    date_links.append(
                        '{}/{}/{}'.format(self.MALWARE_TRAFFIC_ANALYSIS_URL, year, href)
                    )
        return date_links

    def _extract_samples(self, archive_bytes, folder_path):
        """Unpack the wanted members of one in-memory zip into ``folder_path``."""
        # The context manager closes the archive even if extraction raises.
        with ZipFile(io.BytesIO(archive_bytes)) as zf:
            if 'all' in self.extensions:
                zf.extractall(path=folder_path, pwd=self.ZIP_PASSWORD)
                return
            # One endswith() call against a tuple extracts each member at most
            # once, even when several suffixes match the same name.
            wanted = tuple(self.extensions)
            for name in zf.namelist():
                if name.endswith(wanted):
                    zf.extract(name, folder_path, pwd=self.ZIP_PASSWORD)

    def test_files(self):
        """Placeholder listing the WildFire sample-file URLs; performs no action.

        If you have SSL decryption enabled on the firewall, use one of the following URLs:
        PE—https://wildfire.paloaltonetworks.com/publicapi/test/pe
        APK—https://wildfire.paloaltonetworks.com/publicapi/test/apk
        MacOSX—https://wildfire.paloaltonetworks.com/publicapi/test/macos
        ELF—https://wildfire.paloaltonetworks.com/publicapi/test/elf
        If you do not have SSL decryption enabled on the firewall, use one of the following URLs instead:
        PE—http://wildfire.paloaltonetworks.com/publicapi/test/pe
        APK—http://wildfire.paloaltonetworks.com/publicapi/test/apk
        MacOSX—http://wildfire.paloaltonetworks.com/publicapi/test/macos
        ELF—http://wildfire.paloaltonetworks.com/publicapi/test/elf
        """
# Run the scraper with its default arguments and show the list it returns.
_extracted_folders = MaliciousSamples().malware_traffic_analysis()
print(_extracted_folders)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment