Created
May 28, 2013 00:23
-
-
Save ikotler/5659736 to your computer and use it in GitHub Desktop.
Simple Malware AnaLysis Language (SMALL) is a toy domain-specific language (DSL) that I developed on top of Pythonect for the 'Hacking Like It's 2013' workshop/talk. See http://www.ikotler.org/HackLikeIts2013_BLR.pdf for the slides and a usage example.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# Copyright (C) 2013 Itzik Kotler | |
# | |
# This program is free software; you can redistribute it and/or modify | |
# it under the terms of the GNU General Public License as published by | |
# the Free Software Foundation; either version 2 of the License, or | |
# (at your option) any later version. | |
# | |
# This program is distributed in the hope that it will be useful, | |
# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
# GNU General Public License for more details. | |
# | |
# You should have received a copy of the GNU General Public License | |
# along with this program; if not, write to the Free Software | |
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
import re | |
import sys | |
import os | |
import base64 | |
import zlib | |
import string | |
import subprocess | |
import collections | |
import StringIO | |
import hashlib | |
import lxml.etree | |
__version__ = "0.1"

# Consts

# Dotted-quad IPv4 address. Each of the four capture groups is one octet
# restricted to 0-255, so findall() yields 4-tuples of octet strings.
IPV4_ADDRESS = re.compile(r'\b(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b')

# Base64 candidate: any number of 4-character groups followed by a final
# group that is either '='-padded or a full 4 characters. There are no capture
# groups, so findall() yields the whole match.
#
# The padded alternatives end with a negative lookahead instead of `\b`:
# '=' is not a word character, so `\b` can never match between '=' and
# whitespace/punctuation/end-of-input, which made every normally delimited
# padded string (e.g. "aGVsbG8= ") invisible. The unpadded alternative keeps
# its original `\b` anchor, so its matching behaviour is unchanged.
BASE64_STR = re.compile(
    r'\b(?:[A-Za-z0-9+/]{4})*'
    r'(?:'
    r'(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)(?![A-Za-z0-9+/=])'
    r'|[A-Za-z0-9+/]{4}\b'
    r')'
)
# Miscellaneous | |
def _dict2xmlElement(d, name='item'):
    """Convert the mapping *d* into an lxml element named *name*.

    Every key becomes a child element whose tag is the lower-cased key:

    * a list value becomes a grouping element; dict entries are converted
      recursively (tagged ``<key>_item``) and any other entry becomes an
      ``<item>`` child whose text is the entry itself;
    * an int value is stringified before being used as element text;
    * any other value is used as element text unchanged.
    """
    root = lxml.etree.Element(name)
    for key, value in d.items():
        tag = key.lower()
        if isinstance(value, list):
            group = lxml.etree.SubElement(root, tag)
            for entry in value:
                if isinstance(entry, dict):
                    group.append(_dict2xmlElement(entry, tag + '_item'))
                else:
                    lxml.etree.SubElement(group, 'item').text = entry
            continue
        # Only ints are converted; other scalars are handed to lxml as-is.
        text = str(value) if isinstance(value, int) else value
        lxml.etree.SubElement(root, tag).text = text
    return root
# I/O | |
def from_file(filename):
    """Load *filename* and return the record the other helpers operate on.

    The record is a dict with three keys: ``DATA`` (the raw file content),
    ``EXTENSION`` (always ``'bin'``) and ``METADATA`` (an ``OrderedDict``
    holding ``FILENAME`` followed by ``FILESIZE``).

    Raises whatever ``open()``/``read()`` raise (IOError/OSError) when the
    file cannot be read.
    """
    # Binary mode: samples are arbitrary bytes, and text mode may translate
    # line endings on some platforms, which would skew sizes and hashes.
    # The context manager guarantees the handle is closed.
    with open(filename, 'rb') as handle:
        raw = handle.read()
    # Pairs instead of keyword arguments: keyword order is not preserved on
    # Python 2, which would defeat the point of an OrderedDict.
    metadata = collections.OrderedDict([
        ('FILENAME', filename),
        ('FILESIZE', len(raw)),
    ])
    return dict(DATA=raw, EXTENSION='bin', METADATA=metadata)
def from_dir(dirname):
    """Return a ``from_file`` record for every file directly inside *dirname*.

    Only the top level of the directory is inspected (sub-directories are
    not descended into). A directory that is missing or cannot be listed
    yields an empty list. A file that cannot be read is skipped so that one
    bad entry does not discard the rest of the listing.
    """
    try:
        # The next() builtin works on Python 2.6+ and Python 3; only the
        # first (top-level) entry produced by os.walk is needed.
        _root, _dirs, files = next(os.walk(dirname))
    except StopIteration:
        # os.walk ignores listing errors by default and then yields nothing.
        return []

    outputs = []
    for file_ in files:
        try:
            outputs.append(from_file(os.path.join(dirname, file_)))
        except EnvironmentError:
            # Best effort: unreadable/vanished file, carry on with the others.
            continue
    return outputs
def to_xmlfile(data):
    """Write the XML rendering of *data*'s metadata next to the sample.

    The output path is ``data['METADATA']['FILENAME'] + '.xml'``. Always
    returns the empty string.
    """
    # Render first so a failure in to_xml() cannot leave a truncated or
    # empty .xml file behind.
    xml_text = to_xml(data)
    xml_path = data['METADATA']['FILENAME'] + '.xml'
    # Context manager closes the handle even if the write fails.
    with open(xml_path, 'w') as handle:
        handle.write(xml_text)
    return ''
# Formats | |
def to_xml(data):
    """Serialise ``data['METADATA']`` to pretty-printed XML via lxml."""
    metadata_tree = _dict2xmlElement(data['METADATA'])
    return lxml.etree.tostring(metadata_tree, pretty_print=True)
# File hashes
def md5sum(data):
    """Return a copy of *data* whose metadata gains an ``MD5SUM`` entry.

    The digest is the hex MD5 of ``data['DATA']``. Neither *data* nor its
    ``METADATA`` mapping is modified; the result carries a fresh OrderedDict.
    """
    existing = data['METADATA']
    digest = hashlib.md5(data['DATA']).hexdigest()
    metadata = collections.OrderedDict(existing)
    metadata['MD5SUM'] = digest
    record = dict(data)
    record['METADATA'] = metadata
    return record
def sha1sum(data):
    """Return a copy of *data* whose metadata gains a ``SHA1SUM`` entry.

    The digest is the hex SHA-1 of ``data['DATA']``. Neither *data* nor its
    ``METADATA`` mapping is modified; the result carries a fresh OrderedDict.
    """
    existing = data['METADATA']
    digest = hashlib.sha1(data['DATA']).hexdigest()
    metadata = collections.OrderedDict(existing)
    metadata['SHA1SUM'] = digest
    record = dict(data)
    record['METADATA'] = metadata
    return record
def sha256sum(data):
    """Return a copy of *data* whose metadata gains a ``SHA256SUM`` entry.

    The digest is the hex SHA-256 of ``data['DATA']``. Neither *data* nor its
    ``METADATA`` mapping is modified; the result carries a fresh OrderedDict.
    """
    existing = data['METADATA']
    digest = hashlib.sha256(data['DATA']).hexdigest()
    metadata = collections.OrderedDict(existing)
    metadata['SHA256SUM'] = digest
    record = dict(data)
    record['METADATA'] = metadata
    return record
def sha512sum(data):
    """Return a copy of *data* whose metadata gains a ``SHA512SUM`` entry.

    The digest is the hex SHA-512 of ``data['DATA']``. Neither *data* nor its
    ``METADATA`` mapping is modified; the result carries a fresh OrderedDict.
    """
    existing = data['METADATA']
    digest = hashlib.sha512(data['DATA']).hexdigest()
    metadata = collections.OrderedDict(existing)
    metadata['SHA512SUM'] = digest
    record = dict(data)
    record['METADATA'] = metadata
    return record
def crc32(data):
    """Return a copy of *data* whose metadata gains a ``CRC32`` entry.

    The value is the CRC-32 of ``data['DATA']`` rendered as eight lower-case
    hex digits. Neither *data* nor its ``METADATA`` mapping is modified.
    """
    existing = data['METADATA']
    # Mask to 32 bits so the checksum is always rendered as unsigned.
    checksum = zlib.crc32(data['DATA']) & 0xFFFFFFFF
    metadata = collections.OrderedDict(existing)
    metadata['CRC32'] = format(checksum, '08x')
    record = dict(data)
    record['METADATA'] = metadata
    return record
# File info | |
def file_type(data):
    """Return a copy of *data* whose metadata gains a ``FILETYPE`` entry.

    Runs ``/usr/bin/file`` on ``data['METADATA']['FILENAME']`` and stores the
    first comma-separated field of its description. Neither *data* nor its
    ``METADATA`` mapping is modified.

    Raises ``subprocess.CalledProcessError`` if the command exits non-zero
    and ``OSError`` if it cannot be executed.
    """
    filename = data['METADATA']['FILENAME']
    # -b (brief) suppresses the "<filename>:" prefix, so a colon inside the
    # path can no longer be mistaken for the separator; "--" stops a name
    # beginning with '-' from being parsed as an option.
    # universal_newlines=True makes the output text on Python 3 as well.
    output = subprocess.check_output(
        ['/usr/bin/file', '-b', '--', filename],
        universal_newlines=True)
    description = output.split(',')[0].strip()
    metadata = collections.OrderedDict(data['METADATA'], FILETYPE=description)
    return dict(data, METADATA=metadata)
# Embedded content | |
def extract_ipv4_addresses(data):
    """Return a copy of *data* with IPv4 addresses found in ``data['DATA']``.

    Matches of ``IPV4_ADDRESS`` are appended, in order of appearance and
    without de-duplication, to any list already stored under the
    ``IPV4_ADDRESSES`` metadata key. The input record is not modified.
    """
    # findall() yields one tuple of four octet strings per match.
    found = ['.'.join(octets) for octets in IPV4_ADDRESS.findall(data['DATA'])]
    already_known = data['METADATA'].get('IPV4_ADDRESSES', [])
    metadata = collections.OrderedDict(
        data['METADATA'], IPV4_ADDRESSES=already_known + found)
    return dict(data, METADATA=metadata)
def extract_base64_strings(data):
    """Return a copy of *data* with Base64 candidates found in ``data['DATA']``.

    Each match of ``BASE64_STR`` that decodes is described by a dict with
    the keys ``printable``, ``encoded_value``, ``encoded_len``,
    ``decoded_value`` and ``decoded_len``. The descriptions are appended to
    any list already stored under the ``BASE64_STRINGS`` metadata key. The
    input record is not modified.
    """
    hits = []
    for candidate in BASE64_STR.findall(data['DATA']):
        try:
            decoded = base64.b64decode(candidate)
            is_printable = all(ch in string.printable for ch in decoded)
            # Payloads with non-printable characters are stored hex-encoded.
            shown = decoded if is_printable else decoded.encode('hex')
            hits.append({
                'printable': str(is_printable),
                'encoded_value': candidate,
                'encoded_len': len(candidate),
                'decoded_value': shown,
                # NOTE(review): this is the length of the stored value, i.e.
                # the hex digits (twice the byte count) when not printable --
                # confirm whether the raw decoded length was intended.
                'decoded_len': len(shown),
            })
        except TypeError:
            # Candidate that cannot be decoded: ignore it.
            pass
    previous = data['METADATA'].get('BASE64_STRINGS', [])
    metadata = collections.OrderedDict(
        data['METADATA'], BASE64_STRINGS=previous + hits)
    return dict(data, METADATA=metadata)
def draw_msgbox(string):
    """Return *string* framed in an 80-column box drawn with '/' characters.

    The result starts and ends with a newline and contains seven rows: a
    solid border, two blank rows, the message row, two blank rows and a
    closing border. A message longer than 77 characters is not truncated,
    so its row will be wider than the box.

    Note: the parameter name shadows the ``string`` module inside this
    function; it is kept for interface compatibility.
    """
    width = 80
    border = '/' * width
    blank = '/' + ' ' * (width - 2) + '/'
    # "/ " prefix (2) + padded text (77) + "/" (1) == 80. The message was
    # previously padded to 76, leaving this row one column short of the
    # others and the right-hand border misaligned.
    message = '/ ' + string.ljust(width - 3) + '/'
    rows = [border, blank, blank, message, blank, blank, border]
    return '\n' + '\n'.join(rows) + '\n'
# Shown when this module is loaded under the name "SMALL" (i.e. imported),
# not when it is executed as a script.
# NOTE(review): presumably the interpreter hosting the DSL imports it as
# SMALL -- confirm before changing the guard.
if __name__ == "SMALL":
    # Parenthesised single-argument form: prints identically on Python 2 and
    # is also valid syntax on Python 3.
    print(draw_msgbox('Welcome to [S]imple [M]alware [A]na[L]ysis [L]anguage (SMALL) Interpreter'))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment