Skip to content

Instantly share code, notes, and snippets.

@ikotler
Created May 28, 2013 00:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ikotler/5659736 to your computer and use it in GitHub Desktop.
Save ikotler/5659736 to your computer and use it in GitHub Desktop.
Simple Malware AnaLysis Language (SMALL), a toy domain-specific language (DSL) I have developed on top of Pythonect for 'Hacking Like It's 2013' Workshop/Talk. See: http://www.ikotler.org/HackLikeIts2013_BLR.pdf for Slides & Usage Example.
#!/usr/bin/python
# Copyright (C) 2013 Itzik Kotler
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
import re
import sys
import os
import base64
import zlib
import string
import subprocess
import collections
import StringIO
import hashlib
import lxml.etree
__version__ = "0.1"
# Consts
# Dotted-quad IPv4 address. Each of the four capture groups holds one octet
# (0-255), so findall() yields 4-tuples of octet strings.
IPV4_ADDRESS = re.compile(r'\b(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b')
# A run of base64 quartets whose last quartet may be padded with "=" or "==".
# The candidate is delimited with lookarounds instead of \b: "=", "+" and "/"
# are not word characters, so a \b placed after the padding only matched when
# the padding was directly followed by a letter, digit or underscore, and
# padded strings ending at whitespace or end-of-input were never found.
BASE64_STR = re.compile(r'(?<![A-Za-z0-9+/=])(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=|[A-Za-z0-9+/]{4})(?![A-Za-z0-9+/=])')
# Miscellaneous
def _dict2xmlElement(d, name='item'):
    """Build an lxml element called *name* from the mapping *d*.

    Every key becomes a lower-cased child tag. A list value becomes a
    grouping element: dict entries are converted recursively into
    '<key>_item' elements, any other entry becomes an 'item' child whose
    text is the entry itself. Non-list values become the child's text,
    with integers converted to strings first.
    """
    root = lxml.etree.Element(name)
    for key, value in d.items():
        tag = key.lower()
        if isinstance(value, list):
            group = lxml.etree.Element(tag)
            for entry in value:
                if isinstance(entry, dict):
                    group.append(_dict2xmlElement(entry, tag + '_item'))
                else:
                    lxml.etree.SubElement(group, 'item').text = entry
            root.append(group)
            continue
        # Integers are stringified before being assigned as element text.
        text = str(value) if isinstance(value, int) else value
        lxml.etree.SubElement(root, tag).text = text
    return root
# I/O
def from_file(filename):
    """Load *filename* into a sample record.

    Returns a dict with the raw file contents under DATA, the fixed
    EXTENSION 'bin', and an ordered METADATA mapping holding FILENAME
    followed by FILESIZE (the length of the contents read).

    Raises IOError/OSError if the file cannot be opened or read.
    """
    # Binary mode keeps the bytes exactly as stored (text mode may translate
    # line endings on some platforms), and the context manager guarantees
    # the handle is closed instead of leaking it.
    with open(filename, 'rb') as sample:
        raw = sample.read()
    # Build from pairs: keyword arguments carry no ordering guarantee on
    # Python 2, which would make the key order of METADATA arbitrary.
    metadata = collections.OrderedDict([('FILENAME', filename), ('FILESIZE', len(raw))])
    return dict(DATA=raw, EXTENSION='bin', METADATA=metadata)
def from_dir(dirname):
    """Load every regular file directly inside *dirname* (no recursion).

    Returns a list with one from_file() record per file. The listing is
    best-effort: a missing or unreadable directory yields an empty list,
    and a file that cannot be read is skipped.
    """
    try:
        # Only the first os.walk() entry is used, i.e. the top-level directory.
        # next() works on both Python 2.6+ and Python 3, unlike the .next()
        # method.
        _root, _dirs, files = next(os.walk(dirname))
    except StopIteration:
        # os.walk() yields nothing when the directory cannot be listed.
        return []

    outputs = []
    for file_ in files:
        try:
            outputs.append(from_file(os.path.join(dirname, file_)))
        except (IOError, OSError):
            # Skip unreadable files instead of abandoning the remaining ones.
            continue
    return outputs
def to_xmlfile(data):
    """Write the record's metadata as XML to '<FILENAME>.xml'.

    The target path is the record's METADATA['FILENAME'] with '.xml'
    appended. Returns an empty string.
    """
    # Render before opening the target so a serialization failure does not
    # leave a truncated or empty .xml file behind.
    document = to_xml(data)
    xml_path = data['METADATA']['FILENAME'] + '.xml'
    # The context manager closes the handle even if the write fails.
    with open(xml_path, 'w') as xml_file:
        xml_file.write(document)
    return ''
# Formats
def to_xml(data):
    """Return the record's METADATA mapping as a pretty-printed XML string."""
    metadata_element = _dict2xmlElement(data['METADATA'])
    return lxml.etree.tostring(metadata_element, pretty_print=True)
# File hashes
def md5sum(data):
    """Return a copy of the record with the MD5 hex digest of DATA stored
    under METADATA['MD5SUM']; the input record is not modified."""
    metadata = collections.OrderedDict(data['METADATA'])
    metadata['MD5SUM'] = hashlib.md5(data['DATA']).hexdigest()
    return dict(data, METADATA=metadata)
def sha1sum(data):
    """Return a copy of the record with the SHA-1 hex digest of DATA stored
    under METADATA['SHA1SUM']; the input record is not modified."""
    metadata = collections.OrderedDict(data['METADATA'])
    metadata['SHA1SUM'] = hashlib.sha1(data['DATA']).hexdigest()
    return dict(data, METADATA=metadata)
def sha256sum(data):
    """Return a copy of the record with the SHA-256 hex digest of DATA stored
    under METADATA['SHA256SUM']; the input record is not modified."""
    metadata = collections.OrderedDict(data['METADATA'])
    metadata['SHA256SUM'] = hashlib.sha256(data['DATA']).hexdigest()
    return dict(data, METADATA=metadata)
def sha512sum(data):
    """Return a copy of the record with the SHA-512 hex digest of DATA stored
    under METADATA['SHA512SUM']; the input record is not modified."""
    metadata = collections.OrderedDict(data['METADATA'])
    metadata['SHA512SUM'] = hashlib.sha512(data['DATA']).hexdigest()
    return dict(data, METADATA=metadata)
def crc32(data):
    """Return a copy of the record with the CRC-32 of DATA stored under
    METADATA['CRC32'] as eight lower-case hex digits; the input record is
    not modified."""
    metadata = collections.OrderedDict(data['METADATA'])
    # Mask to 32 bits so the checksum is always formatted as unsigned.
    checksum = zlib.crc32(data['DATA']) & 0xFFFFFFFF
    metadata['CRC32'] = format(checksum, '08x')
    return dict(data, METADATA=metadata)
# File info
def file_type(data):
    """Return a copy of the record with METADATA['FILETYPE'] set to the
    leading part (up to the first comma) of the description printed by
    /usr/bin/file for METADATA['FILENAME'].

    Raises subprocess.CalledProcessError if the command exits non-zero and
    OSError if it cannot be executed.
    """
    # -b (brief) drops the "<filename>:" prefix from the output, so a path
    # that itself contains ':' can no longer be mistaken for the separator.
    # "--" ends option parsing so a name starting with '-' is treated as a path.
    output = subprocess.check_output(['/usr/bin/file', '-b', '--', data['METADATA']['FILENAME']])
    filetype = output.split(',')[0].strip()
    return dict(data, METADATA=collections.OrderedDict(data['METADATA'], FILETYPE=filetype))
# Embedded content
def extract_ipv4_addresses(data):
    """Return a copy of the record whose METADATA['IPV4_ADDRESSES'] list is
    extended with every IPv4 address found in DATA.

    Addresses already listed in the incoming metadata are kept and the new
    matches are appended after them; the input record is not modified.
    """
    # findall() yields one tuple of four octet strings per match.
    found = ['.'.join(octets) for octets in IPV4_ADDRESS.findall(data['DATA'])]
    metadata = data['METADATA']
    known = metadata.get('IPV4_ADDRESSES', [])
    return dict(data, METADATA=collections.OrderedDict(metadata, IPV4_ADDRESSES=known + found))
def extract_base64_strings(data):
    """Return a copy of the record whose METADATA['BASE64_STRINGS'] list is
    extended with one entry per base64 candidate found in DATA.

    Each entry is a dict with:
      printable     -- 'True'/'False': whether every decoded character is
                       in string.printable
      encoded_value -- the candidate as it appears in DATA
      encoded_len   -- length of encoded_value
      decoded_value -- the decoded text, or its hex encoding when it is not
                       printable
      decoded_len   -- length of the decoded data itself

    Candidates that fail to decode are skipped. Entries already present in
    the incoming metadata are kept; the input record is not modified.
    """
    base64_strs = []
    for base64_str in BASE64_STR.findall(data['DATA']):
        try:
            decoded = base64.b64decode(base64_str)
        except TypeError:
            # Python 2's b64decode raises TypeError for input it cannot decode.
            continue
        # Measure before any hex re-encoding; otherwise the reported length
        # is doubled for non-printable payloads.
        decoded_len = len(decoded)
        printable = all(c in string.printable for c in decoded)
        if not printable:
            # Hex-encode non-printable payloads before they are stored.
            decoded = decoded.encode('hex')
        base64_strs.append({'printable': str(printable), 'encoded_value': base64_str, 'encoded_len': len(base64_str), 'decoded_value': decoded, 'decoded_len': decoded_len})
    return dict(data, METADATA=collections.OrderedDict(data['METADATA'], BASE64_STRINGS=data['METADATA'].get('BASE64_STRINGS', []) + base64_strs))
def draw_msgbox(string):
    """Return *string* framed in an 80-column box drawn with '/' characters.

    The result starts with a newline, followed by seven newline-terminated
    rows: a full border, two blank rows, the message row, two blank rows,
    and a closing border. Messages longer than 77 characters are not
    truncated and will push the right border outward.

    Note: the parameter name shadows the `string` module inside this
    function; it is kept unchanged for caller compatibility.
    """
    width = 80
    border = '/' * width
    blank = '/' + ' ' * (width - 2) + '/'
    # '/ ' prefix (2) + padded message (77) + '/' (1) == 80, so the right
    # border lines up with the other rows (it used to be one column short).
    message = '/ ' + string.ljust(width - 3) + '/'
    rows = [border, blank, blank, message, blank, blank, border]
    return '\n' + '\n'.join(rows) + '\n'
# Show the welcome banner when this file is loaded under the module name
# "SMALL" (not when it is executed directly as a script).
if __name__ == "SMALL":
    print(draw_msgbox('Welcome to [S]imple [M]alware [A]na[L]ysis [L]anguage (SMALL) Interpreter'))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment