Created
January 19, 2022 16:37
-
-
Save notareverser/fb5e21b9439a25d974a820ab6dcab12b to your computer and use it in GitHub Desktop.
A Python program to generate regular-expression YARA signatures for yes/no maps of shellcode under single-byte encodings
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# for our homey, Claude Shannon
import sys | |
import logging | |
import binascii | |
import hashlib | |
import argparse | |
from collections import defaultdict | |
# Send log records to stderr; stdout is used by print() for the generated
# YARA rules. WARNING is the threshold unless --verbose changes it later.
logging.basicConfig(
    handlers=[logging.StreamHandler(sys.stderr)],
    level=logging.WARNING,
    datefmt='%Y-%m-%dT%H:%M:%S',
    format='%(asctime)s %(levelname)-8s %(message)s',
)
def spacify(data):
    """Return *data* with a single space between every two-byte group.

    *data* must be bytes-like (the caller passes hex text produced by
    binascii.b2a_hex). A trailing odd byte forms a group of its own.
    """
    groups = []
    for start in range(0, len(data), 2):
        groups.append(data[start:start + 2])
    return b' '.join(groups)
def convertToSignatureClause(data, useSpaces = True):
    """Return the hexadecimal text of *data* as a str.

    When *useSpaces* is true the hex digits are grouped per byte and
    separated by spaces (via spacify); otherwise they are contiguous.
    """
    hexText = binascii.b2a_hex(data)
    rendered = spacify(hexText) if useSpaces else hexText
    return rendered.decode('utf-8')
def rollingXOR(data, keyStart = 0):
    """XOR each byte of *data* with a key that increases by one per byte.

    The key for the byte at offset i is (keyStart + i) & 0xff, so it wraps
    from 0xff back to 0x00. Returns a new bytearray; *data* is not modified.
    """
    return bytearray(
        byte ^ ((keyStart + offset) & 0xff)
        for offset, byte in enumerate(data)
    )
def formatCount(count):
    """Return *count* as a regex repetition quantifier, e.g. 3 -> '{3}'."""
    # Doubled braces are literal braces in str.format.
    return '{{{:d}}}'.format(count)
def convertToRegex(val, yesCounts, noCounts):
    """Build the YARA regex for a yes/no map of the byte value *val*.

    The pattern alternates "val repeated yesCounts[n] times" with
    "any byte other than val repeated noCounts[n] times" and ends with the
    last entry of *yesCounts*. Returns the regex as a str wrapped in
    '/.../s'.
    """
    matchByte = "\\x" + "{:02x}".format(val)
    otherByte = "[^" + matchByte + "]"

    pieces = []
    for n, noRun in enumerate(noCounts):
        pieces.append(matchByte + formatCount(yesCounts[n]))
        pieces.append(otherByte + formatCount(noRun))
    # the map always closes on a run of matching bytes
    pieces.append(matchByte + formatCount(yesCounts[-1]))

    # YARA's single-line modifier is an 's' at the end of the regex
    return '/' + ''.join(pieces) + '/s'
def getRegexes(locations):
    """Turn byte offsets into one yes/no-map regex per possible byte value.

    *locations* is the list of offsets at which the reference byte occurs.
    Consecutive offsets are collapsed into a single "yes" run; the distance
    between two runs becomes a "no" run. Returns a list of 256
    (value, regex) tuples, one for each byte value 0..255.
    """
    yesCounts = []
    noCounts = []

    # Walk adjacent offsets: a step of exactly one extends the current yes
    # run, anything else closes it and records the gap as a no run.
    runLength = 1
    for previous, current in zip(locations, locations[1:]):
        step = current - previous
        if step == 1:
            runLength += 1
            continue
        yesCounts.append(runLength)
        noCounts.append(step - 1)
        runLength = 1
    # the run that is still open when the offsets are exhausted
    yesCounts.append(runLength)

    logging.debug("Yes/no counts")
    logging.debug(yesCounts)
    logging.debug(noCounts)

    # the same map is expressed once for every byte value the reference
    # byte could have been encoded to
    return [(val, convertToRegex(val, yesCounts, noCounts)) for val in range(256)]
def computeRegexSignatures(data, fmd5, args):
    """Print a YARA rule of yes/no-map regexes for the most frequent byte.

    Counts the bytes of *data* that are not in args.ignore, picks the most
    frequent one, and prints a rule named after *fmd5* holding one regex per
    byte value (plus an optional cleartext hex string when
    args.emitCleartext is set). If no byte is left to count, an error is
    logged and nothing is printed. Returns None.
    """
    tally = defaultdict(int)
    for byte in data:
        if byte in args.ignore:
            continue
        tally[byte] += 1

    if not tally:
        logging.error("No non-ignored bytes found in file {:s}, cannot compute Shannon signature!".format(fmd5))
        return

    # find most frequently occurring byte; on a tie the byte seen first wins
    topByte, topCount = max(tally.items(), key=lambda item: item[1])
    logging.info("Highest frequency byte is 0x{:02x} with count {:d}".format(topByte, topCount))

    positions = [index for index, byte in enumerate(data) if topByte == byte]
    logging.debug("All locations of byte 0x{:02x}: {}".format(topByte, positions))

    ruleLines = [
        "rule FMD5_{:s}_RegexMap".format(fmd5),
        "{",
        " strings:",
    ]
    if args.emitCleartext:
        ruleLines.append(" $cleartext = {" + convertToSignatureClause(data, args.spacify) + "}")
    for byteValue, pattern in getRegexes(positions):
        ruleLines.append(" $reg_{:02x} = ".format(byteValue) + pattern)
    ruleLines.extend([" condition:", " any of them", "}"])

    print('\n'.join(ruleLines) + '\n')
def computeRollingXORSignatures(data, fmd5, args):
    """Print a YARA rule with one hex string per rolling-XOR start key.

    For every start key the payload *data* is encoded with rollingXOR and
    emitted as a `$key_NN` hex string in a rule named after *fmd5*. When
    args.emitCleartext is set, the unencoded payload is added as
    `$cleartext`. args.spacify controls whether hex bytes are separated by
    spaces. Returns None; the rule is written to stdout.
    """
    ruleLines = [
        "rule FMD5_{:s}_RollingXOR".format(fmd5),
        "{",
        " strings:",
    ]
    if args.emitCleartext:
        ruleLines.append(" $cleartext = {" + convertToSignatureClause(data, args.spacify) + "}")

    # Cover all 256 start keys. Unlike a fixed single-byte XOR, start key
    # 0x00 is not the identity here (the key still increments per byte), and
    # 0xff is as valid a start as any other value, so neither may be skipped.
    for key in range(256):
        encoded = rollingXOR(data, key)
        hexClause = convertToSignatureClause(encoded, args.spacify)
        ruleLines.append(" $key_{:02x}".format(key) + " = {" + hexClause + "}")

    ruleLines.append(" condition:")
    ruleLines.append(" any of them")
    ruleLines.append("}")
    print('\n'.join(ruleLines) + '\n')
def parseArguments():
    """Parse sys.argv and return the argparse namespace, post-processed.

    Post-processing done here:
      * exits with status 1 if neither --analyze nor --rollingXor was given;
      * applies --verbose (a logging level name such as "debug") to the root
        logger, warning when the name is not a known level;
      * converts every --ignore value from hexadecimal text to an int and
        stores the list back in args.ignore (an empty list when the option
        was not used). Values above 0xff are masked with a warning; text
        that is not hexadecimal ends the program with status 1.
    """
    parser = argparse.ArgumentParser(description="Frequency analyzer and YARA signature generator for shellcode. Give it raw shellcode files and it will create YARA signatures for single-byte encodings")
    parser.add_argument('files', nargs='+')
    parser.add_argument('-s', '--spacify', action='store_true', default=False, help='If specified, spacifies any YARA signatures between byte values (as appropriate)')
    parser.add_argument('-e', '--emitCleartext', action='store_true', default=False, help='If specified, emits a cleartext clause for the selected signatures (which will match the native shellcode directly')
    parser.add_argument('-i', '--ignore', action='append', default=None, help='Can be specified multiple times. If specified, ignore the specified byte value (hexadecimal encoding) when computing frequencies. Must be used with --analyze')
    parser.add_argument('-n', '--numbytes', action='store', type=int, default=-1, help='If specified, only use up to numbytes bytes of the shellcode file')
    parser.add_argument('-o', '--offset', action='store', type=int, default=0, help='If specified, start at the specified offset into the shellcode file')
    parser.add_argument('-a', '--analyze', action='store_true', default=False, help='If specified, analyze the shellcode and compute the on/off regular expression YARA signature')
    parser.add_argument('-r', '--rollingXor', action='store_true', default=False, help='If specified, analyze the shellcode and compute the rolling XOR YARA signature (rolling XOR increments the XOR key by 1 for each payload byte (mod 0xff)')
    parser.add_argument('-v', '--verbose', action='store', default=None, help='If specified, output verbose input')
    args = parser.parse_args()

    if not args.analyze and not args.rollingXor:
        logging.error("Need to specify which mode to use!")
        # A usage error must not report success to the calling shell/script.
        sys.exit(1)

    if args.verbose is not None:
        newLevel = getattr(logging, args.verbose.upper(), None)
        if isinstance(newLevel, int):
            logging.getLogger().setLevel(newLevel)
        else:
            logging.warning("Unknown log level {:s}, keeping the current level".format(args.verbose))

    # turn the ignore list into actual byte values
    ignoreBytes = []
    for rawText in args.ignore or []:
        # Only a leading 0x/0X counts as a prefix; checking case-insensitively
        # avoids building an unparsable '0x0XFF' from '0XFF'.
        text = rawText if rawText.lower().startswith('0x') else '0x' + rawText
        try:
            value = int(text, 16)
        except ValueError:
            logging.error("Ignore value {:s} is not a valid hexadecimal byte".format(rawText))
            sys.exit(1)
        if value > 0xff:
            logging.warning("Ignore value {:s} outside legit range, masking with &0xff".format(text))
            value = value & 0xff
        logging.debug("Converting ignore byte {:s} to 0x{:02x}".format(text, value))
        ignoreBytes.append(value)
    args.ignore = ignoreBytes
    return args
def main():
    """Generate the requested YARA signatures for every file on the command line.

    Each file is read in binary mode, hashed with MD5 (of the whole file, used
    in the rule name), cut down to the window selected by --offset and
    --numbytes, and handed to the regex-map and/or rolling-XOR generators.
    Files whose offset/numbytes window does not fit are reported and skipped.
    """
    args = parseArguments()
    for f in args.files:
        logging.info("Processing file {:s}".format(f))
        # the context manager closes the handle even if read() fails
        with open(f, 'rb') as handle:
            fdata = handle.read()
        fmd5 = hashlib.md5(fdata).hexdigest()
        logging.info("File {:s} contains {:d} bytes".format(f, len(fdata)))

        if args.offset > len(fdata):
            logging.error("Cannot specify offset larger than the file size for {:s}!".format(f))
            logging.error("Skipping file {:s}".format(f))
            continue
        if args.numbytes > len(fdata):
            logging.error("Cannot specify number of bytes ({:d}) greater than the file size ({:d}) for {:s}!".format(args.numbytes, len(fdata), f))
            logging.error("Skipping file {:s}".format(f))
            continue

        starti = args.offset
        if args.numbytes < 0:
            # No limit requested (the default is -1): run to the end of the
            # file. Using -1 itself as the slice end would silently drop the
            # final byte of the payload.
            endi = len(fdata)
        else:
            endi = starti + args.numbytes
            if endi > len(fdata):
                logging.error("Offset {:d} and numbytes {:d} is a range outside the bounds of the specified file!".format(args.offset, args.numbytes))
                logging.error("Skipping file {:s}".format(f))
                continue

        realData = fdata[starti:endi]
        if args.analyze:
            computeRegexSignatures(realData, fmd5, args)
        if args.rollingXor:
            computeRollingXORSignatures(realData, fmd5, args)
# Run the command-line tool only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment