Skip to content

Instantly share code, notes, and snippets.

@notareverser
Created January 19, 2022 16:37
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save notareverser/fb5e21b9439a25d974a820ab6dcab12b to your computer and use it in GitHub Desktop.
Save notareverser/fb5e21b9439a25d974a820ab6dcab12b to your computer and use it in GitHub Desktop.
A Python program to generate regular-expression YARA signatures for yes/no maps of shellcode under single-byte encodings
#!/usr/bin/env python
# for our homey, Claude Shannon
import sys
import logging
import binascii
import hashlib
import argparse
from collections import defaultdict
# Configure the root logger once at import time: WARNING and above, with an
# ISO-like timestamp, written to stderr (the generated rules go to stdout).
logging.basicConfig(
    level=logging.WARNING,
    format='%(asctime)s %(levelname)-8s %(message)s',
    datefmt='%Y-%m-%dT%H:%M:%S',
    handlers={logging.StreamHandler(sys.stderr)},
)
def spacify(data):
    """Return ``data`` with a single space between each two-byte group.

    ``data`` is a bytes-like sequence (in this file, the hex text produced by
    ``binascii.b2a_hex``). It is cut into consecutive two-byte slices -- the
    last slice is shorter when the length is odd -- and the slices are joined
    with ``b' '``.
    """
    pairs = (data[start:start + 2] for start in range(0, len(data), 2))
    return b' '.join(pairs)
def convertToSignatureClause(data, useSpaces = True):
    """Render ``data`` as lowercase hexadecimal text.

    Args:
        data: bytes-like payload to encode.
        useSpaces: when true, the hex text is passed through ``spacify`` so
            that each byte's two hex digits are separated by a space.

    Returns:
        The hex representation as a ``str``.
    """
    hexText = binascii.hexlify(data)
    if useSpaces:
        hexText = spacify(hexText)
    return hexText.decode('utf-8')
def rollingXOR(data, keyStart = 0):
    """XOR every byte of ``data`` with a key that increases by one per byte.

    The key applied to the byte at position ``i`` is ``(keyStart + i) & 0xff``,
    so the key wraps from 0xff back to 0x00.

    Args:
        data: bytes-like input.
        keyStart: key used for the first byte.

    Returns:
        A new ``bytearray`` of the same length as ``data``.
    """
    return bytearray(
        byte ^ ((keyStart + index) & 0xff) for index, byte in enumerate(data)
    )
def formatCount(count):
    """Return ``count`` wrapped in braces, e.g. ``3`` -> ``'{3}'``.

    The result is used as a regex repetition quantifier.
    """
    # Doubled braces are literal braces in str.format.
    return '{{{:d}}}'.format(count)
def convertToRegex(val, yesCounts, noCounts):
    """Build the YARA regex text for one byte value from a yes/no run map.

    The pattern alternates "``val`` repeated N times" with "anything but
    ``val`` repeated M times", and always ends on a "yes" run.

    Args:
        val: byte value (0-255) that marks the "yes" positions.
        yesCounts: lengths of the runs of ``val``; one entry per ``noCounts``
            entry plus a trailing one.
        noCounts: lengths of the gaps between consecutive "yes" runs.

    Returns:
        The regex as a string delimited by ``/`` with the ``s`` modifier.
    """
    hexByte = "\\x{:02x}".format(val)
    matchClause = hexByte
    missClause = "[^" + hexByte + "]"

    pieces = []
    for index, gap in enumerate(noCounts):
        pieces.append(matchClause + formatCount(yesCounts[index]))
        pieces.append(missClause + formatCount(gap))
    # The map always closes with a run of the matching byte.
    pieces.append(matchClause + formatCount(yesCounts[-1]))

    # YARA's single-line modifier is an 's' at the end of the regex
    return '/{}/s'.format(''.join(pieces))
def getRegexes(locations):
    """Turn the offsets of one byte value into 256 candidate regexes.

    ``locations`` lists the offsets at which the chosen byte occurs (the caller
    in this file builds it in ascending order). Consecutive offsets are
    collapsed into "yes" runs; the distance between two runs, minus the byte
    that ends the previous run, becomes a "no" gap. The resulting run/gap map is
    then rendered once for every possible byte value, since the value depends
    on the encoding key.

    Returns:
        A list of ``(value, regex)`` tuples for ``value`` in 0..255.
    """
    runLengths = []
    gapLengths = []
    currentRun = 1
    # Walk neighbouring offsets: a step of exactly one extends the current run,
    # anything else closes the run and records the gap that follows it.
    for previous, following in zip(locations, locations[1:]):
        step = following - previous
        if step == 1:
            currentRun += 1
        else:
            runLengths.append(currentRun)
            gapLengths.append(step - 1)
            currentRun = 1
    # The map always finishes on a "yes" run.
    runLengths.append(currentRun)

    logging.debug("Yes/no counts")
    logging.debug(runLengths)
    logging.debug(gapLengths)

    # One regex per possible byte value, all sharing the same yes/no map.
    return [
        (candidate, convertToRegex(candidate, runLengths, gapLengths))
        for candidate in range(256)
    ]
def computeRegexSignatures(data, fmd5, args):
    """Print a YARA rule holding the yes/no regex map of the most common byte.

    Bytes listed in ``args.ignore`` are excluded from the frequency count. The
    offsets of the most frequent remaining byte are handed to ``getRegexes``
    and every returned regex becomes one ``$reg_XX`` string of the rule. When
    no byte is left to count, an error is logged and nothing is printed.

    Args:
        data: bytes-like shellcode to analyse.
        fmd5: hex digest string used in the rule name.
        args: parsed options; reads ``ignore``, ``emitCleartext`` and
            ``spacify``.
    """
    tally = defaultdict(int)
    for byte in data:
        if byte in args.ignore:
            continue
        tally[byte] += 1

    if not tally:
        logging.error("No non-ignored bytes found in file {:s}, cannot compute Shannon signature!".format(fmd5))
        return

    # find most frequently occurring byte
    ranked = sorted(tally.items(), key=lambda item: item[1], reverse=True)
    topByte, topCount = ranked[0]
    logging.info("Highest frequency byte is 0x{:02x} with count {:d}".format(topByte, topCount))

    offsets = [index for index, byte in enumerate(data) if byte == topByte]
    logging.debug("All locations of byte 0x{:02x}: {}".format(topByte, offsets))
    candidates = getRegexes(offsets)

    ruleLines = [
        "rule FMD5_{:s}_RegexMap".format(fmd5),
        "{",
        " strings:",
    ]
    if args.emitCleartext:
        ruleLines.append(" $cleartext = {" + convertToSignatureClause(data, args.spacify) + "}")
    ruleLines.extend(
        " $reg_{:02x} = ".format(value) + pattern for value, pattern in candidates
    )
    ruleLines += [" condition:", " any of them", "}"]
    print('\n'.join(ruleLines) + '\n')
def computeRollingXORSignatures(data, fmd5, args):
    """Print a YARA rule with one hex string per rolling-XOR start key.

    Each ``$key_XX`` string is ``data`` encoded by ``rollingXOR`` with start
    key ``XX`` and rendered by ``convertToSignatureClause``.

    Args:
        data: bytes-like shellcode to encode.
        fmd5: hex digest string used in the rule name.
        args: parsed options; reads ``emitCleartext`` and ``spacify``.
    """
    ruleLines = [
        "rule FMD5_{:s}_RollingXOR".format(fmd5),
        "{",
        " strings:",
    ]
    if args.emitCleartext:
        ruleLines.append(" $cleartext = {" + convertToSignatureClause(data, args.spacify) + "}")

    # NOTE(review): start keys 0x00 and 0xff are not emitted by this range --
    # confirm whether skipping them is intentional.
    for startKey in range(1, 255):
        encoded = convertToSignatureClause(rollingXOR(data, startKey), args.spacify)
        ruleLines.append(" $key_{:02x}".format(startKey) + " = {" + encoded + "}")

    ruleLines += [" condition:", " any of them", "}"]
    print('\n'.join(ruleLines) + '\n')
def parseArguments():
    """Parse ``sys.argv`` and normalise the resulting options.

    Besides plain argparse handling this function:

    * exits with a non-zero status when neither ``--analyze`` nor
      ``--rollingXor`` was requested;
    * applies ``--verbose`` (a ``logging`` level name such as ``debug``) to the
      root logger when the name resolves to a level;
    * converts every ``--ignore`` value from hexadecimal text (with or without
      a ``0x``/``0X`` prefix) to an integer in 0..255, masking out-of-range
      values with ``& 0xff``.

    Returns:
        The ``argparse.Namespace``; ``ignore`` is always a list of ints
        (empty when the option was not given).

    Raises:
        SystemExit: on argparse errors, when no mode is selected, or when an
            ``--ignore`` value is not valid hexadecimal.
    """
    parser = argparse.ArgumentParser(description="Frequency analyzer and YARA signature generator for shellcode. Give it raw shellcode files and it will create YARA signatures for single-byte encodings")
    parser.add_argument('files', nargs='+')
    parser.add_argument('-s', '--spacify', action='store_true', default=False, help='If specified, spacifies any YARA signatures between byte values (as appropriate)')
    parser.add_argument('-e', '--emitCleartext', action='store_true', default=False, help='If specified, emits a cleartext clause for the selected signatures (which will match the native shellcode directly')
    parser.add_argument('-i', '--ignore', action='append', default=None, help='Can be specified multiple times. If specified, ignore the specified byte value (hexadecimal encoding) when computing frequencies. Must be used with --analyze')
    parser.add_argument('-n', '--numbytes', action='store', type=int, default=-1, help='If specified, only use up to numbytes bytes of the shellcode file')
    parser.add_argument('-o', '--offset', action='store', type=int, default=0, help='If specified, start at the specified offset into the shellcode file')
    parser.add_argument('-a', '--analyze', action='store_true', default=False, help='If specified, analyze the shellcode and compute the on/off regular expression YARA signature')
    parser.add_argument('-r', '--rollingXor', action='store_true', default=False, help='If specified, analyze the shellcode and compute the rolling XOR YARA signature (rolling XOR increments the XOR key by 1 for each payload byte (mod 0xff)')
    parser.add_argument('-v', '--verbose', action='store', default=None, help='If specified, output verbose input')
    args = parser.parse_args()

    if not args.analyze and not args.rollingXor:
        logging.error("Need to specify which mode to use!")
        # A usage error must not report success to the calling shell.
        sys.exit(1)

    if args.verbose is not None:
        newLevel = getattr(logging, args.verbose.upper(), None)
        if isinstance(newLevel, int):
            logging.getLogger().setLevel(newLevel)

    # turn the ignore list into actual byte values
    ignoreBytes = []
    for raw in args.ignore or []:
        try:
            # int(..., 16) already accepts an optional 0x/0X prefix, so no
            # manual prefix handling is needed.
            value = int(raw, 16)
        except ValueError:
            parser.error("Ignore value {:s} is not a valid hexadecimal byte".format(raw))
        if not 0 <= value <= 0xff:
            logging.warning("Ignore value {:s} outside legit range, masking with &0xff".format(raw))
            value &= 0xff
        logging.debug("Converting ignore byte {:s} to 0x{:02x}".format(raw, value))
        ignoreBytes.append(value)
    args.ignore = ignoreBytes
    return args
def main():
    """Generate the requested YARA signatures for every input file.

    For each file named on the command line the whole file is read and
    MD5-hashed (the digest names the rule), the ``--offset``/``--numbytes``
    window is validated and cut out, and the selected generators print their
    rules to stdout. Files whose window does not fit are skipped with an
    error log entry.
    """
    args = parseArguments()
    for f in args.files:
        logging.info("Processing file {:s}".format(f))
        # Close the handle deterministically instead of relying on garbage
        # collection.
        with open(f, 'rb') as handle:
            fdata = handle.read()
        fmd5 = hashlib.md5(fdata).hexdigest()
        logging.info("File {:s} contains {:d} bytes".format(f, len(fdata)))

        if args.offset > len(fdata):
            logging.error("Cannot specify offset larger than the file size for {:s}!".format(f))
            logging.error("Skipping file {:s}".format(f))
            continue
        if args.numbytes > len(fdata):
            logging.error("Cannot specify number of bytes ({:d}) greater than the file size ({:d}) for {:s}!".format(args.numbytes, len(fdata), f))
            logging.error("Skipping file {:s}".format(f))
            continue

        starti = args.offset
        if args.numbytes < 0:
            # A negative count (the default is -1) means "no limit". It must
            # not be used as the slice end: fdata[starti:-1] would silently
            # drop the final byte of the file.
            endi = len(fdata)
        else:
            endi = starti + args.numbytes
            if endi > len(fdata):
                logging.error("Offset {:d} and numbytes {:d} is a range outside the bounds of the specified file!".format(args.offset, args.numbytes))
                logging.error("Skipping file {:s}".format(f))
                continue

        realData = fdata[starti:endi]
        if args.analyze:
            computeRegexSignatures(realData, fmd5, args)
        if args.rollingXor:
            computeRollingXORSignatures(realData, fmd5, args)
# Run the command-line tool only when executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment