# notareverser/shannon-sig.py

## shannon-sig.py
#!/usr/bin/env python

# for our homey, Claude Shannon

import sys
import logging
import binascii
import hashlib
import argparse

from collections import defaultdict

# Root logger setup: WARNING and above, ISO-style timestamps, written to
# stderr so that log output stays separate from what the script prints.
logging.basicConfig(
    handlers=[logging.StreamHandler(sys.stderr)],
    datefmt='%Y-%m-%dT%H:%M:%S',
    format='%(asctime)s %(levelname)-8s %(message)s',
    level=logging.WARNING,
)


def spacify(data):
    """Return ``data`` with a single space between every two-byte group.

    ``data`` is a bytes-like object (the callers in this file pass hex
    text produced by ``binascii.b2a_hex``).  It is cut into slices of
    length two; an odd trailing byte forms its own final group.
    """
    pairs = (data[pos:pos + 2] for pos in range(0, len(data), 2))
    return b' '.join(pairs)


def convertToSignatureClause(data, useSpaces = True):
    """Render ``data`` as lowercase hex text for use inside a YARA hex string.

    data      -- bytes-like payload to encode
    useSpaces -- when true, the hex text is passed through ``spacify`` so
                 that each byte's two hex digits are separated by a space

    Returns the hex text as a ``str``.
    """
    hexText = binascii.hexlify(data)
    if useSpaces:
        hexText = spacify(hexText)
    return hexText.decode('utf-8')


def rollingXOR(data, keyStart = 0):
    """XOR every byte of ``data`` with a key that grows by one per byte.

    The byte at index ``i`` is combined with ``(keyStart + i) & 0xff``, so
    the key wraps around after 0xff.  Returns a new ``bytearray``; ``data``
    itself is left untouched.
    """
    return bytearray(
        byte ^ ((keyStart + index) & 0xff)
        for index, byte in enumerate(data)
    )


def formatCount(count):
    """Return ``count`` wrapped in braces, e.g. ``3`` -> ``'{3}'``.

    The result is used as a regex repetition quantifier.
    """
    # doubled braces are literal braces in str.format
    return '{{{:d}}}'.format(count)


def convertToRegex(val, yesCounts, noCounts):
    """Build the on/off regex for byte value ``val``.

    val       -- byte value (0..255) the pattern is written for
    yesCounts -- lengths of the runs in which the byte must be ``val``
    noCounts  -- lengths of the gaps between those runs, in which the byte
                 must be anything except ``val``

    The pattern alternates yes-run, no-gap, yes-run, ... and always ends on
    the last entry of ``yesCounts``.  Returns the regex as a string in
    ``/.../s`` form.
    """
    hexByte = "{:02x}".format(val)
    matchByte = "\\x" + hexByte
    skipByte = "[^\\x" + hexByte + "]"

    pieces = []
    for idx, gapLength in enumerate(noCounts):
        pieces.append(matchByte + formatCount(yesCounts[idx]))
        pieces.append(skipByte + formatCount(gapLength))

    # the pattern is closed by the final yes run
    pieces.append(matchByte + formatCount(yesCounts[-1]))

    # the trailing 's' is YARA's single-line modifier
    return '/' + ''.join(pieces) + '/s'


def getRegexes(locations):
    """Turn the offsets of one byte into 256 on/off regexes.

    ``locations`` is the list of offsets at which the chosen byte occurs
    (expected in ascending order).  Neighbouring offsets are merged into
    "yes" run lengths, and the distance between two runs becomes a "no" gap
    length.  The resulting layout is rendered once per possible byte value.

    Returns a list of ``(value, regex)`` tuples for the values 0..255.
    """
    yesCounts = []
    noCounts = []

    # Walk adjacent offset pairs: a step of exactly one extends the current
    # run; anything else closes the run and records the gap between the two.
    runLength = 1
    for previous, current in zip(locations, locations[1:]):
        step = current - previous
        if step == 1:
            runLength += 1
            continue
        yesCounts.append(runLength)
        noCounts.append(step - 1)
        runLength = 1

    # the run still open when the offsets are exhausted is the final yes
    yesCounts.append(runLength)

    logging.debug("Yes/no counts")
    logging.debug(yesCounts)
    logging.debug(noCounts)

    # one regex per possible byte value, all sharing the same yes/no layout
    return [(val, convertToRegex(val, yesCounts, noCounts)) for val in range(256)]


def computeRegexSignatures(data, fmd5, args):
    """Print a YARA rule built from the positions of the most frequent byte.

    data  -- bytes-like payload to analyse
    fmd5  -- hex digest string used in the rule name and in log messages
    args  -- namespace providing ``ignore`` (byte values left out of the
             frequency count), ``emitCleartext`` and ``spacify``

    If every byte is ignored (or ``data`` is empty) an error is logged and
    nothing is printed.  Returns ``None`` in all cases.
    """
    tally = defaultdict(int)
    for byte in data:
        if byte in args.ignore:
            continue
        tally[byte] += 1

    if not tally:
        logging.error("No non-ignored bytes found in file {:s}, cannot compute Shannon signature!".format(fmd5))
        return

    # most frequent byte first; ties keep the order of first appearance
    ranked = sorted(tally.items(), key=lambda item: item[1], reverse=True)
    topByte, topCount = ranked[0]
    logging.info("Highest frequency byte is 0x{:02x} with count {:d}".format(topByte, topCount))

    locations = [idx for idx, byte in enumerate(data) if byte == topByte]
    logging.debug("All locations of byte 0x{:02x}:  {}".format(topByte, locations))

    regexes = getRegexes(locations)

    ruleLines = [
        "rule FMD5_{:s}_RegexMap".format(fmd5),
        "{",
        "  strings:",
    ]
    if args.emitCleartext:
        ruleLines.append("    $cleartext = {" + convertToSignatureClause(data, args.spacify) + "}")
    for byteValue, regex in regexes:
        ruleLines.append("    $reg_{:02x} = ".format(byteValue) + regex)
    ruleLines.extend(["  condition:", "    any of them", "}"])

    print('\n'.join(ruleLines) + '\n')


def computeRollingXORSignatures(data, fmd5, args):
    """Print a YARA rule holding ``data`` encoded under each rolling-XOR key.

    data  -- bytes-like payload to encode
    fmd5  -- hex digest string used in the rule name
    args  -- namespace providing ``emitCleartext`` and ``spacify``

    One hex string ``$key_XX`` is emitted per start key, where ``XX`` is the
    key that ``rollingXOR`` starts from.  Returns ``None``.
    """
    signatureLines = [
        "rule FMD5_{:s}_RollingXOR".format(fmd5),
        "{",
        "  strings:",
    ]
    if args.emitCleartext:
        signatureLines.append("    $cleartext = {" + convertToSignatureClause(data, args.spacify) + "}")

    # range(1, 256): the upper bound is exclusive, so 256 is needed for the
    # last start key 0xff to be emitted (the previous bound of 255 skipped it).
    # NOTE(review): start key 0 is still skipped, as before.  With a rolling
    # key it is not the identity for payloads longer than one byte -- confirm
    # whether it should be emitted too.
    for key in range(1, 256):
        encoded = rollingXOR(data, key)
        hexClause = convertToSignatureClause(encoded, args.spacify)
        signatureLines.append("     $key_{:02x}".format(key) + " = {" + hexClause + "}")

    signatureLines.extend(["  condition:", "    any of them", "}"])
    print('\n'.join(signatureLines) + '\n')


def parseArguments():
    """Parse and validate the command line.

    Reads ``sys.argv`` through argparse.  On top of plain parsing it:

    * exits through ``parser.error`` (usage message, exit status 2) when
      neither ``--analyze`` nor ``--rollingXor`` was given;
    * applies ``--verbose LEVEL`` to the root logger when LEVEL names a
      logging level, and warns otherwise;
    * converts every ``--ignore`` value from hexadecimal text (with or
      without a ``0x``/``0X`` prefix) into an int masked to one byte.

    Returns the argparse namespace; ``args.ignore`` is always a list of
    ints (empty when the option was not used).
    """
    parser = argparse.ArgumentParser(description="Frequency analyzer and YARA signature generator for shellcode. Give it raw shellcode files and it will create YARA signatures for single-byte encodings")
    parser.add_argument('files', nargs='+')
    parser.add_argument('-s', '--spacify', action='store_true', default=False, help='If specified, spacifies any YARA signatures between byte values (as appropriate)')
    parser.add_argument('-e', '--emitCleartext', action='store_true', default=False, help='If specified, emits a cleartext clause for the selected signatures (which will match the native shellcode directly')
    parser.add_argument('-i', '--ignore', action='append', default=None, help='Can be specified multiple times. If specified, ignore the specified byte value (hexadecimal encoding) when computing frequencies. Must be used with --analyze')
    parser.add_argument('-n', '--numbytes', action='store', type=int, default=-1, help='If specified, only use up to numbytes bytes of the shellcode file')
    parser.add_argument('-o', '--offset', action='store', type=int, default=0, help='If specified, start at the specified offset into the shellcode file')
    parser.add_argument('-a', '--analyze', action='store_true', default=False, help='If specified, analyze the shellcode and compute the on/off regular expression YARA signature')
    # the rolling key is masked with &0xff, i.e. it wraps modulo 0x100
    parser.add_argument('-r', '--rollingXor', action='store_true', default=False, help='If specified, analyze the shellcode and compute the rolling XOR YARA signature (rolling XOR increments the XOR key by 1 for each payload byte, mod 0x100)')
    parser.add_argument('-v', '--verbose', action='store', default=None, help='If specified, output verbose input')

    args = parser.parse_args()

    if not args.analyze and not args.rollingXor:
        # a usage error must not exit with a success status
        parser.error("Need to specify which mode to use!")

    if args.verbose is not None:
        newLevel = getattr(logging, args.verbose.upper(), None)
        if isinstance(newLevel, int):
            logging.getLogger().setLevel(newLevel)
        else:
            logging.warning("Unknown log level {:s}, keeping the current level".format(args.verbose))

    # turn the ignore list into actual byte values
    ignoreBytes = []
    for raw in args.ignore or []:
        try:
            # base 16 accepts the digits with or without a 0x/0X prefix
            value = int(raw, 16)
        except ValueError:
            parser.error("Ignore value {:s} is not a hexadecimal byte value".format(raw))

        if not 0 <= value <= 0xff:
            logging.warning("Ignore value {:s} outside legit range, masking with &0xff".format(raw))
        masked = value & 0xff

        logging.debug("Converting ignore byte {:s} to 0x{:02x}".format(raw, masked))
        ignoreBytes.append(masked)
    args.ignore = ignoreBytes

    return args


def main():
    """Generate the requested signatures for every file on the command line.

    For each file: read it, take its MD5 hex digest (used in the rule
    names), select the ``--offset`` / ``--numbytes`` window, then run the
    regex and/or rolling-XOR generators on that window.  A file that cannot
    be read, or whose window is out of bounds or empty, is reported through
    logging and skipped.
    """
    args = parseArguments()

    for f in args.files:
        logging.info("Processing file {:s}".format(f))
        try:
            # context manager so the handle is closed even if read() fails
            with open(f, 'rb') as handle:
                fdata = handle.read()
        except OSError as err:
            logging.error("Cannot read file {:s}: {}".format(f, err))
            logging.error("Skipping file {:s}".format(f))
            continue
        fmd5 = hashlib.md5(fdata).hexdigest()

        logging.info("File {:s} contains {:d} bytes".format(f, len(fdata)))

        if args.offset > len(fdata):
            logging.error("Cannot specify offset larger than the file size for {:s}!".format(f))
            logging.error("Skipping file {:s}".format(f))
            continue
        if args.numbytes > len(fdata):
            logging.error("Cannot specify number of bytes ({:d}) greater than the file size ({:d}) for {:s}!".format(args.numbytes, len(fdata), f))
            logging.error("Skipping file {:s}".format(f))
            continue

        starti = args.offset
        # -1 is the "no limit" default.  It must not be used as the slice
        # end itself: fdata[start:-1] would drop the last byte of the file.
        endi = len(fdata)
        if args.numbytes != -1:
            endi = starti + args.numbytes
            if endi > len(fdata):
                logging.error("Offset {:d} and numbytes {:d} is a range outside the bounds of the specified file!".format(args.offset, args.numbytes))
                logging.error("Skipping file {:s}".format(f))
                continue

        realData = fdata[starti:endi]
        if not realData:
            # an empty window would only yield rules with empty strings
            logging.error("No bytes selected from file {:s}".format(f))
            logging.error("Skipping file {:s}".format(f))
            continue

        if args.analyze:
            computeRegexSignatures(realData, fmd5, args)
        if args.rollingXor:
            computeRollingXORSignatures(realData, fmd5, args)


if __name__ == '__main__':
    main()
	#!/usr/bin/env python

	# for our homey, Claude Shannon

	import sys
	import logging
	import binascii
	import hashlib
	import argparse

	from collections import defaultdict

	# Root logger setup: WARNING and above, ISO-style timestamps, written to
	# stderr so that log output stays separate from what the script prints.
	logging.basicConfig(
	    handlers=[logging.StreamHandler(sys.stderr)],
	    datefmt='%Y-%m-%dT%H:%M:%S',
	    format='%(asctime)s %(levelname)-8s %(message)s',
	    level=logging.WARNING,
	)


	def spacify(data):
	    """Return ``data`` with a single space between every two-byte group.

	    ``data`` is a bytes-like object (the callers in this file pass hex
	    text produced by ``binascii.b2a_hex``).  It is cut into slices of
	    length two; an odd trailing byte forms its own final group.
	    """
	    pairs = (data[pos:pos + 2] for pos in range(0, len(data), 2))
	    return b' '.join(pairs)


	def convertToSignatureClause(data, useSpaces = True):
	    """Render ``data`` as lowercase hex text for use inside a YARA hex string.

	    data      -- bytes-like payload to encode
	    useSpaces -- when true, the hex text is passed through ``spacify`` so
	                 that each byte's two hex digits are separated by a space

	    Returns the hex text as a ``str``.
	    """
	    result = binascii.b2a_hex(data)
	    if useSpaces:
	        result = spacify(result)
	    return result.decode('utf-8')


	def rollingXOR(data, keyStart = 0):
	    """XOR every byte of ``data`` with a key that grows by one per byte.

	    The byte at index ``x`` is combined with ``(keyStart + x) & 0xff``, so
	    the key wraps around after 0xff.  Returns a new ``bytearray``.
	    """
	    odata = bytearray()
	    for x, byte in enumerate(data):
	        odata.append(byte ^ ((keyStart + x) & 0xff))
	    return odata


	def formatCount(count):
	    """Return ``count`` wrapped in braces, e.g. ``3`` -> ``'{3}'``.

	    The result is used as a regex repetition quantifier.
	    """
	    return '{' + '{:d}'.format(count) + '}'


	def convertToRegex(val, yesCounts, noCounts):
	    """Build the on/off regex for byte value ``val``.

	    val       -- byte value (0..255) the pattern is written for
	    yesCounts -- lengths of the runs in which the byte must be ``val``
	    noCounts  -- lengths of the gaps between those runs, in which the byte
	                 must be anything except ``val``

	    The pattern alternates yes-run, no-gap, yes-run, ... and always ends on
	    the last entry of ``yesCounts``.  Returns the regex as a string in
	    ``/.../s`` form.
	    """
	    yByteClause = "\\x" + "{:02x}".format(val)
	    nByteClause = "[^\\x{:02x}".format(val) + ']'
	    clauses = []

	    for n in range(len(noCounts)):
	        clauses.append(yByteClause + formatCount(yesCounts[n]))
	        clauses.append(nByteClause + formatCount(noCounts[n]))

	    # add the final yes
	    clauses.append(yByteClause + formatCount(yesCounts[-1]))

	    # YARA's single-line modifier is an 's' at the end of the regex
	    finalRegex = '/' + ''.join(clauses) + '/s'
	    return finalRegex




	def getRegexes(locations):
	    """Turn the offsets of one byte into 256 on/off regexes.

	    ``locations`` is the list of offsets at which the chosen byte occurs
	    (expected in ascending order).  Neighbouring offsets are merged into
	    "yes" run lengths, and the distance between two runs becomes a "no" gap
	    length.  The resulting layout is rendered once per possible byte value.

	    Returns a list of ``(value, regex)`` tuples for the values 0..255.
	    """
	    yesCounts = []
	    noCounts = []

	    # Walk adjacent offset pairs: a step of exactly one extends the current
	    # run; anything else closes the run and records the gap between the two.
	    runLength = 1
	    for previous, current in zip(locations, locations[1:]):
	        step = current - previous
	        if step == 1:
	            runLength += 1
	            continue
	        yesCounts.append(runLength)
	        noCounts.append(step - 1)
	        runLength = 1

	    # the run still open when the offsets are exhausted is the final yes
	    yesCounts.append(runLength)

	    logging.debug("Yes/no counts")
	    logging.debug(yesCounts)
	    logging.debug(noCounts)

	    # one regex per possible byte value, all sharing the same yes/no layout
	    regexes = []
	    for val in range(256):
	        regexes.append((val, convertToRegex(val, yesCounts, noCounts)))

	    return regexes




	def computeRegexSignatures(data, fmd5, args):
	    """Print a YARA rule built from the positions of the most frequent byte.

	    data  -- bytes-like payload to analyse
	    fmd5  -- hex digest string used in the rule name and in log messages
	    args  -- namespace providing ``ignore`` (byte values left out of the
	             frequency count), ``emitCleartext`` and ``spacify``

	    If every byte is ignored (or ``data`` is empty) an error is logged and
	    nothing is printed.  Returns ``None`` in all cases.
	    """
	    nl = '\n'
	    counts = defaultdict(int)
	    for d in data:
	        if d not in args.ignore:
	            counts[d] += 1

	    if len(counts) == 0:
	        logging.error("No non-ignored bytes found in file {:s}, cannot compute Shannon signature!".format(fmd5))
	        return

	    # find most frequently occurring byte; ties keep first-seen order
	    frequencies = sorted(counts.items(), key=lambda x: x[1], reverse=True)
	    val, count = frequencies[0]
	    logging.info("Highest frequency byte is 0x{:02x} with count {:d}".format(val, count))

	    locations = [i for i, x in enumerate(data) if val == x]
	    logging.debug("All locations of byte 0x{:02x}: {}".format(val, locations))

	    regexes = getRegexes(locations)

	    signatureLines = []
	    signatureLines.append("rule FMD5_{:s}_RegexMap".format(fmd5))
	    signatureLines.append("{")
	    signatureLines.append(" strings:")
	    if args.emitCleartext:
	        signatureLines.append(" $cleartext = {" + convertToSignatureClause(data, args.spacify) + "}")
	    for (regVal, regex) in regexes:
	        signatureLines.append(" $reg_{:02x} = ".format(regVal) + regex)
	    signatureLines.append(" condition:")
	    signatureLines.append(" any of them")
	    signatureLines.append("}")
	    print(nl.join(signatureLines) + nl)



	def computeRollingXORSignatures(data, fmd5, args):
	    """Print a YARA rule holding ``data`` encoded under each rolling-XOR key.

	    data  -- bytes-like payload to encode
	    fmd5  -- hex digest string used in the rule name
	    args  -- namespace providing ``emitCleartext`` and ``spacify``

	    One hex string ``$key_XX`` is emitted per start key, where ``XX`` is the
	    key that ``rollingXOR`` starts from.  Returns ``None``.
	    """
	    nl = '\n'
	    signatureLines = []
	    signatureLines.append("rule FMD5_{:s}_RollingXOR".format(fmd5))
	    signatureLines.append("{")
	    signatureLines.append(" strings:")
	    if args.emitCleartext:
	        signatureLines.append(" $cleartext = {" + convertToSignatureClause(data, args.spacify) + "}")

	    # range(1, 256): the upper bound is exclusive, so 256 is needed for the
	    # last start key 0xff to be emitted (the previous bound of 255 skipped it).
	    # NOTE(review): start key 0 is still skipped, as before.  With a rolling
	    # key it is not the identity for payloads longer than one byte -- confirm
	    # whether it should be emitted too.
	    for x in range(1, 256):
	        tdata = rollingXOR(data, x)
	        sdata = convertToSignatureClause(tdata, args.spacify)
	        sline = " $key_{:02x}".format(x) + " = {" + sdata + "}"
	        signatureLines.append(sline)

	    signatureLines.append(" condition:")
	    signatureLines.append(" any of them")
	    signatureLines.append("}")
	    print(nl.join(signatureLines) + nl)




	def parseArguments():
	    """Parse and validate the command line.

	    Reads ``sys.argv`` through argparse.  On top of plain parsing it:

	    * exits through ``parser.error`` (usage message, exit status 2) when
	      neither ``--analyze`` nor ``--rollingXor`` was given;
	    * applies ``--verbose LEVEL`` to the root logger when LEVEL names a
	      logging level, and warns otherwise;
	    * converts every ``--ignore`` value from hexadecimal text (with or
	      without a ``0x``/``0X`` prefix) into an int masked to one byte.

	    Returns the argparse namespace; ``args.ignore`` is always a list of
	    ints (empty when the option was not used).
	    """
	    parser = argparse.ArgumentParser(description="Frequency analyzer and YARA signature generator for shellcode. Give it raw shellcode files and it will create YARA signatures for single-byte encodings")
	    parser.add_argument('files', nargs='+')
	    parser.add_argument('-s', '--spacify', action='store_true', default=False, help='If specified, spacifies any YARA signatures between byte values (as appropriate)')
	    parser.add_argument('-e', '--emitCleartext', action='store_true', default=False, help='If specified, emits a cleartext clause for the selected signatures (which will match the native shellcode directly')
	    parser.add_argument('-i', '--ignore', action='append', default=None, help='Can be specified multiple times. If specified, ignore the specified byte value (hexadecimal encoding) when computing frequencies. Must be used with --analyze')
	    parser.add_argument('-n', '--numbytes', action='store', type=int, default=-1, help='If specified, only use up to numbytes bytes of the shellcode file')
	    parser.add_argument('-o', '--offset', action='store', type=int, default=0, help='If specified, start at the specified offset into the shellcode file')
	    parser.add_argument('-a', '--analyze', action='store_true', default=False, help='If specified, analyze the shellcode and compute the on/off regular expression YARA signature')
	    # the rolling key is masked with &0xff, i.e. it wraps modulo 0x100
	    parser.add_argument('-r', '--rollingXor', action='store_true', default=False, help='If specified, analyze the shellcode and compute the rolling XOR YARA signature (rolling XOR increments the XOR key by 1 for each payload byte, mod 0x100)')
	    parser.add_argument('-v', '--verbose', action='store', default=None, help='If specified, output verbose input')

	    args = parser.parse_args()

	    if not args.analyze and not args.rollingXor:
	        # a usage error must not exit with a success status
	        parser.error("Need to specify which mode to use!")

	    if args.verbose is not None:
	        newLevel = getattr(logging, args.verbose.upper(), None)
	        if isinstance(newLevel, int):
	            logging.getLogger().setLevel(newLevel)
	        else:
	            logging.warning("Unknown log level {:s}, keeping the current level".format(args.verbose))

	    # turn the ignore list into actual byte values
	    tignore = []
	    for i in args.ignore or []:
	        try:
	            # base 16 accepts the digits with or without a 0x/0X prefix
	            oi = int(i, 16)
	        except ValueError:
	            parser.error("Ignore value {:s} is not a hexadecimal byte value".format(i))

	        if not 0 <= oi <= 0xff:
	            logging.warning("Ignore value {:s} outside legit range, masking with &0xff".format(i))
	        oi = oi & 0xff

	        logging.debug("Converting ignore byte {:s} to 0x{:02x}".format(i, oi))
	        tignore.append(oi)
	    args.ignore = tignore

	    return args



	def main():
	    """Generate the requested signatures for every file on the command line.

	    For each file: read it, take its MD5 hex digest (used in the rule
	    names), select the ``--offset`` / ``--numbytes`` window, then run the
	    regex and/or rolling-XOR generators on that window.  A file that cannot
	    be read, or whose window is out of bounds or empty, is reported through
	    logging and skipped.
	    """
	    args = parseArguments()

	    for f in args.files:
	        logging.info("Processing file {:s}".format(f))
	        try:
	            # context manager so the handle is closed even if read() fails
	            with open(f, 'rb') as handle:
	                fdata = handle.read()
	        except OSError as err:
	            logging.error("Cannot read file {:s}: {}".format(f, err))
	            logging.error("Skipping file {:s}".format(f))
	            continue
	        fmd5 = hashlib.md5(fdata).hexdigest()

	        logging.info("File {:s} contains {:d} bytes".format(f, len(fdata)))

	        if args.offset > len(fdata):
	            logging.error("Cannot specify offset larger than the file size for {:s}!".format(f))
	            logging.error("Skipping file {:s}".format(f))
	            continue
	        if args.numbytes > len(fdata):
	            logging.error("Cannot specify number of bytes ({:d}) greater than the file size ({:d}) for {:s}!".format(args.numbytes, len(fdata), f))
	            logging.error("Skipping file {:s}".format(f))
	            continue

	        starti = args.offset
	        # -1 is the "no limit" default.  It must not be used as the slice
	        # end itself: fdata[start:-1] would drop the last byte of the file.
	        endi = len(fdata)
	        if args.numbytes != -1:
	            endi = starti + args.numbytes
	            if endi > len(fdata):
	                logging.error("Offset {:d} and numbytes {:d} is a range outside the bounds of the specified file!".format(args.offset, args.numbytes))
	                logging.error("Skipping file {:s}".format(f))
	                continue

	        realData = fdata[starti:endi]
	        if not realData:
	            # an empty window would only yield rules with empty strings
	            logging.error("No bytes selected from file {:s}".format(f))
	            logging.error("Skipping file {:s}".format(f))
	            continue

	        if args.analyze:
	            computeRegexSignatures(realData, fmd5, args)
	        if args.rollingXor:
	            computeRollingXORSignatures(realData, fmd5, args)


	if __name__ == '__main__':
	    main()