# edeca/ida2yara.py

## ida2yara.py
import fileinput
import re
import string

########
# Author: David Cannings
#
# Convert IDA string output to a Yara rule, escaping as necessary
# and using unicode modifiers.
########

# TODO: Try with 64-bit addresses
# TODO: Try with strings from other sections
# TODO: Try with other string types (e.g. Pascal)
# Regex for one line of IDA's exported string list: an optional leading dot,
# a section name, ":" and a 4-16 digit hex address, an 8-digit hex field, the
# string type, then the string itself.
#   group(1) - string type ("unicode", "C", "C (16 bits)", ...)
#   group(2) - the string text (must not start with a space)
# Written as a raw string so the regex backslashes are not parsed as (invalid)
# Python string escapes.
pattern = r"\.?(?:UPX\d|const|text|strings|seg\d+|(?:[ro]+)?data):[0-9A-Z]{4,16}\s+[0-9A-Z]{8}\s+(unicode|C \(16 bits\) - UTF-16LE|C \(16 bits\)|C)\s+([^ ].+)"

# Examples:
#
# .Net  - .strings:1012 00000016 unicode c:\\log.txt
# AMD64 - .data:000000018000C338 0000000C unicode SInfo
# ELF   - .rodata:08061805 00000033 C 12IAgentModule
#
# Latest IDA7 - .rdata:003E89C4 00000012        C       dns is running...

# Substrings of the string-type column that mark a 16-bit string.
_WIDE_MARKERS = ("unicode", "16 bits")


def _escape_for_yara(text):
    r"""Return *text* with non-printable characters written as ``\xNN``.

    Characters found in ``string.printable`` are copied through unchanged;
    every other character is replaced by a backslash-x escape of its code
    point, formatted with at least two hex digits.
    """
    # NOTE(review): code points above 0xFF format to more than two hex
    #               digits here; confirm how such strings should be emitted.
    return "".join(
        ch if ch in string.printable else "\\x{:02x}".format(ord(ch))
        for ch in text
    )


# Compile once up front instead of resolving the pattern for every line.
_match_ida_line = re.compile(pattern).match

# Read IDA string-list lines from the files named on the command line (or
# from stdin when none are given) and print one anonymous Yara string
# definition for every line the pattern recognises.
for line in fileinput.input():
    m = _match_ida_line(line.strip())
    if not m:
        # Not a string-list line (header, blank line, ...): nothing to emit.
        continue

    # TODO: This doesn't work when IDA exports strings as
    #       "UTF-16LE" and the encoding is lost.  IDA7 also
    #       does not mark all strings with their actual encoding (?)
    modifiers = "wide" if any(s in m.group(1) for s in _WIDE_MARKERS) else ""

    print('    $ = "{}" {}'.format(_escape_for_yara(m.group(2)), modifiers))