Skip to content

Instantly share code, notes, and snippets.

@edeca
Created October 17, 2019 12:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save edeca/91e698474f55bd5d5f2462ba3223d8c4 to your computer and use it in GitHub Desktop.
Save edeca/91e698474f55bd5d5f2462ba3223d8c4 to your computer and use it in GitHub Desktop.
Simple script to turn strings copied from IDA into Yara strings
import fileinput
import re
import string
########
# Author: David Cannings
#
# Convert IDA string output to a Yara rule, escaping as necessary
# and using unicode modifiers.
########
# TODO: Try with 64-bit addresses
# TODO: Try with strings from other sections
# TODO: Try with other string types (e.g. Pascal)
# Regex for one line of IDA's strings-window output:
#   <segment>:<address> <length> <string type> <string>
# group(1) is the string type, group(2) is the string text.
# Kept as a raw string so the regex escapes (\d, \s, \(, ...) are not
# treated as (invalid) Python string escapes.
pattern = r"\.?(?:UPX\d|const|text|strings|seg\d+|(?:[ro]+)?data):[0-9A-Z]{4,16}\s+[0-9A-Z]{8}\s+(unicode|C \(16 bits\) - UTF-16LE|C \(16 bits\)|C)\s+([^ ].+)"
# Examples:
#
# .Net - .strings:1012 00000016 unicode c:\\log.txt
# AMD64 - .data:000000018000C338 0000000C unicode SInfo
# ELF - .rodata:08061805 00000033 C 12IAgentModule
#
# Latest IDA7 - .rdata:003E89C4 00000012 C dns is running...

# Compiled once; the script matches every input line against it.
_LINE_RE = re.compile(pattern)

# Substrings of the string-type column that select Yara's "wide" modifier.
# TODO: This doesn't work when IDA exports strings as
#       "UTF-16LE" and the encoding is lost. IDA7 also
#       does not mark all strings with their actual encoding (?)
_WIDE_MARKERS = ("unicode", "16 bits")


def ida_line_to_yara(line):
    """Convert one line of IDA string output into a Yara string definition.

    Args:
        line: A single line of text; surrounding whitespace is stripped
            before matching.

    Returns:
        The Yara string definition (for example ``' $ = "SInfo" wide'``),
        or ``None`` when the line does not match ``pattern``.
    """
    m = _LINE_RE.match(line.strip())
    if m is None:
        return None

    is_wide = any(marker in m.group(1) for marker in _WIDE_MARKERS)
    modifiers = "wide" if is_wide else ""

    # Fixups for escape sequences Yara doesn't like: anything outside
    # string.printable is emitted as a hex escape.
    # NOTE(review): code points above 0xFF format to more than two hex
    # digits, which is presumably not a valid single-byte Yara escape;
    # the right fix depends on the encoding used in the binary - confirm.
    converted = "".join(
        c if c in string.printable else "\\x{:02x}".format(ord(c))
        for c in m.group(2)
    )
    return ' $ = "{}" {}'.format(converted, modifiers)


def main():
    """Read lines from the files named in argv (or stdin) and print Yara strings."""
    for line in fileinput.input():
        yara_string = ida_line_to_yara(line)
        if yara_string is not None:
            print(yara_string)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment