Skip to content

Instantly share code, notes, and snippets.

@stvemillertime
Forked from williballenthin/ida_match_yara.py
Created August 16, 2021 13:36
Show Gist options
  • Save stvemillertime/ca26c9862ae8aa140305f478279f4c1b to your computer and use it in GitHub Desktop.
Save stvemillertime/ca26c9862ae8aa140305f478279f4c1b to your computer and use it in GitHub Desktop.
search for YARA matches in each function within IDA Pro.
#!/usr/bin/env python2
'''
search for YARA matches in each function within IDA Pro.
upon execution, prompts the user to provide the YARA rules file.
requirements:
- hexdump
- yara-python
author: Willi Ballenthin
email: william.ballenthin@fireeye.com
'''
import os
import os.path
import logging
import collections
import yara
import hexdump
import idc
import idaapi
import idautils
logger = logging.getLogger('yara.ida')
def append_comment(ea, s, repeatable=False):
    '''
    add the given string as a (possibly repeating) comment to the given address.
    does not add the comment if it already exists as whole line(s) of the
     current comment.
    adds the comment on its own line.

    Args:
      ea (int): the address at which to add the comment.
      s (str): the comment text.
      repeatable (bool): if True, set a repeatable comment.

    Raises:
      UnicodeError: if the given string is not ascii
        (UnicodeEncodeError for unicode text).
    '''
    # see: http://blogs.norman.com/2011/security-research/improving-ida-analysis-of-x64-exception-handling
    # the encode call doubles as validation: non-ascii text raises here,
    #  before the database is touched.
    s = s.encode('ascii')

    if repeatable:
        existing = idc.RptCmt(ea)
    else:
        existing = idc.Comment(ea)

    if not existing:
        # no existing comment
        updated = s
    else:
        # ignore duplicates.
        # compare on line boundaries rather than with a plain substring test,
        #  otherwise 'yara: ns/rule/$a1' would be dropped when
        #  'yara: ns/rule/$a10' is already present at this address.
        if ('\n' + s + '\n') in ('\n' + existing + '\n'):
            return
        updated = existing + '\n' + s

    if repeatable:
        idc.MakeRptCmt(ea, updated)
    else:
        idc.MakeComm(ea, updated)
def get_data(start, size):
    '''
    fetch `size` bytes beginning at address `start`.
    unlike a bare `idc.GetManyBytes` call, bytes that cannot be read are
     substituted with NULLs.

    Args:
      start (int): start address.
      size (int): number of bytes to read.

    Returns:
      bytes: `size` bytes, with NULL standing in for each unavailable byte.
    '''
    # fast path: a single bulk read, which usually succeeds.
    data = idc.GetManyBytes(start, size)
    if data:
        return data

    # slow path: the bulk read came back empty, so go byte by byte and
    #  pad every address that yields nothing.
    return b''.join(idc.GetManyBytes(addr, 1) or b'\x00'
                    for addr in range(start, start + size))
def get_functions():
    '''
    walk every segment and yield the functions found between its start and end.

    Yields:
      int: address of the function.
    '''
    for seg in idautils.Segments():
        seg_begin = idc.SegStart(seg)
        seg_end = idc.SegEnd(seg)
        for func_ea in idautils.Functions(seg_begin, seg_end):
            yield func_ea
def get_function_data(fva):
    '''
    collect the bytes of each chunk of the given function.
    the bytes of every item head within a chunk are concatenated in order.

    Args:
      fva (int): address of the function.

    Yields:
      Tuple[int, bytes]: the chunk start address and the chunk's bytes.
    '''
    for (begin, end) in idautils.Chunks(fva):
        ret = []
        for head in idautils.Heads(begin, end):
            size = idc.ItemSize(head)
            # read via `get_data` rather than `idc.GetManyBytes` directly:
            #  it pads unavailable bytes with NULLs, so the join below never
            #  sees a non-bytes value and item sizes stay intact.
            ret.append(get_data(head, size))
        yield begin, b''.join(ret)
def match_function(rules, fva):
    '''
    run the given rules over each chunk of a function and report string hits.

    Args:
      rules: object whose `match(data=...)` returns the matches for a buffer.
      fva (int): address of the function.

    Yields:
      tuple: (namespace, rule name, string name, function address,
        address of the matched string, matched content).
    '''
    logger.debug('matching function: 0x%x', fva)
    for chunk_va, chunk_bytes in get_function_data(fva):
        hits = rules.match(data=chunk_bytes)
        for hit in hits:
            for (off, string_id, content) in hit.strings:
                # offsets are relative to the chunk buffer; rebase them
                #  onto the chunk's start address.
                hit_va = chunk_va + off
                yield hit.namespace, hit.rule, string_id, fva, hit_va, content
def prompt_for_file_path(title="Select a file to open"):
    '''
    show a form with a single file-open input and return the chosen path.

    Args:
      title (str): text substituted into the form definition.

    Returns:
      the value of the file input when the form's `Execute()` returns 1,
       otherwise None.
    '''
    class FileOpenForm(idaapi.Form):
        def __init__(self):
            self.invert = False
            layout = r"""{title:s}
<#{title:s}#{title:s}:{{iFileOpen}}>
""".format(title=title)
            controls = {'iFileOpen': idaapi.Form.FileInput(open=True)}
            idaapi.Form.__init__(self, layout, controls)

        def OnFormChange(self, fid):
            return 1

    form = FileOpenForm()
    form.Compile()
    form.iFileOpen.value = ""

    # read the input before freeing the form, and only when it was accepted.
    accepted = form.Execute() == 1
    chosen = form.iFileOpen.value if accepted else None
    form.Free()
    return chosen
def annotate_match(va, namespace, rulename, stringname):
    '''
    record a YARA hit as a comment at the given address.

    Args:
      va (int): address to annotate.
      namespace (str): namespace of the matching rule.
      rulename (str): name of the matching rule.
      stringname (str): name of the matching string.
    '''
    comment = 'yara: %s/%s/%s' % (namespace, rulename, stringname)
    append_comment(va, comment)
def find_yara_rules(paths):
    '''
    search the given sequence of paths for either:
      - file paths ending in .yara
      - directories that contain files with names ending in .yara

    paths that do not exist are skipped with a warning.
    existing files without the .yara extension are skipped silently.
    directories are not searched recursively; their entries are visited in
     sorted order so the result is deterministic.

    Args:
      paths (List[str]): list of file system paths to files or directories.

    Returns:
      List[str]: list of file system paths to files with extension ".yara".
    '''
    ret = []
    for path in paths:
        # test existence first: isfile()/isdir() are both False for a missing
        #  path, so a check placed after them can never fire.
        if not os.path.exists(path):
            if path.endswith('.yara'):
                logger.warning('YARA rule does not exist: %s', path)
            else:
                logger.warning('YARA rule directory does not exist: %s', path)
            continue

        if os.path.isfile(path):
            if path.endswith('.yara'):
                ret.append(path)
        elif os.path.isdir(path):
            # os.listdir returns entries in arbitrary order.
            for filename in sorted(os.listdir(path)):
                if not filename.endswith('.yara'):
                    continue
                candidate = os.path.join(path, filename)
                # a sub-directory that happens to be named "*.yara" is not a rule file.
                if os.path.isfile(candidate):
                    ret.append(candidate)
    return ret
def main():
    '''
    compile YARA rules and match them against every function in the database.

    rule files come from `idc.ARGV` (filtered through `find_yara_rules`) when
     it is non-empty; otherwise the user is prompted for a single rule file.
    each rule file is compiled under a namespace equal to its basename
     without the extension.
    every reported match is logged and added as a comment at the match address.
    returns early, with a warning, when no rule file is available.
    '''
    logging.basicConfig(level=logging.INFO)
    logging.getLogger().setLevel(logging.INFO)

    if idc.ARGV:
        logger.debug('found IDA script cli arguments, using those as YARA rules')
        yarapaths = find_yara_rules(idc.ARGV)
    else:
        yarapath = prompt_for_file_path(title='Select YARA rule file')
        # the prompt returns None when the form is not accepted, and the input
        #  starts out empty; neither is a usable rule path.
        yarapaths = [yarapath] if yarapath else []

    if not yarapaths:
        logger.warning('no YARA rule files provided, nothing to match')
        return

    logger.info('compiling YARA rules...')
    # from file basename (no extension) as namespace to file path.
    # splitext keeps the full name for files without an extension, where
    #  rpartition('.')[0] would produce an empty namespace.
    rules = yara.compile(filepaths={os.path.splitext(os.path.basename(path))[0]: path
                                    for path in yarapaths})

    logger.info('matching YARA rules...')
    for fva in get_functions():
        # per function: rule name -> set of addresses covered by reported matches.
        matched_vas = collections.defaultdict(set)
        # match_function echoes back the function address; ignore it rather
        #  than rebinding the outer loop variable.
        for namespace, rulename, stringname, _, va, string in match_function(rules, fva):
            if va in matched_vas[rulename]:
                # personal preference: ignore overlapping matches
                continue

            logger.info('MATCH: rule: %s/%s/%s', namespace, rulename, stringname)
            logger.info('location: %s (0x%x) at offset: 0x%x', idc.GetFunctionName(fva), fva, va)
            logger.info('content:\n%s', hexdump.hexdump(string, result='return'))
            annotate_match(va, namespace, rulename, stringname)

            # remember every address this match covers.
            matched_vas[rulename].update(range(va, va + len(string)))
    logger.info('done matching YARA')


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment