Last active
July 3, 2021 01:15
-
-
Save Auscitte/e2f7d69f4a1023ba64d8189995073399 to your computer and use it in GitHub Desktop.
Lists code blocks belonging to a function with the help of pdbparse library
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This file is a copy of gdata.py from pdbparse library ver. 1.5 | |
# (see https://github.com/moyix/pdbparse) | |
# with a few modifications that are necessary for my scripts to work correctly
# Ry Auscitte | |
# Python 2 and 3 | |
from construct import * | |
# Record layouts that are used by more than one leaf type.
_proc_ref = "proc_ref" / Struct(
    "sumname" / Int32ul,
    "offset" / Int32ul,
    "iMod" / Int16ul,
    "name" / CString(encoding="utf8"),
)
_datasym = "datasym" / Struct(
    "typind" / Int32ul,
    "offset" / Int32ul,
    "segment" / Int16ul,
    "name" / CString(encoding="utf8"),
)

# One global symbol record: a 16-bit leaf type followed by a payload whose
# layout is selected by that leaf type.
# NOTE(review): no default case is supplied, so leaf types missing from the
# table presumably yield data = None -- confirm for the construct version in use.
gsym = Struct(
    "leaf_type" / Int16ul,
    "data" / Switch(
        this.leaf_type,
        {
            0x1009: "data_v2" / Struct(
                "symtype" / Int32ul,
                "offset" / Int32ul,
                "segment" / Int16ul,
                # length-prefixed (one-byte length) rather than NUL-terminated name
                "name" / PascalString(lengthfield="length" / Int8ul, encoding="utf8"),
            ),
            0x1107: "const" / Struct(
                # Type index (containing enum if enumerate) or metadata token
                "typind" / Int32ul,
                # numeric leaf containing value
                # NOTE(review): only two bytes are read here; a numeric leaf with
                # a longer encoding would presumably be misparsed -- confirm.
                "value" / Int16ul,
                "name" / CString(encoding="utf8"),
            ),
            # from struct UDTSYM in cvinfo.h
            0x1108: "udt" / Struct(
                "typind" / Int32ul,
                "name" / CString(encoding="utf8"),
            ),
            0x110c: _datasym,
            # from struct DATASYM32 in cvinfo.h
            0x110d: _datasym,
            0x110E: "data_v3" / Struct(
                "symtype" / Int32ul,
                "offset" / Int32ul,
                "segment" / Int16ul,
                "name" / CString(encoding="utf8"),
            ),
            # from struct REFSYM2 in cvinfo.h
            0x1125: _proc_ref,
            # from struct REFSYM2 in cvinfo.h
            0x1127: _proc_ref,
        },
    ),
)
# The whole stream: length-prefixed symbol records, repeated for as long as
# records keep parsing successfully.
GlobalsData = "globals" / GreedyRange(
    Struct(
        "length" / Int16ul,
        # Read exactly `length` bytes and parse gsym from that slice alone.
        "symbol" / RestreamData(Bytes(this.length), gsym),
    )
)
def parse(data):
    """Parse a global symbols stream given as in-memory data.

    `data` is handed to GlobalsData.parse(); the resulting records are
    flattened by merge_structures() and returned.
    """
    return merge_structures(GlobalsData.parse(data))
def parse_stream(stream):
    """Parse a global symbols stream read from a stream object.

    `stream` is handed to GlobalsData.parse_stream(); the resulting records
    are flattened by merge_structures() and returned.
    """
    return merge_structures(GlobalsData.parse_stream(stream))
def merge_structures(con):
    """Flatten parsed symbol records into single-level containers.

    Each item of `con` is expected to expose `length` and `symbol`, where
    `symbol` carries `leaf_type` and `data`. The result is a ListContainer
    holding one Container per record with the keys 'length' and 'leaf_type'
    plus, when `data` is non-empty, every field found in `data`.
    """
    flattened = []
    for record in con:
        fields = {'length': record.length, 'leaf_type': record.symbol.leaf_type}
        payload = record.symbol.data
        if payload:
            # RAusc: copy whatever fields the payload has, rather than the fixed
            # symtype/offset/segment/name set, since layouts differ per leaf type.
            for key in payload.keys():
                fields[key] = payload[key]
        flattened.append(Container(fields))
    return ListContainer(flattened)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" Lists code blocks belonging to a function with the help of pdbparse library. | |
The script is incomplete and merely aims to demonstrate one of the possible methods.
:Copyright: | |
Ry Auscitte 2020. This script is distributed under GPL. | |
:Authors: | |
Ry Auscitte | |
""" | |
import pdbparse | |
import pefile | |
import sys | |
import construct as cs | |
from argparse import ArgumentParser | |
# Symbol record types, as defined in
# https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h
S_PROCREF = 0x1125  # reference to a procedure
S_SEPCODE = 0x1132  # separated code
# The parsing constructs below follow the definitions of SEPCODESYM and PROCSYM32
# from https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h | |
def _bytes_past_scope_end(p_end, ctx):
    """Return how many bytes to skip so that parsing resumes after the record at p_end.

    The parsed stream starts at offset ctx._params.entry_offest in the input
    file, whereas p_end is relative to the beginning of that file;
    cs.Int32ul.sizeof() accounts for the end-of-sequence marker itself.
    """
    return p_end - ctx._params.entry_offest - ctx._io.tell() + cs.Int32ul.sizeof()


# A procedure record, followed by the run of S_SEPCODE records that come after
# the end of the procedure's scope. Parsing requires an `entry_offest` keyword
# argument (sic): the offset at which the parsed data begins.
GlobalProc = cs.Struct(
    "PROCSYM32" / cs.Struct(
        "reclen" / cs.Int16ul,
        "rectyp" / cs.Int16ul,
        "pParent" / cs.Int32ul,
        "pEnd" / cs.Int32ul,
        "pNext" / cs.Int32ul,
        "len" / cs.Int32ul,
        "DbgStart" / cs.Int32ul,
        "DbgEnd" / cs.Int32ul,
        "typind" / cs.Int32ul,
        "offset" / cs.Int32ul,
        "seg" / cs.Int16ul,
        "flags" / cs.Int8ul,
        "name" / cs.CString(encoding="utf8"),
    ),
    # skip everything up to and including the procedure's end-of-sequence marker
    cs.Padding(lambda ctx: _bytes_past_scope_end(ctx.PROCSYM32.pEnd, ctx)),
    "sepcodesyms" / cs.GreedyRange(
        "SEPCODESYM" / cs.Struct(
            "reclen" / cs.Int16ul,
            # range over all records with rectyp = S_SEPCODE
            "rectyp" / cs.Const(S_SEPCODE, cs.Int16ul),
            "pParent" / cs.Int32ul,     # pointer to the parent
            "pEnd" / cs.Int32ul,        # pointer to this block's end
            "length" / cs.Int32ul,      # count of bytes of this block
            "scf" / cs.Int32ul,         # flags
            "off" / cs.Int32ul,         # sect:off of the separated code
            "offParent" / cs.Int32ul,   # sectParent:offParent of the enclosing scope
            "sect" / cs.Int16ul,        # (proc, block, or sepcode)
            "sectParent" / cs.Int16ul,
            # skip to just past this block's own end-of-sequence marker
            cs.Padding(lambda ctx: _bytes_past_scope_end(ctx.pEnd, ctx)),
        ),
    ),
)
def list_code_blocks(pdb, base, fname):
    """Print the address ranges of the code blocks belonging to function `fname`.

    pdb   -- parsed pdb object; its STREAM_GSYM, STREAM_DBI, STREAM_SECT_HDR
             and streams attributes are read
    base  -- value added to every section's VirtualAddress
    fname -- name compared against the S_PROCREF entries of the global symbols

    Only the first matching S_PROCREF entry is used. If there is none, a
    message is printed and the function returns without further output.
    """
    matches = [sym for sym in pdb.STREAM_GSYM.globals
               if sym.leaf_type == S_PROCREF and sym.name == fname]
    if not matches:
        print("There is no S_PROCREF-type reference to", fname, "in the global symbols stream.")
        return

    ref = matches[0]
    # iMod is used as a one-based index into the DBI module headers; the
    # selected header names the stream holding the module's symbols.
    module_stream = pdb.STREAM_DBI.DBIExHeaders[ref.iMod - 1].stream
    data = pdb.streams[module_stream].data
    fn = GlobalProc.parse(data[ref.offset:], entry_offest=ref.offset)

    sections = pdb.STREAM_SECT_HDR.sections
    proc = fn.PROCSYM32
    # segment numbers are used as one-based indices into the section headers
    proc_start = sections[proc.seg - 1].VirtualAddress + base + proc.offset
    print("Function start:", hex(proc_start))
    print("Function end:", hex(proc_start + proc.len), "( length = ", proc.len, ")")

    print("Separated blocks of code:")
    for block in fn.sepcodesyms:
        block_start = sections[block.sect - 1].VirtualAddress + base + block.off
        print("\t", "Block start:", hex(block_start))
        print("\t", "Block end:", hex(block_start + block.length), "( length = ", block.length, ")")
        print()
if __name__ == '__main__':
    # Command-line entry point: all three options are mandatory.
    parser = ArgumentParser(description="Lists code blocks belonging to a function.")
    for flag, description in (
        ("-p", "a path to the pdb file"),
        ("-m", "a path to the dll file"),
        ("-n", "function name"),
    ):
        parser.add_argument(flag, required=True, help=description)
    options = parser.parse_args()

    # The pdb is parsed before the image is opened, as in the original order.
    symbols = pdbparse.parse(options.p)
    image = pefile.PE(options.m)
    list_code_blocks(symbols, image.OPTIONAL_HEADER.ImageBase, options.n)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Read this for more info.