Skip to content

Instantly share code, notes, and snippets.

@Auscitte
Last active July 3, 2021 01:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Auscitte/e2f7d69f4a1023ba64d8189995073399 to your computer and use it in GitHub Desktop.
Save Auscitte/e2f7d69f4a1023ba64d8189995073399 to your computer and use it in GitHub Desktop.
Lists code blocks belonging to a function with the help of pdbparse library
# This file is a copy of gdata.py from pdbparse library ver. 1.5
# (see https://github.com/moyix/pdbparse)
# with a few modifications that are necessary for my scripts to work correctly
# Ry Auscitte
# Python 2 and 3
from construct import *
# Payload layout used for both procedure-reference leaf types (0x1125 and
# 0x1127); follows struct REFSYM2 in cvinfo.h.
_proc_ref = "proc_ref" / Struct(
    "sumname" / Int32ul,
    "offset" / Int32ul,
    "iMod" / Int16ul,
    "name" / CString(encoding="utf8"),
)

# Payload layout used for both data-symbol leaf types (0x110d and 0x110c);
# follows struct DATASYM32 in cvinfo.h.
_datasym = "datasym" / Struct(
    "typind" / Int32ul,
    "offset" / Int32ul,
    "segment" / Int16ul,
    "name" / CString(encoding="utf8"),
)

# A single symbol record: a 16-bit leaf type followed by a payload whose
# layout is picked by that leaf type. The Switch is given no default case.
# NOTE(review): presumably "data" comes back empty/None for leaf types that
# are not listed here -- confirm against the installed construct version.
gsym = Struct(
    "leaf_type" / Int16ul,
    "data" / Switch(
        lambda ctx: ctx.leaf_type,
        {
            0x110E: "data_v3" / Struct(
                "symtype" / Int32ul,
                "offset" / Int32ul,
                "segment" / Int16ul,
                "name" / CString(encoding="utf8"),
            ),
            # Same fields as 0x110E, but the name carries a one-byte length
            # prefix instead of being read as a C string.
            0x1009: "data_v2" / Struct(
                "symtype" / Int32ul,
                "offset" / Int32ul,
                "segment" / Int16ul,
                "name" / PascalString(lengthfield="length" / Int8ul, encoding="utf8"),
            ),
            0x1125: _proc_ref,
            0x1127: _proc_ref,
            # from struct UDTSYM in cvinfo.h
            0x1108: "udt" / Struct(
                "typind" / Int32ul,
                "name" / CString(encoding="utf8"),
            ),
            0x110D: _datasym,
            0x110C: _datasym,
            0x1107: "const" / Struct(
                # type index (containing enum if enumerate) or metadata token
                "typind" / Int32ul,
                # numeric leaf containing the value; read here as a fixed
                # 16-bit field.
                # NOTE(review): verify that wider numeric leaves cannot occur.
                "value" / Int16ul,
                "name" / CString(encoding="utf8"),
            ),
        },
    ),
)
# One entry of the globals stream: a 16-bit byte count, then that many bytes
# which are handed to gsym for parsing.
_symbol_record = Struct(
    "length" / Int16ul,
    "symbol" / RestreamData(Bytes(lambda rec: rec.length), gsym),
)

# The whole stream is a back-to-back sequence of such entries.
GlobalsData = "globals" / GreedyRange(_symbol_record)
def parse(data):
    """Parse a globals stream held in memory.

    :param data: raw contents of the stream, passed to GlobalsData.parse()
    :returns: the flattened symbol list produced by merge_structures()
    """
    return merge_structures(GlobalsData.parse(data))
def parse_stream(stream):
    """Parse a globals stream read from a stream object.

    :param stream: object passed to GlobalsData.parse_stream()
    :returns: the flattened symbol list produced by merge_structures()
    """
    return merge_structures(GlobalsData.parse_stream(stream))
def merge_structures(con):
    """Flatten parsed symbol records into one Container per symbol.

    Each resulting Container holds the record's 'length' and 'leaf_type' and,
    when the record has a non-empty payload, every key of that payload copied
    to the top level.

    :param con: iterable of parsed records, each exposing .length and .symbol
                (with .leaf_type and .data)
    :returns: a ListContainer with one flattened Container per record
    """
    flattened = []
    for record in con:
        symbol = record.symbol
        fields = {'length': record.length, 'leaf_type': symbol.leaf_type}
        payload = symbol.data
        if payload:
            # RAusc: copy whatever fields this leaf type defines instead of
            # assuming a fixed symtype/offset/segment/name set
            fields.update((key, payload[key]) for key in payload.keys())
        flattened.append(Container(fields))
    return ListContainer(flattened)
""" Lists code blocks belonging to a function with the help of pdbparse library.
The script is incomplete and merely aims to demonstrate one of the possible methods.
:Copyright:
Ry Auscitte 2020. This script is distributed under GPL.
:Authors:
Ry Auscitte
"""
import pdbparse
import pefile
import sys
import construct as cs
from argparse import ArgumentParser
# Symbol record types used by this script; the values are taken from
# https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h
S_PROCREF = 0x1125 # reference to a procedure
S_SEPCODE = 0x1132 # separated code
# The parsing constructs below follow the definitions of SEPCODESYM and PROCSYM32
# from https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h
def _bytes_to_scope_end(ctx, p_end):
    """Return how many bytes to skip to get past the end of a scope.

    The data being parsed starts at ctx._params.entry_offest within the input,
    whereas p_end is relative to the beginning of that input; the size of an
    Int32ul is added to account for the end-of-sequence marker.
    """
    scope_end = p_end - ctx._params.entry_offest
    already_read = ctx._io.tell()
    return scope_end - already_read + cs.Int32ul.sizeof()


# A procedure record followed by the separated-code records that come after
# it. Parsing expects an "entry_offest" keyword argument (spelled this way)
# giving the offset at which the supplied data starts.
GlobalProc = cs.Struct(
    "PROCSYM32" / cs.Struct(
        "reclen" / cs.Int16ul,
        "rectyp" / cs.Int16ul,
        "pParent" / cs.Int32ul,
        "pEnd" / cs.Int32ul,
        "pNext" / cs.Int32ul,
        "len" / cs.Int32ul,
        "DbgStart" / cs.Int32ul,
        "DbgEnd" / cs.Int32ul,
        "typind" / cs.Int32ul,
        "offset" / cs.Int32ul,
        "seg" / cs.Int16ul,
        "flags" / cs.Int8ul,
        "name" / cs.CString(encoding="utf8"),
    ),
    # skip everything up to and including the end of the procedure's scope
    cs.Padding(lambda ctx: _bytes_to_scope_end(ctx, ctx.PROCSYM32.pEnd)),
    "sepcodesyms" / cs.GreedyRange(
        "SEPCODESYM" / cs.Struct(
            "reclen" / cs.Int16ul,
            # only records with rectyp = S_SEPCODE are accepted into the range
            "rectyp" / cs.Const(S_SEPCODE, cs.Int16ul),
            "pParent" / cs.Int32ul,     # pointer to the parent
            "pEnd" / cs.Int32ul,        # pointer to this block's end
            "length" / cs.Int32ul,      # count of bytes of this block
            "scf" / cs.Int32ul,         # flags
            "off" / cs.Int32ul,         # sect:off of the separated code
            "offParent" / cs.Int32ul,   # sectParent:offParent of the enclosing scope
            "sect" / cs.Int16ul,        # (the scope is a proc, block, or sepcode)
            "sectParent" / cs.Int16ul,
            # skip everything up to and including the end of this block's scope
            cs.Padding(lambda ctx: _bytes_to_scope_end(ctx, ctx.pEnd)),
        ),
    ),
)
def list_code_blocks(pdb, base, fname):
    """Print the address ranges of the code blocks belonging to a function.

    The function is looked up among the S_PROCREF entries of the global
    symbols stream; the first entry named fname is used. If there is none,
    a message is printed and nothing else happens.

    :param pdb: parsed pdb object (as returned by pdbparse.parse)
    :param base: value added to every section's VirtualAddress
    :param fname: name of the function to look up
    """
    matches = [sym for sym in pdb.STREAM_GSYM.globals
               if sym.leaf_type == S_PROCREF and sym.name == fname]
    if not matches:
        print("There is no S_PROCREF-type reference to", fname, "in the global symbols stream.")
        return

    ref = matches[0]
    # iMod and the section numbers below are used as one-based indices
    module_stream = pdb.STREAM_DBI.DBIExHeaders[ref.iMod - 1].stream
    data = pdb.streams[module_stream].data
    fn = GlobalProc.parse(data[ref.offset:], entry_offest=ref.offset)

    sections = pdb.STREAM_SECT_HDR.sections
    proc = fn.PROCSYM32
    proc_start = sections[proc.seg - 1].VirtualAddress + base + proc.offset
    print("Function start:", hex(proc_start))
    print("Function end:", hex(proc_start + proc.len), "( length = ", proc.len, ")")

    print("Separated blocks of code:")
    for block in fn.sepcodesyms:
        block_start = sections[block.sect - 1].VirtualAddress + base + block.off
        print("\t", "Block start:", hex(block_start))
        print("\t", "Block end:", hex(block_start + block.length), "( length = ", block.length, ")")
        print()
if __name__ == '__main__':
    # Command-line entry point: all three options are mandatory.
    parser = ArgumentParser(description="Lists code blocks belonging to a function.")
    for flag, text in (("-p", "a path to the pdb file"),
                       ("-m", "a path to the dll file"),
                       ("-n", "function name")):
        parser.add_argument(flag, required=True, help=text)
    options = parser.parse_args()

    symbols = pdbparse.parse(options.p)
    image = pefile.PE(options.m)
    # the image base read from the dll is added to every section address
    list_code_blocks(symbols, image.OPTIONAL_HEADER.ImageBase, options.n)
@Auscitte
Copy link
Author

Auscitte commented Jul 3, 2021

Read this for more info.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment