Skip to content

Instantly share code, notes, and snippets.

@alexander-hanel
Created April 10, 2023 22:43
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save alexander-hanel/7ad79cdbfcf08e0e56b17817267382dd to your computer and use it in GitHub Desktop.
Save alexander-hanel/7ad79cdbfcf08e0e56b17817267382dd to your computer and use it in GitHub Desktop.
import re
from anytree import Node, RenderTree, find
"""
Author: Alexander Hanel
Description: POC for displaying function names as a folder-like structure. Relies on function names being labeled with a pdb.
Version: 0.5 - 2023/04/10
Execution: open script in IDA, run export_layout() to save to file or print_layout() to print to command line
TODO:
- review how mangled names are used in IDA. I'm seeing some strange results.
- add functionality for Go function names
- maybe create folders in IDA
- dig deeper into how function names are parsed in C/C++. MS has some weird naming conventions
Example Output from clr.dll
├── CInstall
│ ├── Ref
│ │ ├── Enum
│ │ │ ├── public: void * CInstallRefEnum::`scalar deleting destructor'(unsigned int)
│ │ │ ├── public: long CInstallRefEnum::CleanUpRegKeys(void)
│ │ │ ├── public: static long CInstallRefEnum::Create(struct IAssemblyName *,enum GACSLICE,int,class CInstallRefEnum * *)
│ │ │ ├── private: long CInstallRefEnum::GetNextIdentifier(unsigned short *,unsigned long *,unsigned char *,unsigned long *)
│ │ │ ├── public: long CInstallRefEnum::GetNextRef(unsigned long,unsigned short *,unsigned long *,unsigned short *,unsigned long *,unsigned long *,void *)
│ │ │ ├── private: long CInstallRefEnum::GetNextScheme(void)
│ │ │ ├── private: long CInstallRefEnum::Init(struct IAssemblyName *,enum GACSLICE,int)
│ │ │ └── private: long CInstallRefEnum::ValidateRegKey(struct HKEY__ * &)
│ │ ├── public: long CInstallRef::AddReference(void)
│ │ ├── public: long CInstallRef::DeleteReference(void)
│ │ └── public: long CInstallRef::Initialize(void)
│ └── Reference
│ ├── Item
│ │ ├── public: void * CInstallReferenceItem::`scalar deleting destructor'(unsigned int)
│ │ ├── public: virtual unsigned long CInstallReferenceItem::AddRef(void)
│ │ ├── public: virtual long CInstallReferenceItem::GetReference(struct _FUSION_INSTALL_REFERENCE_ * *,unsigned long,void *)
│ │ ├── public: virtual long CInstallReferenceItem::QueryInterface(struct _GUID const &,void * *)
│ │ └── public: virtual unsigned long CInstallReferenceItem::Release(void)
│ └── Enum
│ ├── public: virtual long CInstallReferenceEnum::GetNextInstallReferenceItem(struct IInstallReferenceItem * *,unsigned long,void *)
│ └── public: virtual unsigned long CInstallReferenceEnum::Release(void)
├── CVerify
│ └── Ref
│ └── Node
│ ├── public: void * CVerifyRefNode::`scalar deleting destructor'(unsigned int)
│ └── public: long CVerifyRefNode::Init(unsigned long,unsigned short const *)
"""
# TODO: KEYWORDS is not referenced by any function visible in this file.
# It lists C++ keyword / type-specifier strings plus the "*" pointer token.
# NOTE(review): presumably meant for filtering such tokens out of demangled
# declarations - confirm the intended use before relying on it.
KEYWORDS = ['alignas', 'constinit', 'false', 'public', 'true', 'alignof', 'const_cast', 'float', 'register',
'reinterpret_cast', 'typedef', 'auto', 'co_await', 'friend', 'requires', 'typeid', 'bool',
'co_return', 'typename', 'co_yield', 'short', 'union','decltype', 'signed', 'unsigned',
'catch', 'default', 'int', 'sizeof', 'using', 'char', 'delete', 'long', 'static', 'virtual',
'char8_t', 'mutable', 'static_assert', 'void', 'char16_t', 'double', 'namespace', 'static_cast',
'volatile', 'char32_t', 'dynamic_cast', 'new', 'struct', 'wchar_t', 'class', 'concept', 'enum',
'nullptr', 'template', 'const', 'explicit', 'operator', 'this', 'consteval', 'export', 'private',
'thread_local', 'constexpr', 'extern', 'protected', 'throw', "*"]
def test_data():
    """
    Load sample declarations from the file "demangled_functions.txt".

    :return: list with one entry per line of the file, with trailing
        whitespace (including the line ending) removed
    """
    data_lines = []
    with open("demangled_functions.txt", "r") as source:
        for raw_line in source:
            data_lines.append(raw_line.rstrip())
    return data_lines
def create_nodes(item_names):
    """
    Pair every element of a list with the element that follows it.

    A list such as [1, 2, 3] becomes [(1, 2), (2, 3)]. A list holding exactly
    one element is handed back unchanged (no pairs are built), and an empty
    list yields an empty list.

    :param item_names: list of names
    :return: list of (parent, child) tuples, or item_names itself when it
        contains a single element
    """
    if len(item_names) != 1:
        # zip stops at the shorter sequence, so the last element is only
        # ever used as the second half of a pair
        return list(zip(item_names, item_names[1:]))
    return item_names
def parse_ms_pdb_func_def(declaration):
    """
    Split the function/class name of a demangled Microsoft (PDB style)
    declaration into its PascalCase components.

    Only the text before the first "(" is inspected:

    1. scope resolution operator present
       (e.g. "public: virtual long RegMeta::MergeEnd"): the name is the last
       whitespace separated token of the text before the first "::", with
       anything from a "<" onwards (template arguments) dropped. Strictly
       speaking this is the class rather than the function, but it keeps the
       code simple. The text after the last "::" is reported as the method
       name.
    2. not present (e.g. "int ScaleResult"): the name is the last whitespace
       separated token.

    notes: https://learn.microsoft.com/en-us/cpp/cpp/functions-cpp?source=recommendations&view=msvc-170

    :param declaration: demangled declaration or plain symbol name
    :return: tuple (components, method_name). components is the name split
        in front of each run of one to four capitals (["Reg", "Meta"]), a
        single-item list when the name is purely upper-case letters, or an
        empty list when no name could be extracted. method_name is None when
        the declaration has no "::".
    """
    scope_resolution_operator = "::"
    parentheses_start = "("
    template_start = "<"
    method_name = None

    # public: virtual long RegMeta::MergeEnd(void)
    #                                       ^-- keep everything before this
    split_declaration = declaration.split(parentheses_start)[0]

    if scope_resolution_operator in split_declaration:
        # "public: virtual long RegMeta::MergeEnd" becomes
        # ['public: virtual long RegMeta', 'MergeEnd']
        temp_class_def = split_declaration.split(scope_resolution_operator)
        method_name = temp_class_def[-1]
        # drop template arguments; a no-op when there is no "<", e.g.
        # "public: struct MDTOKENHASH * CChainedHash<struct MDTOKENHASH>"
        # becomes "public: struct MDTOKENHASH * CChainedHash"
        class_part = temp_class_def[0].split(template_start)[0]
        tokens = class_part.split()
    else:
        tokens = split_declaration.split()

    if not tokens:
        # nothing precedes "(" or "<" (e.g. "" or "<lambda_1>::operator()");
        # report "no name" instead of failing on tokens[-1]
        return ([], method_name)

    function_name = tokens[-1]
    # all-caps names (e.g. "GC") are kept whole rather than cut into chunks
    if function_name.isupper() and function_name.isalpha():
        return ([function_name], method_name)
    # split up the pascal case into substrings
    return (re.findall(r'[A-Z]{1,4}[^A-Z]*', function_name), method_name)
def _get_or_create_child(parent, name):
    """Return the direct child of parent called name, creating it if missing."""
    for child in parent.children:
        if child.name == name:
            return child
    return Node(name, parent=parent)


def parse(declarations):
    """
    Build a folder-like tree from function declarations.

    Each declaration is split into name components by
    parse_ms_pdb_func_def(); the components become nested nodes below a
    "root" node (shared between declarations with a common prefix) and the
    full declaration text is attached as a child of the last component.
    Declarations that yield no components are skipped.

    :param declarations: iterable of demangled declarations / symbol names
    :return: anytree Node named "root" holding the whole layout
    """
    root = Node("root")  # create parent node
    for declaration in declarations:
        components, _method_name = parse_ms_pdb_func_def(declaration)
        if not components:
            continue
        # Walk the path one component at a time, looking only at direct
        # children. This also covers names with a single component, and a
        # component can never match the node it is being looked up under.
        folder = root
        for component in components:
            folder = _get_or_create_child(folder, component)
        Node(declaration, parent=folder)
    return root
def _print(root):
    """
    Print the tree below root, one node per line, each name preceded by the
    prefix produced by RenderTree.

    :param root: top node of the tree to display
    """
    for prefix, _fill, item in RenderTree(root):
        print("%s%s" % (prefix, item.name))
def _save_layout(root):
    """
    Write the rendered tree to a text file next to the IDA database.

    The output path is the database path reported by idc.get_idb_path() with
    its extension swapped for ".txt". One line is written per node, in the
    same format _print() uses, and the path is printed afterwards.

    :param root: top node of the tree to export
    """
    # imported here so the function does not depend on names injected into
    # the calling namespace
    import os
    import idc

    idb_path = idc.get_idb_path()
    # splitext only touches the final extension; a plain str.replace() would
    # rewrite every occurrence of the suffix and misbehave on an empty suffix
    text_path = os.path.splitext(idb_path)[0] + ".txt"
    # the rendered prefixes are box-drawing characters, which not every
    # platform default encoding can represent
    with open(text_path, "w", encoding="utf-8") as export_file:
        for pre, _fill, node in RenderTree(root):
            export_file.write("%s%s\n" % (pre, node.name))
    print("Export written to %s" % text_path)
def get_function_names():
    """
    Collect a display name for every function idautils.Functions() yields.

    The demangled form of the function name is used when
    idc.demangle_name() returns one. Otherwise the raw name is kept, unless
    it starts with "sub_" or contains "@@", in which case the function is
    left out.

    :return: list of demangled declarations / raw names
    """
    import idaapi
    import idautils
    import idc

    func_names = []
    for func_ea in idautils.Functions():
        raw_name = idaapi.get_func_name(func_ea)
        if not raw_name:
            # nothing to demangle or display
            continue
        # NOTE(review): the idc.demangle_name documentation describes the
        # second argument as a mask, suggested to be obtained through
        # get_inf_attr(INF_SHORT_DN) or get_inf_attr(INF_LONG_DN); here the
        # INF_SHORT_DN identifier itself is passed - confirm this is intended.
        demangled = idc.demangle_name(raw_name, idc.INF_SHORT_DN)
        if demangled:
            func_names.append(demangled)
        elif not (raw_name.startswith("sub_") or "@@" in raw_name):
            func_names.append(raw_name)
    return func_names
def test():
    """Run parse() over the sample declarations from test_data(); the resulting tree is discarded."""
    parse(test_data())
def print_layout():
    """Build the layout tree from the names returned by get_function_names() and print it."""
    _print(parse(get_function_names()))
def export_layout():
    """Build the layout tree from the names returned by get_function_names() and save it via _save_layout()."""
    _save_layout(parse(get_function_names()))
@alexander-hanel
Copy link
Author

More output

root
├── Get
│   ├── Thread
│   │   ├── Generic
│   │   │   └── GetThreadGeneric
│   │   ├── GetThread
│   │   ├── Context
│   │   │   └── struct SN_THREAD_CTX * GetThreadContext(void)
│   │   └── UICulture
│   │       ├── Id
│   │       │   └── GetThreadUICultureId
│   │       └── Names
│   │           └── GetThreadUICultureNames
│   ├── App
│   │   ├── Domain
│   │   │   ├── Generic
│   │   │   │   └── GetAppDomainGeneric
│   │   │   └── GetAppDomain
│   │   ├── Container
│   │   │   └── Token
│   │   │       └── Info
│   │   │           └── For
│   │   │               └── Process
│   │   │                   └── GetAppContainerTokenInfoForProcess
│   │   └── Id
│   │       └── Authority
│   │           ├── GetAppIdAuthority
│   │           └── long isocom_GetAppIdAuthority(struct IAppIdAuthority * *)
│   ├── TLSDummy
│   │   └── GetTLSDummy
│   ├── Current
│   │   ├── SP
│   │   │   └── GetCurrentSP
│   │   ├── IP
│   │   │   └── GetCurrentIP
│   │   ├── Saved
│   │   │   └── Redirect
│   │   │       └── Context
│   │   │           └── GetCurrentSavedRedirectContext
│   │   ├── Thread
│   │   │   ├── Type
│   │   │   │   └── NT5
│   │   │   │       └── long GetCurrentThreadTypeNT5(enum _THDTYPE *)
│   │   │   └── Helper
│   │   │       └── GetCurrentThreadHelper
│   │   ├── Process
│   │   │   └── Cpu
│   │   │       └── Count
│   │   │           └── int GetCurrentProcessCpuCount(void)
│   │   ├── Nls
│   │   │   └── Hash
│   │   │       └── Provider
│   │   │           └── class COMNlsHashProvider * GetCurrentNlsHashProvider(void)
│   │   ├── Apartment
│   │   │   └── Type
│   │   │       └── NT5
│   │   │           └── long GetCurrentApartmentTypeNT5(struct IObjectContext *,enum _APTTYPE *)
│   │   ├── User
│   │   │   └── SID
│   │   │       └── long GetCurrentUserSID(unsigned short *,unsigned long)
│   │   ├── Ctx
│   │   │   └── Cookie
│   │   │       └── void * GetCurrentCtxCookie(void)
│   │   ├── Module
│   │   │   └── File
│   │   │       └── Name
│   │   │           └── long GetCurrentModuleFileName(unsigned short *,unsigned long *)
│   │   ├── Task
│   │   │   └── Type
│   │   │       └── enum ETaskType GetCurrentTaskType(void)
│   │   ├── Exception
│   │   │   └── Pointers
│   │   │       └── void GetCurrentExceptionPointers(struct _EXCEPTION_POINTERS *)
│   │   ├── Scope
│   │   │   └── GetCurrentScope
│   │   └── Frame
│   │       └── Pointer
│   │           └── From
│   │               └── Stack
│   │                   └── Trace
│   │                       └── For
│   │                           └── Trace
│   │                               └── Call
│   │                                   └── class FramePointer GetCurrentFramePointerFromStackTraceForTraceCall(class Thread *)
│   ├── RBP
│   │   └── GetRBP
│   ├── Execution
│   │   └── Engine
│   │       └── struct IExecutionEngine * GetExecutionEngine(void)
│   ├── EEMemory
│   │   └── Manager
│   │       └── struct IEEMemoryManager * GetEEMemoryManager(void)
│   ├── Enclosing
│   │   └── Method
│   │       └── Table
│   │           └── GetEnclosingMethodTable
│   ├── Module
│   │   ├── class Module * GetModule(struct CORINFO_MODULE_STRUCT_ *)
│   │   └── Handle
│   │       └── W
│   │           └── public: SimpleModuleHolder<&struct HINSTANCE__ * GetModuleHandleW(unsigned short const *),0>::~SimpleModuleHolder<&struct HINSTANCE__ * GetModuleHandleW(unsigned short const *),0>(void)
│   ├── Process
│   │   ├── Memory
│   │   │   └── Load
│   │   │       └── void GetProcessMemoryLoad(struct _MEMORYSTATUSEX *)
│   │   ├── Executable
│   │   │   └── Heap@@
│   │   │       └── YAPE
│   │   │           └── AXXZ
│   │   │               └── __imp_load_?GetProcessExecutableHeap@@YAPEAXXZ
│   │   ├── GUID
│   │   │   └── unsigned short * GetProcessGUID(void)
│   │   └── Exe
│   │       └── Path
│   │           └── long GetProcessExePath(unsigned short const * *)
│   ├── Restricted
│   │   ├── Physical
│   │   │   └── Memory
│   │   │       └── Limit
│   │   │           └── GetRestrictedPhysicalMemoryLimit
│   │   └── Error
│   │       ├── Info
│   │       │   └── From
│   │       │       └── Error
│   │       │           └── Object
│   │       │               └── struct IRestrictedErrorInfo * GetRestrictedErrorInfoFromErrorObject(class Object *)
│   │       └── Details
│   │           └── long GetRestrictedErrorDetails(struct IRestrictedErrorInfo *,unsigned short * *,unsigned short * *,long *,unsigned short * *)
│   ├── Runtime
│   │   ├── Type
│   │   │   └── Helper
│   │   │       └── GetRuntimeTypeHelper
│   │   ├── Module
│   │   │   └── Helper
│   │   │       └── class ReflectModuleBaseObject * GetRuntimeModuleHelper(void *,class Module *,class Object *)
│   │   ├── Function
│   │   │   └── Callback
│   │   │       └── GetRuntimeFunctionCallback
│   │   ├── Assembly
│   │   │   └── Helper
│   │   │       └── class AssemblyBaseObject * GetRuntimeAssemblyHelper(void *,class DomainAssembly *,class Object *)
│   │   ├── Version
│   │   │   └── For
│   │   │       └── Assembly
│   │   │           ├── long GetRuntimeVersionForAssembly(struct IMetaDataAssemblyImport *,class RUNTIMEVERSIONINFO *)
│   │   │           └── long GetRuntimeVersionForAssembly(struct IAssemblyManifestImport *,class RUNTIMEVERSIONINFO *)
│   │   ├── Id_
│   │   │   └── Wrapper
│   │   │       └── int GetRuntimeId_Wrapper(void)
│   │   └── Stack
│   │       └── Walk
│   │           └── Info
│   │               └── GetRuntimeStackWalkInfo

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment