Created
June 21, 2012 21:19
-
-
Save mridgers/2968595 to your computer and use it in GitHub Desktop.
Small tool to list and query symbols in PDB files.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//------------------------------------------------------------------------------ | |
// pdbdump.c - dump symbols from .pdb and executable files (public domain). | |
// - to compile; cl.exe /Ox /Zi pdbdump.c | |
// - | |
// - Martin Ridgers, pdbdump 'at' fireproofgravy.co.uk | |
//------------------------------------------------------------------------------ | |
#include <ctype.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <Windows.h>
#include <DbgHelp.h>
#pragma comment(lib, "dbghelp.lib")
//------------------------------------------------------------------------------ | |
// Help text printed by main() when the tool is run without arguments.
// The synopsis line lists every switch that parse_args() recognises.
const char* g_usage =
    "pdbdump.exe - dump symbols from .pdb and executable files." "\n"
    " Martin Ridgers, pdbdump 'at' fireproofgravy.co.uk" "\n"
    "\n"
    " Usage: pdbdump.exe [-t] [-csv] [-s[asnf]] [-r] [-w...] pdb_or_exe[:BASE]" "\n"
    " -t: Enumerate types." "\n"
    " -csv: Output comma-separated-values format." "\n"
    " -s[asnf]: Sort by (a)ddress, (s)ize, (n)ame, or (f)ile. ASNF to reverse." "\n"
    " -r: Resolve names and addresses read from stdin." "\n"
    " -w[...]: Wildcard to use when enumerating symbols." "\n"
    "\n"
    " By default modules (.pdb or .exe files) are loaded with a base address of" "\n"
    " 0x400000. This can be overridden by adding a :BASE suffix to the module's" "\n"
    " file name. For example; my_project.pdb:0x20030000." "\n"
    "\n"
    " Examples: 1. Output all symbols from a.pdb and b.dll;" "\n"
    " > pdbdump.exe a.pdb b.dll" "\n"
    " 2. Output all of a.pdb's function symbols in CSV format;" "\n"
    " > pdbdump.exe -csv a.pdb | findstr SymTagFunction" "\n"
    " 3. List all symbols starting with 'is_enab';" "\n"
    " > pdbdump.exe -wis_enab* a.pdb" "\n"
    " 4. Resolve two symbols by name and by address;" "\n"
    " > echo 0x401000 is_enabled | pdbdump.exe -r a.pdb" "\n"
    ;
//------------------------------------------------------------------------------ | |
// Set to 1 to add SYMOPT_DEBUG to the DbgHelp options in main().
#define ENABLE_DEBUG_OUTPUT 0

// Fatal check: when x is false, print the printf-style message (format string
// plus optional arguments) to stderr and terminate with exit code -1.
// - The format string is part of the variadic list, so a message with no
//   extra arguments expands without a dangling comma on any compiler.
// - The do/while(0) wrapper makes the macro behave as a single statement, so
//   it is safe inside an unbraced if/else.
#define ASSERT(x, ...) \
    do \
    { \
        if (!(x)) \
        { \
            fprintf(stderr, __VA_ARGS__); \
            exit(-1); \
        } \
    } while (0)

#define ONE_MB (1024 * 1024)
//------------------------------------------------------------------------------ | |
// Top-level operating mode; main() dispatches on g_mode.
enum e_mode
{
    e_mode_resolve_stdin = 0,   // look up names/addresses read from stdin (-r)
    e_mode_enum_symbols  = 1    // enumerate and print symbols (the default)
};
//------------------------------------------------------------------------------ | |
// What output_symbols() enumerates for each module.
enum e_enum_type
{
    e_enum_type_symbols = 0,    // SymEnumSymbols (the default)
    e_enum_type_types   = 1     // SymEnumTypes (-t)
};
//------------------------------------------------------------------------------ | |
struct _sym_info | |
{ | |
DWORD64 addr; | |
int size; | |
char* name; | |
char* file; | |
int tag : 8; | |
int line : 24; | |
}; | |
typedef struct _sym_info sym_info_t; | |
//------------------------------------------------------------------------------ | |
// Bump allocator state; see pool_create() / pool_alloc().
typedef struct _pool
{
    char* base;         // start of the reserved address range
    int   committed;    // bytes committed so far, grown a page at a time
    int   size;         // total bytes reserved
    int   used;         // bytes handed out so far
} pool_t;
//------------------------------------------------------------------------------ | |
typedef int (sort_func_t)(const sym_info_t*, const sym_info_t*); | |
int g_page_size = 0; | |
HANDLE g_handle = (HANDLE)0x493; | |
int g_csv_output = 0; | |
int g_sym_count = 0; | |
enum e_mode g_mode = e_mode_enum_symbols; | |
int g_sort_order = 1; | |
sort_func_t* g_sort_func = NULL; | |
enum e_enum_type g_enum_type = e_enum_type_symbols; | |
const char* g_wildcard = "*"; | |
pool_t g_symbol_pool; | |
pool_t g_string_pool; | |
extern const char* g_sym_tag_names[]; /* ...at end of file */ | |
//------------------------------------------------------------------------------ | |
// Initialises 'pool' by reserving (not committing) 'size' bytes of address
// space. Pages are committed later, on demand, by pool_alloc().
// Terminates the program if the reservation fails, since every later
// allocation from the pool would otherwise write through a NULL base.
void pool_create(pool_t* pool, int size)
{
    pool->base = (char*)VirtualAlloc(NULL, size, MEM_RESERVE, PAGE_READWRITE);
    ASSERT(pool->base != NULL, "Failed to reserve memory for pool.");

    pool->size = size;
    pool->committed = 0;
    pool->used = 0;
}
//------------------------------------------------------------------------------ | |
// Releases the address range owned by 'pool' and resets its bookkeeping so
// that no dangling base pointer or stale counters are left behind.
// Safe to call on a pool whose base is NULL.
void pool_destroy(pool_t* pool)
{
    if (pool->base != NULL)
    {
        VirtualFree(pool->base, 0, MEM_RELEASE);
    }

    pool->base = NULL;
    pool->size = 0;
    pool->committed = 0;
    pool->used = 0;
}
//------------------------------------------------------------------------------ | |
// Discards everything allocated from 'pool' by rewinding the allocation
// cursor. Pages that were already committed stay committed and get reused.
void pool_clear(pool_t* pool)
{
    pool->used = 0;
}
//------------------------------------------------------------------------------ | |
// Hands out 'size' bytes from 'pool', committing one more page when the
// request reaches the end of the committed range.
// - 'size' must be non-negative and smaller than the system page size, so a
//   single extra page is always enough to satisfy a request.
// - Terminates the program if the request is out of range, the pool is
//   exhausted, or the page cannot be committed.
// Returns a pointer to the start of the new allocation. No alignment padding
// is inserted between allocations.
void* pool_alloc(pool_t* pool, int size)
{
    int end;
    char* addr;
    void* page;

    ASSERT(size >= 0 && size < g_page_size, "Allocation too large!");

    end = pool->used + size;
    if (end >= pool->committed)
    {
        ASSERT(end < pool->size, "Memory pool exhausted.");

        page = VirtualAlloc((void*)(pool->base + pool->committed), g_page_size,
            MEM_COMMIT, PAGE_READWRITE
        );
        ASSERT(page != NULL, "Failed to commit memory for pool.");

        pool->committed += g_page_size;
    }

    addr = pool->base + pool->used;
    pool->used = end;
    return addr;
}
//------------------------------------------------------------------------------ | |
int sort_addr(const sym_info_t* lhs, const sym_info_t* rhs) | |
{ | |
return (int)(lhs->addr - rhs->addr) * g_sort_order; | |
} | |
//------------------------------------------------------------------------------ | |
int sort_size(const sym_info_t* lhs, const sym_info_t* rhs) | |
{ | |
return (lhs->size - rhs->size) * g_sort_order; | |
} | |
//------------------------------------------------------------------------------ | |
int sort_name(const sym_info_t* lhs, const sym_info_t* rhs) | |
{ | |
return _stricmp(lhs->name, rhs->name) * g_sort_order; | |
} | |
//------------------------------------------------------------------------------ | |
int sort_file(const sym_info_t* lhs, const sym_info_t* rhs) | |
{ | |
return _stricmp(lhs->file, rhs->file) * g_sort_order; | |
} | |
//------------------------------------------------------------------------------ | |
// printf-style progress/diagnostic output. Goes to stderr so that it never
// mixes with the symbol listing written to stdout.
void print_info(const char* info, ...)
{
    va_list args;

    va_start(args, info);
    vfprintf(stderr, info, args);
    va_end(args);
}
//------------------------------------------------------------------------------ | |
// Converts a DbgHelp SYMBOL_INFO into this tool's sym_info_t.
// - Address, size and tag are copied directly.
// - The name, and the source file when line info is available, are copied
//   into g_string_pool.
// - When no line info is found, file is set to "?" and line to 0.
void dbghelp_to_sym_info(SYMBOL_INFO* info, sym_info_t* sym_info)
{
    BOOL ok;
    DWORD disp;
    IMAGEHLP_LINE64 line;
    size_t bytes;

    // General properties
    sym_info->addr = info->Address;
    sym_info->size = info->Size;
    sym_info->tag = info->Tag;

    // Symbol name. The buffer is sized from the string actually stored in
    // info->Name rather than from info->NameLen: a buffer overflow was
    // reported for long names when the two were assumed to agree.
    bytes = strlen(info->Name) + 1;
    sym_info->name = pool_alloc(&g_string_pool, (int)bytes);
    memcpy(sym_info->name, info->Name, bytes);

    // Get file and line number info.
    line.SizeOfStruct = sizeof(line);
    ok = SymGetLineFromAddr64(g_handle, info->Address, &disp, &line);
    if ((ok != FALSE) && line.FileName)
    {
        bytes = strlen(line.FileName) + 1;
        sym_info->line = line.LineNumber;
        sym_info->file = pool_alloc(&g_string_pool, (int)bytes);
        memcpy(sym_info->file, line.FileName, bytes);
    }
    else
    {
        sym_info->line = 0;
        sym_info->file = "?";
    }
}
//------------------------------------------------------------------------------ | |
// Callback passed to SymEnumSymbols / SymEnumTypes. Appends one record to
// g_symbol_pool per symbol and refreshes the progress counter on stderr every
// 100 symbols. 'size' and 'param' are not used. Always returns TRUE.
BOOL CALLBACK enum_proc(SYMBOL_INFO* info, ULONG size, void* param)
{
    sym_info_t* record;

    record = (sym_info_t*)pool_alloc(&g_symbol_pool, sizeof(sym_info_t));
    dbghelp_to_sym_info(info, record);

    if ((g_sym_count % 100) == 0)
    {
        print_info("\r%d", g_sym_count);
    }
    g_sym_count += 1;

    return TRUE;
}
//------------------------------------------------------------------------------ | |
void print_symbol(const sym_info_t* sym_info) | |
{ | |
const char* format; | |
format = "%016llx %10d %-21s %-32s %s(%d)\n"; | |
if (g_csv_output) | |
{ | |
format = "\"%llx\",%d,\"%s\",\"%s\",\"%s\",%d\n"; | |
} | |
printf( | |
format, sym_info->addr, sym_info->size, g_sym_tag_names[sym_info->tag], | |
sym_info->name, sym_info->file, sym_info->line | |
); | |
} | |
//------------------------------------------------------------------------------ | |
// Creates g_string_pool and g_symbol_pool for the module loaded at
// 'base_addr', sizing them from the size of the module's PDB file.
// Returns 1 on success, or 0 when the module has no PDB loaded (in which case
// no pools are created). Terminates the program if the module info cannot be
// queried or the PDB file cannot be opened or measured.
int create_pools(uintptr_t base_addr)
{
    BOOL ok;
    FILE* in;
    long size;
    int guess;
    const char* guide;

    // Fetch PDB file for the module.
    IMAGEHLP_MODULE64 module = { sizeof(module) };
    ok = SymGetModuleInfo64(g_handle, base_addr, &module);
    ASSERT(ok != FALSE, "Unexpected failure from SymGetModuleInfo64().");
    guide = module.LoadedPdbName;

    // An .exe with no symbols available?
    if (!guide || guide[0] == '\0')
    {
        return 0;
    }

    // Get file size.
    in = fopen(guide, "rb");
    ASSERT(in != NULL, "Failed to open pool-size guide file.");
    fseek(in, 0, SEEK_END);
    size = ftell(in);
    fclose(in);

    // ftell() reports failure as -1, which would otherwise silently fall
    // through to the minimum pool size below.
    ASSERT(size >= 0, "Failed to measure pool-size guide file.");

    // Use anecdotal evidence to guess at suitable pool sizes :).
    guess = (int)(size / 4);
    pool_create(&g_string_pool, (guess < ONE_MB) ? ONE_MB : guess);

    guess = (int)(size / 25);
    pool_create(&g_symbol_pool, (guess < ONE_MB) ? ONE_MB : guess);

    return 1;
}
//------------------------------------------------------------------------------ | |
uintptr_t load_module(const char* pdb_file) | |
{ | |
char buffer[512]; | |
char* colon; | |
uintptr_t base_addr = 0x400000; | |
strncpy(buffer, pdb_file, 512); | |
buffer[sizeof(buffer) - 1] = '\0'; | |
// Is there a base address tag on the end of the file name? | |
colon = strrchr(buffer, ':'); | |
if (colon && (ptrdiff_t)(colon - buffer) > 1) | |
{ | |
*colon++ = '\0'; | |
base_addr = (uintptr_t)_strtoui64(colon, NULL, 0); | |
} | |
base_addr = (size_t)SymLoadModuleEx(g_handle, NULL, buffer, NULL, | |
base_addr, 0x7fffffff, NULL, 0 | |
); | |
return base_addr; | |
} | |
//------------------------------------------------------------------------------ | |
void output_symbols(const char* pdb_file) | |
{ | |
int i; | |
uintptr_t base_addr; | |
DWORD ok; | |
// Load module. | |
base_addr = load_module(pdb_file); | |
if (!base_addr) | |
{ | |
print_info("Failed to load symbols for '%s' (Error %d)", pdb_file, | |
GetLastError() | |
); | |
return; | |
} | |
if (!create_pools(base_addr)) | |
{ | |
print_info("No symbols found for '%s'", pdb_file); | |
return; | |
} | |
g_sym_count = 0; | |
// Do the enumeration. | |
print_info("Enumerating...\n"); | |
switch (g_enum_type) | |
{ | |
case e_enum_type_symbols: | |
SymEnumSymbols(g_handle, base_addr, g_wildcard, enum_proc, NULL); | |
break; | |
case e_enum_type_types: | |
SymEnumTypes(g_handle, base_addr, enum_proc, NULL); | |
break; | |
} | |
print_info("\r%d\n...Done!\n", g_sym_count); | |
// Done. | |
ok = SymUnloadModule64(g_handle, (DWORD64)base_addr); | |
ASSERT(ok != FALSE, "Failed unloading module."); | |
// Sort. | |
if (g_sort_func != NULL) | |
{ | |
qsort(g_symbol_pool.base, g_sym_count, sizeof(sym_info_t), | |
(int (*)(const void*, const void*))g_sort_func | |
); | |
} | |
// Print to stdout | |
for (i = 0; i < g_sym_count; ++i) | |
{ | |
sym_info_t* sym_info = ((sym_info_t*)g_symbol_pool.base) + i; | |
print_symbol(sym_info); | |
} | |
pool_destroy(&g_string_pool); | |
pool_destroy(&g_symbol_pool); | |
} | |
//------------------------------------------------------------------------------ | |
void resolve_stdin() | |
{ | |
pool_create(&g_string_pool, g_page_size); | |
while (!feof(stdin)) | |
{ | |
int i; | |
int state; | |
char buffer[256]; | |
BOOL ok; | |
struct { | |
SYMBOL_INFO info; | |
char name_buf[256]; | |
} si; | |
si.info.SizeOfStruct = sizeof(si.info); | |
si.info.MaxNameLen = sizeof(si.name_buf); | |
// Parse things on the command line. | |
state = 0; | |
i = 0; | |
while (!feof(stdin) && (i < sizeof(buffer) - 1)) | |
{ | |
fread(buffer + i, 1, 1, stdin); | |
if (!!isspace(buffer[i]) == state) | |
{ | |
if (++state > 1) | |
{ | |
break; | |
} | |
} | |
i += state; | |
} | |
buffer[i] = '\0'; | |
if (i == 0) | |
{ | |
continue; | |
} | |
if (isdigit(buffer[0])) | |
{ | |
DWORD64 addr = (DWORD64)_strtoui64(buffer, NULL, 0); | |
ok = SymFromAddr(g_handle, addr, NULL, &si.info); | |
} | |
else | |
{ | |
ok = SymFromName(g_handle, buffer, &si.info); | |
} | |
pool_clear(&g_string_pool); | |
if (ok != FALSE) | |
{ | |
sym_info_t sym_info; | |
dbghelp_to_sym_info(&si.info, &sym_info); | |
print_symbol(&sym_info); | |
} | |
} | |
pool_destroy(&g_string_pool); | |
} | |
//------------------------------------------------------------------------------ | |
void parse_args(int argc, char** argv) | |
{ | |
int i; | |
for (i = 0; i < argc; ++i) | |
{ | |
const char* arg = argv[i]; | |
if (strcmp(arg, "-csv") == 0) | |
{ | |
g_csv_output = 1; | |
} | |
else if (strncmp(arg, "-s", 2) == 0) | |
{ | |
char c = arg[2]; | |
c = isupper(c) ? tolower(c) : c; | |
switch (c) | |
{ | |
case '\0': | |
case 'a': g_sort_func = sort_addr; break; | |
case 's': g_sort_func = sort_size; break; | |
case 'n': g_sort_func = sort_name; break; | |
case 'f': g_sort_func = sort_file; break; | |
} | |
g_sort_order = (arg[2] < 'a') ? -1 : 1; | |
} | |
else if (strcmp(arg, "-r") == 0) | |
{ | |
g_mode = e_mode_resolve_stdin; | |
} | |
else if (strcmp(arg, "-t") == 0) | |
{ | |
g_enum_type = e_enum_type_types; | |
} | |
else if (strncmp(arg, "-w", 2) == 0) | |
{ | |
if (arg[2] != '\0') | |
{ | |
g_wildcard = arg + 2; | |
} | |
} | |
} | |
} | |
//------------------------------------------------------------------------------ | |
int main(int argc, char** argv) | |
{ | |
int i; | |
BOOL ok; | |
DWORD options; | |
SYSTEM_INFO sys_info; | |
if (argc <= 1) | |
{ | |
puts(g_usage); | |
return -1; | |
} | |
--argc; | |
++argv; | |
parse_args(argc, argv); | |
// Get page size. | |
GetSystemInfo(&sys_info); | |
g_page_size = sys_info.dwPageSize; | |
// Initialise DbgHelp | |
options = SymGetOptions(); | |
options &= ~SYMOPT_DEFERRED_LOADS; | |
options |= SYMOPT_LOAD_LINES; | |
options |= SYMOPT_IGNORE_NT_SYMPATH; | |
#if ENABLE_DEBUG_OUTPUT | |
options |= SYMOPT_DEBUG; | |
#endif | |
options |= SYMOPT_UNDNAME; | |
SymSetOptions(options); | |
ok = SymInitialize(g_handle, NULL, FALSE); | |
ASSERT(ok != FALSE, "Failed to initialise symbol handler."); | |
// Output each .PDB file specified on the command line. | |
switch (g_mode) | |
{ | |
case e_mode_enum_symbols: | |
for (i = 0; i < argc; ++i) | |
{ | |
const char* arg = argv[i]; | |
if (arg[0] != '-') | |
{ | |
output_symbols(arg); | |
} | |
} | |
break; | |
case e_mode_resolve_stdin: | |
for (i = 0; i < argc; ++i) | |
{ | |
const char* arg = argv[i]; | |
if (arg[0] != '-') | |
{ | |
load_module(arg); | |
} | |
} | |
resolve_stdin(); | |
break; | |
} | |
SymCleanup(g_handle); | |
return 0; | |
} | |
//------------------------------------------------------------------------------ | |
// Printable names for symbol tags, indexed by sym_info_t::tag (31 entries).
const char* g_sym_tag_names[] = {
    "SymTagNull",               //  0
    "SymTagExe",                //  1
    "SymTagCompiland",          //  2
    "SymTagCompilandDetails",   //  3
    "SymTagCompilandEnv",       //  4
    "SymTagFunction",           //  5
    "SymTagBlock",              //  6
    "SymTagData",               //  7
    "SymTagAnnotation",         //  8
    "SymTagLabel",              //  9
    "SymTagPublicSymbol",       // 10
    "SymTagUDT",                // 11
    "SymTagEnum",               // 12
    "SymTagFunctionType",       // 13
    "SymTagPointerType",        // 14
    "SymTagArrayType",          // 15
    "SymTagBaseType",           // 16
    "SymTagTypedef",            // 17
    "SymTagBaseClass",          // 18
    "SymTagFriend",             // 19
    "SymTagFunctionArgType",    // 20
    "SymTagFuncDebugStart",     // 21
    "SymTagFuncDebugEnd",       // 22
    "SymTagUsingNamespace",     // 23
    "SymTagVTableShape",        // 24
    "SymTagVTable",             // 25
    "SymTagCustom",             // 26
    "SymTagThunk",              // 27
    "SymTagCustomType",         // 28
    "SymTagManagedType",        // 29
    "SymTagDimension"           // 30
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks for your awesome tool!
I found a small issue: if a symbol name is too long,
strcpy(sym_info->name, info->Name);
can lead to a buffer overflow. Here is a fix: https://gist.github.com/BOT-Man-JL/9206a62b067f4c3a84da57bd3ba04a97/revisions