Skip to content

Instantly share code, notes, and snippets.

@str4d
Last active March 27, 2024 12:41
Show Gist options
  • Star 32 You must be signed in to star a gist
  • Fork 6 You must be signed in to fork a gist
  • Save str4d/e541f4c28e2bca80d222434ac1a204f4 to your computer and use it in GitHub Desktop.
Save str4d/e541f4c28e2bca80d222434ac1a204f4 to your computer and use it in GitHub Desktop.
Ghidra script for demangling Rust symbols
# Attempts to demangle all mangled symbols in the current program using the Rust
# mangling schemes, and replace the default symbol and function signature
# (if applicable) with the demangled symbol.
#
# License: MIT OR Apache-2.0
#@author Jack Grigg <thestr4d@gmail.com>
#@category Symbol
import string
from ghidra.app.util.demangler import (
DemangledDataType as GhDataType,
DemangledFunction,
DemangledLambda,
DemanglerOptions,
DemangledTemplate,
DemangledType,
DemangledUnknown,
DemangledVariable,
)
from ghidra.program.model.symbol import SourceType
# Adapted from the rustc-demangle crate, written by Alex Crichton.
#
# Source: https://github.com/rust-lang/rustc-demangle/tree/2811a1ad6f7c8bead2ef3671e4fdc10de1553e96
# License: MIT OR Apache-2.0
# Strips a mangling-scheme prefix (e.g. 'ZN' or 'R') from the front of `s`,
# tolerating the platform-specific number of leading underscores.
#
# Returns the text following the prefix, or `None` if `s` does not start with
# the prefix or has nothing after it.
def strip_prefix(prefix, s):
    # Tried in this order:
    # - '_'  : the normal form.
    # - ''   : on Windows, dbghelp strips leading underscores.
    # - '__' : on OSX, symbols are prefixed with an extra _
    for underscores in ('_', '', '__'):
        candidate = underscores + prefix
        cut = len(candidate)
        # Require at least one character after the prefix.
        if len(s) > cut and s.startswith(candidate):
            return s[cut:]
    return None
#
# Legacy scheme
#
# Builds a `DemangledObject` from its parts.
#
# `fn` must be a subclass of `DemangledObject` (or any callable) accepting
# `(mangled, demangled, demangled_name)`. When `namespace` is not `None` it is
# attached to the new object via `setNamespace`.
def objectify(fn, namespace, mangled, demangled, demangled_name):
    obj = fn(mangled, demangled, demangled_name)
    if namespace is None:
        return obj
    obj.setNamespace(namespace)
    return obj
# Attempts to demangle the given symbol using the legacy Rust scheme.
#
# Returns:
# - `None` if the symbol couldn't be demangled (wrong prefix, non-ASCII,
#   truncated or malformed element list, or no path elements besides the hash).
# - `(namespace, mangled, demangled, demangled_name)` otherwise, where:
# - `namespace` is either a `DemangledObject` or `None`
# - `mangled` is the original symbol.
# - `demangled` is the fully-demangled symbol.
# - `demangled_name` is the last part of the symbol.
#
# Adapted from the rustc-demangle crate, written by Alex Crichton.
#
# Source: https://github.com/rust-lang/rustc-demangle/tree/2811a1ad6f7c8bead2ef3671e4fdc10de1553e96
# License: MIT OR Apache-2.0
def demangle_legacy(mangled):
    # See src/librustc_codegen_utils/symbol_names/legacy.rs for these mappings.
    ESCAPE_MAP = {
        'SP': '@',
        'BP': '*',
        'RF': '&',
        'LT': '<',
        'GT': '>',
        'LP': '(',
        'RP': ')',
        'C': ',',
    }
    LOWER_HEX = string.hexdigits[:16]

    def is_lower_hex(s):
        return all(c in LOWER_HEX for c in s)

    def is_rust_hash(s):
        return s.startswith('h') and is_lower_hex(s[1:])

    # Expands the `$..$` escapes and `.`/`..` separators of one path element.
    # Anything that cannot be interpreted is copied through unchanged.
    def unescape(rest):
        out = ''
        while True:
            if rest.startswith('..'):
                out += '::'
                rest = rest[2:]
            elif rest.startswith('.'):
                # Also covers an element ending in a single '.'.
                out += '.'
                rest = rest[1:]
            elif rest.startswith('$'):
                end = rest.find('$', 1)
                if end < 0:
                    # Unterminated escape.
                    break
                escape = rest[1:end]
                unescaped = ESCAPE_MAP.get(escape)
                if unescaped is None:
                    digits = escape[1:]
                    if not escape.startswith('u') or not is_lower_hex(digits):
                        # Invalid escape sequence, invalid hex digit, or not
                        # lowercase.
                        break
                    try:
                        # TODO: Maybe filter out control codepoints?
                        unescaped = chr(int(digits, 16))
                    except (ValueError, OverflowError):
                        # No digits, or out of range for chr().
                        break
                out += unescaped
                rest = rest[end + 1:]
            else:
                # Copy plain text up to the next '$' or '.'; `rest` does not
                # start with either here, so any hit is at index >= 1.
                hits = [i for i in (rest.find('$'), rest.find('.')) if i >= 0]
                if not hits:
                    break
                i = min(hits)
                out += rest[:i]
                rest = rest[i:]
        return out + rest

    # First validate the symbol.
    inner = strip_prefix('ZN', mangled)
    if inner is None:
        return None

    # Only work with ASCII text.
    if any(ord(c) >= 128 for c in inner):
        return None

    # Split `<len><bytes><len><bytes>...E` into raw elements. Running out of
    # input before the terminating 'E' means the symbol is not legacy-mangled.
    raw_elements = []
    pos = 0
    end_of_input = len(inner)
    while True:
        if pos >= end_of_input:
            return None
        if inner[pos] == 'E':
            break
        if not inner[pos].isdigit():
            return None
        start = pos
        while pos < end_of_input and inner[pos].isdigit():
            pos += 1
        length = int(inner[start:pos])
        raw_elements.append(inner[pos:pos + length])
        pos += length

    # Skip the hash at the end.
    if raw_elements and is_rust_hash(raw_elements[-1]):
        raw_elements.pop()

    # Nothing left to name the symbol with.
    if not raw_elements:
        return None

    parts = []
    for rest in raw_elements:
        if rest.startswith('_$'):
            rest = rest[1:]
        parts.append(unescape(rest))
    demangled = '::'.join(parts)

    # Handle any namespace parts.
    namespace = None
    for demangled_name in parts[:-1]:
        namespace = objectify(
            DemangledType,
            namespace,
            mangled,
            demangled,
            demangled_name)

    # Return the un-objectified parts so we can figure out what class to use.
    return (namespace, mangled, demangled, parts[-1])
#
# v0 scheme
#
# The Rust v0 scheme is designed to enable single-pass demangling. Unfortunately
# Ghidra's internal types for demangled components require the full demangled
# string at construction time. We therefore implement a multi-pass strategy:
#
# - We demangle the mangled symbol into a tree of Rust types.
# - We print the demangled string from the tree.
# - We traverse the tree, converting its nodes into Ghidra types.
#
# Non-mutating, non-rewindable cursor. Each function returns a tuple of:
# - The new cursor, and the result (on success).
# - The current cursor, and None (on failure).
#
# `backrefs` is a dict shared by every cursor derived from this one; it maps a
# byte offset (`pos`) to the object that was parsed starting at that offset.
class Cursor:
    def __init__(self, backrefs, bytes, pos=0):
        self.backrefs = backrefs
        self.bytes = bytes
        self.pos = pos

    # Records `val` as the object parsed at this cursor's position.
    def backref_add(self, val):
        self.backrefs[self.pos] = val

    # Parses `B<base-62-number>` and looks the number up in `backrefs`.
    def backref_parse(self):
        (rest, prefix) = self.strip_prefix(b'B')
        if prefix:
            (rest, num) = base_62_number(rest)
            if num is not None:
                backref = self.backrefs.get(num)
                if backref is not None:
                    return (rest, backref)
        return (self, None)

    def is_empty(self):
        return len(self.bytes) == 0

    # True if there is a next byte and `cond` accepts it. `cond` receives a
    # length-1 byte string.
    def prefix_is(self, cond):
        return len(self.bytes) > 0 and cond(self.bytes[:1])

    # Consumes the single byte `prefix` if it is next; the result is a bool.
    def strip_prefix(self, prefix):
        (rest, c) = self.take(1)
        if c == prefix:
            return (rest, True)
        else:
            return (self, False)

    # Consumes exactly `i` bytes; fails if `i` is negative or too large.
    def take(self, i):
        if i < 0 or len(self.bytes) < i:
            return (self, None)
        return (Cursor(self.backrefs, self.bytes[i:], self.pos + i), self.bytes[:i])

    # Consumes everything before the first occurrence of `pattern`; fails if
    # `pattern` does not occur (find() returns -1, which take() rejects).
    def take_until(self, pattern):
        return self.take(self.bytes.find(pattern))

    # Consumes the longest prefix whose bytes all satisfy `cond`. Never fails;
    # the result may be empty.
    def take_while(self, cond):
        i = 0
        while i < len(self.bytes):
            # Slice rather than index so `cond` always sees a length-1 byte
            # string (indexing yields an int on Python 3).
            if not cond(self.bytes[i:i + 1]):
                break
            i += 1
        return self.take(i)
# Decorator for `__init__` methods of backref-able nodes.
#
# The decorated constructor takes an extra leading `cursor` argument (after
# `self`). The remaining arguments are forwarded to the wrapped `init`, and the
# finished object is then registered with `cursor.backref_add`.
def backref_store(init):
    def storing_init(self, cursor, *init_args):
        # Initialise first so the registered object is fully populated.
        init(self, *init_args)
        cursor.backref_add(self)

    return storing_init
# Parses a run of leading decimal digits from cursor `s`.
#
# Returns `(rest, value)` on success, or `(s, None)` if `s` does not start with
# a digit.
def decimal_number(s):
    (after_digits, digits) = s.take_while(lambda ch: ch.isdigit())
    if not digits:
        return (s, None)
    return (after_digits, int(digits))
# Digit alphabet for base-62 numbers: 0-9, then a-z, then A-Z.
BASE62_CHARS = (string.digits + string.ascii_letters).encode('UTF-8')


# Parses an underscore-terminated base-62 number from cursor `s`.
#
# A bare `_` decodes to 0; otherwise the result is the digits' value plus one.
# Returns `(rest, value)` on success, or `(s, None)` if there is no terminating
# underscore or a non-base-62 character precedes it.
def base_62_number(s):
    # Find the underscore marking the end.
    (at_underscore, digits) = s.take_until(b'_')
    if digits is None:
        return (s, None)
    (rest, match) = at_underscore.strip_prefix(b'_')
    assert match
    if not digits:
        return (rest, 0)

    # Decode the Base62 number.
    values = [BASE62_CHARS.find(digit) for digit in digits]
    if min(values) < 0:
        return (s, None)
    total = 0
    for value in values:
        total = total * 62 + value
    return (rest, total + 1)
# Returns the numeric index corresponding to the disambiguator.
#
# - No leading `s`: `(s, 0)`; nothing is consumed.
# - `s<base-62-number>`: `(rest, number + 1)`.
# - `s` followed by an invalid number, or with nothing after the number:
#   `(s, None)`.
def disambiguator(s):
    (after_tag, tag) = s.take(1)
    if tag != b's':
        return (s, 0)
    (rest, num) = base_62_number(after_tag)
    if num is not None and not rest.is_empty():
        return (rest, num + 1)
    return (s, None)
# Parser and node types for the v0 `<path>` production.
#
# `RustPath.parse` returns one of the nested node classes below, a
# `RustIdentifier` (for a crate root), or whatever object an earlier parse
# registered for a backref. Each node renders to Rust syntax via `str()` and
# converts to Ghidra demangler objects via `to_ghidra(mangled, demangled)`.
class RustPath:
    # `<T>` (inherent impl; `path` is None) or `<T as Trait>`.
    class Trait:
        @backref_store
        def __init__(self, typ, path):
            self.typ = typ
            self.path = path

        def __repr__(self):
            return 'Trait(%r, %r)' % (self.typ, self.path)

        def __str__(self):
            return '<%s%s>' % (self.typ, '' if self.path is None else ' as %s' % self.path)

        def to_ghidra(self, mangled, demangled):
            # Ghidra has no concept of traits, so we ignore the path and just
            # return the type (pretending the trait is the only source of
            # whatever child path is being accessed).
            return self.typ.to_ghidra(mangled, demangled)

    # `parent::identifier`.
    class Nested:
        @backref_store
        def __init__(self, parent, identifier):
            self.parent = parent
            self.identifier = identifier

        def __repr__(self):
            return 'Nested(%r, %r)' % (self.parent, self.identifier)

        def __str__(self):
            return '%s::%s' % (self.parent, self.identifier)

        def to_ghidra(self, mangled, demangled):
            ret = self.identifier.to_ghidra(mangled, demangled)
            ret.setNamespace(self.parent.to_ghidra(mangled, demangled))
            return ret

    # `path<generic_args>`.
    class Generic:
        @backref_store
        def __init__(self, path, generic_args):
            self.path = path
            self.generic_args = generic_args

        def __repr__(self):
            return 'Generic(%r, %r)' % (self.path, self.generic_args)

        def __str__(self):
            return '%s%s' % (self.path, self.generic_args)

        def to_ghidra(self, mangled, demangled):
            ret = self.path.to_ghidra(mangled, demangled)
            ret.setTemplate(self.generic_args.to_ghidra(mangled, demangled))
            return ret

    # Parses one path starting at cursor `s`.
    #
    # Returns `(rest, node)` on success or `(s, None)` if no path could be
    # parsed at this position.
    @classmethod
    def parse(cls, s):
        (rest, backref) = s.backref_parse()
        if backref is not None:
            return (rest, backref)

        (rest, prefix) = s.take(1)
        if prefix == b'C':
            # crate root
            (rest, identifier) = RustIdentifier.parse(rest, RustNamespace.internal_type())
            if identifier is not None:
                # The identifier registered itself at the offset just after
                # the 'C' tag; also register it at the tag itself so that a
                # backref to the start of this path resolves.
                s.backref_add(identifier)
                return (rest, identifier)
        elif prefix in [b'M', b'X', b'Y']:
            # <T> (inherent impl)
            # <T as Trait> (trait impl)
            # <T as Trait> (trait definition)
            if prefix != b'Y':
                # Parse and ignore impl-path.
                (rest, index) = disambiguator(rest)
                if index is None:
                    return (s, None)
                (rest, impl_path) = cls.parse(rest)
                if impl_path is None:
                    return (s, None)
            (rest, typ) = RustType.parse(rest)
            if typ is not None:
                if prefix == b'M':
                    path = None
                else:
                    (rest, path) = cls.parse(rest)
                    if path is None:
                        return (s, None)
                return (rest, cls.Trait(s, typ, path))
        elif prefix == b'N':
            # ...::ident (nested path)
            (rest, namespace) = RustNamespace.parse(rest)
            if namespace is None:
                # Without a namespace the identifier could not be rendered.
                return (s, None)
            (rest, parent) = cls.parse(rest)
            if parent is not None:
                (rest, identifier) = RustIdentifier.parse(rest, namespace)
                if identifier is not None:
                    return (rest, cls.Nested(s, parent, identifier))
        elif prefix == b'I':
            # ...<T, U> (generic args)
            (rest, path) = cls.parse(rest)
            if path is not None:
                (rest, generic_args) = RustGenericArgs.parse(rest)
                if generic_args is not None:
                    return (rest, cls.Generic(s, path, generic_args))

        # Invalid
        return (s, None)
# A single v0 identifier: an optional disambiguator, an optional Punycode
# marker, a decimal byte length, an optional `_` separator, and the bytes.
#
# The `namespace` (a `RustNamespace`) decides how the identifier is rendered
# and which Ghidra demangled class represents it.
class RustIdentifier:
    # Parses an identifier at cursor `s`.
    #
    # Returns `(rest, RustIdentifier)` on success, or `(s, None)` if the
    # identifier is malformed, truncated, or cannot be decoded.
    @classmethod
    def parse(cls, s, namespace):
        # Parse the disambiguator.
        (rest, index) = disambiguator(s)
        if index is None:
            return (s, None)

        # Check for Punycode encoding.
        (rest, punycode) = rest.strip_prefix(b'u')
        (rest, bytes_len) = decimal_number(rest)
        if bytes_len is None:
            return (s, None)

        # Strip the separator if present.
        (rest, _) = rest.strip_prefix(b'_')

        # Parse the identifier.
        (rest, id_bytes) = rest.take(bytes_len)
        if id_bytes is None:
            # Fewer bytes remain than the length prefix claims.
            return (s, None)

        try:
            if punycode:
                # Only the last '_' stands in for Punycode's '-' delimiter;
                # earlier underscores belong to the identifier's ASCII part.
                sep = id_bytes.rfind(b'_')
                if sep >= 0:
                    id_bytes = id_bytes[:sep] + b'-' + id_bytes[sep + 1:]
                identifier = id_bytes.decode('punycode')
            else:
                identifier = id_bytes.decode('UTF-8')
        except UnicodeError:
            return (s, None)

        return (rest, cls(s, namespace, identifier, index))

    @backref_store
    def __init__(self, namespace, identifier, index):
        self.namespace = namespace
        self.identifier = identifier
        self.index = index

    def __repr__(self):
        return 'Ident(%s, %s)' % (self.namespace, self)

    def __str__(self):
        return self.namespace.rust(self.identifier, self.index)

    def to_ghidra(self, mangled, demangled):
        return self.namespace.ghidra(mangled, demangled, str(self))
# Helper for printing special namespaces.
#
# Produces `{prefix}`, or `{prefix:ident}` when `ident` is non-empty, followed
# by `[index]` when `index` is greater than zero.
def ns(prefix, ident, index):
    label = '%s' % prefix
    if len(ident) > 0:
        label += ':%s' % ident
    suffix = ''
    if index > 0:
        suffix = '[%d]' % index
    return '{' + label + '}' + suffix
# Chooses the Ghidra demangled class for an identifier in the value namespace.
#
# Returns a `DemangledVariable` when `name` looks like a global static, and a
# `DemangledFunction` otherwise.
def StaticOrFunction(mangled, demangled, name):
    # If we don't tag functions as functions, then they don't show up correctly.
    # But if we tag a global static as a function, we trigger the error:
    # java.lang.IllegalArgumentException:
    # Address not in memory or is off-cut data/instruction
    #
    # We can't get this perfectly right during parsing, but we can guess pretty
    # well by treating idents in SCREAMING_SNAKE_CASE as global statics. Digits
    # are accepted too, so that names such as `CRC32_TABLE` are not mistaken
    # for functions.
    static_chars = string.ascii_uppercase + string.digits + '_'
    if all(c in static_chars for c in name):
        return DemangledVariable(mangled, demangled, name)
    return DemangledFunction(mangled, demangled, name)
# Table entry for a special (uppercase-tagged) namespace: rendered as
# `{label}`, `{label:ident}` and/or with an `[index]` suffix by `ns`, and
# represented in Ghidra as a `DemangledUnknown`.
def _special_namespace(label):
    return (lambda ident, idx: ns(label, ident, idx), DemangledUnknown)


# The namespace tag of a nested path element.
#
# Attributes:
# - `prefix`: the one-byte tag.
# - `rust`: callable `(ident, index) -> str` rendering an identifier.
# - `ghidra`: callable `(mangled, demangled, name)` building the Ghidra object.
class RustNamespace:
    # Every uppercase tag is a special namespace printed under its own letter,
    # except for the overrides below.
    PREFIXES = {}
    for _tag in string.ascii_uppercase:
        PREFIXES[_tag.encode('UTF-8')] = _special_namespace(_tag)
    del _tag
    PREFIXES[b'C'] = (lambda _, idx: '{closure}[%d]' % idx, DemangledLambda)
    PREFIXES[b'S'] = _special_namespace('shim')  # TODO: Pick type
    PREFIXES[b't'] = (lambda ident, _: ident, DemangledType)
    PREFIXES[b'v'] = (lambda ident, _: ident, StaticOrFunction)

    INTERNAL_PREFIX_RANGE = string.ascii_lowercase.encode('UTF-8')

    # Parses a one-byte namespace tag at cursor `s`.
    #
    # Returns `(rest, RustNamespace)`, or `(s, None)` if the input is
    # exhausted or the byte is not a known or lowercase tag.
    @classmethod
    def parse(cls, s):
        (rest, prefix) = s.take(1)
        if prefix is None:
            # Truncated input: there is no tag byte to inspect.
            return (s, None)
        res = cls.PREFIXES.get(prefix)
        if res is not None:
            return (rest, cls(prefix, *res))
        elif prefix in cls.INTERNAL_PREFIX_RANGE:
            # Any other lowercase tag: print the bare identifier.
            return (rest, cls(prefix, lambda ident, _: ident, DemangledUnknown))
        else:
            return (s, None)

    # The type namespace (`t`), used for crate roots.
    @classmethod
    def internal_type(cls):
        return cls(b't', *cls.PREFIXES[b't'])

    def __init__(self, prefix, rust, ghidra):
        self.prefix = prefix
        self.rust = rust
        self.ghidra = ghidra

    def __repr__(self):
        return self.prefix.decode('UTF-8')
# An `E`-terminated list of generic arguments (lifetimes and types).
class RustGenericArgs:
    # Parses arguments at cursor `s` up to and including the closing `E`.
    #
    # Returns `(rest, RustGenericArgs)`, or `(s, None)` if an argument cannot
    # be parsed or the closing `E` is missing.
    @classmethod
    def parse(cls, s):
        def parse_arg(r):
            (rest, lifetime) = RustLifetime.parse(r)
            if lifetime is not None:
                return (rest, lifetime)
            (rest, typ) = RustType.parse(r)
            if typ is not None:
                return (rest, typ)
            (rest, prefix) = r.take(1)
            if prefix == b'K':
                # const generic
                print('TODO: const generic args')
            return (r, None)

        generic_args = []
        rest = s
        while rest.prefix_is(lambda prefix: prefix != b'E'):
            (rest, arg) = parse_arg(rest)
            if arg is None:
                return (s, None)
            generic_args.append(arg)
        (rest, match) = rest.strip_prefix(b'E')
        if not match:
            # Input ended before the list was closed.
            return (s, None)
        return (rest, cls(generic_args))

    def __init__(self, generic_args):
        self.generic_args = generic_args

    def __repr__(self):
        return 'Args(%s)' % ', '.join(['%r' % arg for arg in self.generic_args])

    def __str__(self):
        return '<%s>' % ', '.join(['%s' % arg for arg in self.generic_args])

    def to_ghidra(self, mangled, demangled):
        tpl = DemangledTemplate()
        for arg in self.generic_args:
            gh_arg = arg.to_ghidra(mangled, demangled)
            if not isinstance(gh_arg, GhDataType):
                # DemangledTemplate only accepts DemangledDataType args. We need
                # to catch and convert other Rust types.
                gh_arg = GhDataType(mangled, demangled, gh_arg.getDemangledName())
            tpl.addParameter(gh_arg)
        return tpl
# A lifetime argument: `L` followed by a base-62 index.
class RustLifetime:
    # Parses a lifetime at cursor `s`.
    #
    # Returns `(rest, RustLifetime)`, or `(s, None)` if `s` does not start with
    # `L` followed by a valid base-62 number.
    @classmethod
    def parse(cls, s):
        (after_tag, tag) = s.take(1)
        if tag != b'L':
            return (s, None)
        (rest, index) = base_62_number(after_tag)
        if index is None:
            return (s, None)
        return (rest, cls(index))

    def __init__(self, index):
        self.index = index

    def __repr__(self):
        return 'Lifetime(%d)' % self.index

    def __str__(self):
        # TODO: Elide lifetime (index 0).
        # TODO: Reference binders (any other index).
        return '\'_' if self.index == 0 else '\'%d' % self.index

    def to_ghidra(self, mangled, demangled):
        rendered = str(self)
        return DemangledType(mangled, demangled, rendered)
# Returns `obj` if it already is a `DemangledDataType`; otherwise wraps its
# demangled name in a new one, so that array/reference/pointer modifiers can
# be applied to it.
def _as_data_type(obj, mangled, demangled):
    if isinstance(obj, GhDataType):
        return obj
    return GhDataType(mangled, demangled, obj.getDemangledName())


# Parser and node types for the v0 `<type>` production.
#
# `RustType.parse` returns a basic type, a path, one of the nested node
# classes below, or whatever object an earlier parse registered for a backref.
class RustType:
    # `[T]`
    class Slice:
        @backref_store
        def __init__(self, typ):
            self.typ = typ

        def __repr__(self):
            return 'Slice(%r)' % self.typ

        def __str__(self):
            return '[%s]' % self.typ

        def to_ghidra(self, mangled, demangled):
            ret = _as_data_type(self.typ.to_ghidra(mangled, demangled), mangled, demangled)
            # Pretend it's an array.
            ret.setArray(1)
            return ret

    # `(T1, T2, ...)`
    class Tuple:
        @backref_store
        def __init__(self, types):
            self.types = types

        def __repr__(self):
            return 'Tuple(%s)' % ', '.join(['%r' % typ for typ in self.types])

        def __str__(self):
            return '(%s)' % ', '.join(['%s' % typ for typ in self.types])

        def to_ghidra(self, mangled, demangled):
            # Represent a tuple as a template with name 'tuple$'.
            ret = GhDataType(mangled, demangled, 'tuple$')
            ret.setTemplate(RustGenericArgs(self.types).to_ghidra(mangled, demangled))
            return ret

    # `&T` / `&mut T`, with an optional lifetime.
    class Ref:
        @backref_store
        def __init__(self, lifetime, mutable, typ):
            self.lifetime = lifetime
            self.mutable = mutable
            self.typ = typ

        def __repr__(self):
            return 'Ref%s(%s, %r)' % ('Mut' if self.mutable else '', self.lifetime, self.typ)

        def __str__(self):
            return '&%s%s%s' % (
                '' if self.lifetime is None else '%s ' % self.lifetime,
                'mut ' if self.mutable else '',
                self.typ,
            )

        def to_ghidra(self, mangled, demangled):
            ret = _as_data_type(self.typ.to_ghidra(mangled, demangled), mangled, demangled)
            ret.setReference()
            if not self.mutable:
                ret.setConst()
            return ret

    # `*const T` / `*mut T`
    class Ptr:
        @backref_store
        def __init__(self, typ, mutable):
            self.typ = typ
            self.mutable = mutable

        def __repr__(self):
            return 'Ptr%s(%r)' % ('Mut' if self.mutable else '', self.typ)

        def __str__(self):
            return '*%s %s' % ('mut' if self.mutable else 'const', self.typ)

        def to_ghidra(self, mangled, demangled):
            ret = _as_data_type(self.typ.to_ghidra(mangled, demangled), mangled, demangled)
            ret.incrementPointerLevels()
            if not self.mutable:
                ret.setConst()
            return ret

    # Parses one type starting at cursor `s`.
    #
    # Returns `(rest, node)` on success or `(s, None)` if no type could be
    # parsed at this position (including the not-yet-implemented forms, which
    # print a TODO message).
    @classmethod
    def parse(cls, s):
        (rest, backref) = s.backref_parse()
        if backref is not None:
            return (rest, backref)

        (rest, basic) = RustBasicType.parse(s)
        if basic is not None:
            return (rest, basic)

        (rest, path) = RustPath.parse(s)
        if path is not None:
            return (rest, path)

        (rest, prefix) = s.take(1)
        if prefix == b'A':
            # [T; N]
            print('TODO: arrays')
        elif prefix == b'S':
            # [T]
            (rest, typ) = cls.parse(rest)
            if typ is not None:
                return (rest, cls.Slice(s, typ))
        elif prefix == b'T':
            # (T1, T2, T3, ...)
            types = []
            while rest.prefix_is(lambda prefix: prefix != b'E'):
                (rest, typ) = RustType.parse(rest)
                if typ is None:
                    return (s, None)
                types.append(typ)
            (rest, match) = rest.strip_prefix(b'E')
            if not match:
                # Input ended before the tuple was closed.
                return (s, None)
            return (rest, cls.Tuple(s, types))
        elif prefix in [b'R', b'Q']:
            # &T
            # &mut T
            # The lifetime is optional; on failure the cursor is unchanged and
            # `lifetime` is None.
            (rest, lifetime) = RustLifetime.parse(rest)
            (rest, typ) = cls.parse(rest)
            if typ is not None:
                return (rest, cls.Ref(s, lifetime, prefix == b'Q', typ))
        elif prefix in [b'P', b'O']:
            # *const T
            # *mut T
            (rest, typ) = cls.parse(rest)
            if typ is not None:
                return (rest, cls.Ptr(s, typ, prefix == b'O'))
        elif prefix == b'F':
            # fn(...) -> ...
            print('TODO: function signatures')
        elif prefix == b'D':
            # dyn Trait<Assoc = X> + Send + 'a
            print('TODO: dyn Trait')

        # Invalid
        return (s, None)
# Flags the given GhDataType as unsigned (via `setUnsigned`) and hands the
# same object back, so the call can be used inline in an expression.
def u(t):
    t.setUnsigned()
    return t
# A primitive type identified by a single lowercase tag byte.
#
# `PREFIXES` maps each tag to `(rust_name, builder)`, where
# `builder(mangled, demangled)` creates the matching Ghidra data type.
class RustBasicType:
    PREFIXES = {
        # Signed integers.
        b'a': ('i8', lambda sym, text: GhDataType(sym, text, GhDataType.INT8)),
        b's': ('i16', lambda sym, text: GhDataType(sym, text, GhDataType.INT16)),
        b'l': ('i32', lambda sym, text: GhDataType(sym, text, GhDataType.INT32)),
        b'x': ('i64', lambda sym, text: GhDataType(sym, text, GhDataType.INT64)),
        b'n': ('i128', lambda sym, text: GhDataType(sym, text, GhDataType.INT128)),
        b'i': ('isize', lambda sym, text: GhDataType(sym, text, GhDataType.INT)),
        # Unsigned integers.
        b'h': ('u8', lambda sym, text: u(GhDataType(sym, text, GhDataType.INT8))),
        b't': ('u16', lambda sym, text: u(GhDataType(sym, text, GhDataType.INT16))),
        b'm': ('u32', lambda sym, text: u(GhDataType(sym, text, GhDataType.INT32))),
        b'y': ('u64', lambda sym, text: u(GhDataType(sym, text, GhDataType.INT64))),
        b'o': ('u128', lambda sym, text: u(GhDataType(sym, text, GhDataType.INT128))),
        b'j': ('usize', lambda sym, text: u(GhDataType(sym, text, GhDataType.INT))),
        # Floating point.
        b'f': ('f32', lambda sym, text: GhDataType(sym, text, GhDataType.FLOAT)),
        b'd': ('f64', lambda sym, text: GhDataType(sym, text, GhDataType.DOUBLE)),
        # Text and booleans.
        b'b': ('bool', lambda sym, text: GhDataType(sym, text, GhDataType.BOOL)),
        b'c': ('char', lambda sym, text: GhDataType(sym, text, GhDataType.CHAR)),
        b'e': ('str', lambda sym, text: GhDataType(sym, text, GhDataType.STRING)),
        # Everything else.
        b'u': ('()', lambda sym, text: GhDataType(sym, text, GhDataType.VOID)),
        b'v': ('...', lambda sym, text: GhDataType(sym, text, GhDataType.VARARGS)),
        b'z': ('!', lambda sym, text: GhDataType(sym, text, GhDataType.UNDEFINED)),
        b'p': ('_', lambda sym, text: GhDataType(sym, text, GhDataType.UNDEFINED)),
    }

    # Parses a basic-type tag at cursor `s`.
    #
    # Returns `(rest, RustBasicType)`, or `(s, None)` if the next byte is not
    # in `PREFIXES` (or there is no next byte).
    @classmethod
    def parse(cls, s):
        (rest, tag) = s.take(1)
        entry = cls.PREFIXES.get(tag)
        if entry is not None:
            (name, builder) = entry
            return (rest, cls(name, builder))
        return (s, None)

    def __init__(self, name, ghidra):
        self.name = name
        self.ghidra = ghidra

    def __repr__(self):
        return 'Basic(%s)' % self.name

    def __str__(self):
        return self.name

    def to_ghidra(self, mangled, demangled):
        build = self.ghidra
        return build(mangled, demangled)
# Attempts to demangle the given symbol using the Rust v0 scheme.
#
# Returns the Ghidra demangled object for the symbol's path, or `None` if the
# symbol does not have the v0 prefix, is not ASCII, does not parse as a path,
# or has unexpected data after the path.
def demangle_v0(mangled):
    # Verify and strip the symbol prefix.
    inner = strip_prefix('R', mangled)
    if inner is None:
        return None  # Invalid

    # The remaining string must conform to the following grammar:
    # [<decimal-number>] <path> [<path>]
    # Paths always start with uppercase characters.
    if inner[0] not in string.digits + string.ascii_uppercase:
        return None  # Invalid

    # Only work with ASCII text.
    if any(ord(c) >= 128 for c in inner):
        return None

    rest = Cursor({}, inner.encode('UTF-8'))
    # The optional encoding version is parsed only to skip over it.
    (rest, _encoding_version) = decimal_number(rest)
    (rest, path) = RustPath.parse(rest)
    if path is None:
        return None

    # The optional instantiating crate is parsed only to skip over it.
    (rest, instantiating_crate) = RustPath.parse(rest)
    if instantiating_crate is None:
        # Its value is discarded, so a backref that cannot be resolved to an
        # object is still skipped syntactically instead of failing the symbol.
        (after_tag, is_backref) = rest.strip_prefix(b'B')
        if is_backref:
            (after_num, num) = base_62_number(after_tag)
            if num is not None:
                rest = after_num

    # Anything left over must be a vendor-specific suffix, which starts with
    # '.' or '$'. Otherwise this is most likely an unrelated symbol that merely
    # begins like a v0 one (e.g. a C function named `RC4_set_key`), and
    # renaming it to the parsed fragment would be wrong.
    if not rest.is_empty() and not rest.prefix_is(lambda c: c in (b'.', b'$')):
        return None

    demangled = str(path)
    return path.to_ghidra(mangled, demangled)
# Script body: walk every non-default symbol in the global namespace, try the
# legacy scheme and then the v0 scheme, and replace each symbol that demangles
# with its demangled form.

# Imported explicitly so the handler below does not depend on the name `java`
# happening to be bound in the script's namespace.
from java.lang import IllegalArgumentException

symbol_table = currentProgram.getSymbolTable()
namespace = currentProgram.getNamespaceManager().getGlobalNamespace()
num_demangled = 0
failures = []

# Snapshot the symbols first: the loop deletes symbols, and doing so while the
# symbol table's own iterator is still live may not be safe.
for symbol in list(symbol_table.getSymbols(namespace)):
    if symbol.getSource() == SourceType.DEFAULT:
        continue
    addr = symbol.getAddress()
    name = symbol.getName()

    demangled = demangle_legacy(name)
    if demangled is not None:
        # Delete the existing symbol, otherwise we get duplicates.
        symbol.delete()
        # Try treating the symbol as a function.
        try:
            applied = objectify(DemangledFunction, *demangled).applyTo(
                currentProgram, addr, DemanglerOptions(), monitor)
        except IllegalArgumentException:
            # Not a function. This is probably a static, but treat it as unknown.
            applied = objectify(DemangledUnknown, *demangled).applyTo(
                currentProgram, addr, DemanglerOptions(), monitor)
    else:
        # The two schemes use different prefixes, so v0 is only worth trying
        # when the legacy scheme did not match.
        demangled = demangle_v0(name)
        if demangled is None:
            continue
        # Delete the existing symbol, otherwise we get duplicates.
        symbol.delete()
        try:
            applied = demangled.applyTo(
                currentProgram, addr, DemanglerOptions(), monitor)
        except:
            # Report which symbol failed, then re-raise unchanged.
            print('Error in demangling for %s' % name)
            raise

    if applied:
        num_demangled += 1
    else:
        print('Couldn\'t apply demangling for %s' % name)
        failures.append(name)

print('Demangled %d names' % num_demangled)
if len(failures) > 0:
    print('Failed to demangle (%d):' % len(failures))
    for n in sorted(failures):
        print('- %s' % n)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment