Skip to content

Instantly share code, notes, and snippets.

@lifthrasiir
Last active November 11, 2017 15:58
Show Gist options
  • Save lifthrasiir/7fb50a52154ffc8c0b5e to your computer and use it in GitHub Desktop.
Save lifthrasiir/7fb50a52154ffc8c0b5e to your computer and use it in GitHub Desktop.
Rust Metadata Decoder (of sorts)
import sys
import struct
import zlib
import collections
# Tag tables; they are populated further down.  Each one maps a tag number
# to a (decoder, name) pair.
TAGS, TAGS2, AUTOTAGS = {}, {}, {}

# One-letter aliases used in the decoder slot of container tags.
O = TAGS      # normal objects
A = AUTOTAGS  # autoserialized

_BOOL_BYTES = {'\x00': False, '\x01': True}


def _scalar(code):
    """Build a decoder for a single network-order struct value of type *code*."""
    unpack = struct.Struct('!' + code).unpack
    return lambda s: unpack(s)[0]


def B(s):
    """Decode a one-character boolean; any other payload raises KeyError."""
    return _BOOL_BYTES[s]


def S(s):
    """Return the payload untouched (strings and opaque blobs)."""
    return s


# Fixed-width integers: the doubled letter is the signed variant.
CC = _scalar('b')
C = _scalar('B')
HH = _scalar('h')
H = _scalar('H')
II = _scalar('i')
I = _scalar('I')
LL = _scalar('q')
L = _scalar('Q')
# Floating point.
D = _scalar('d')
F = _scalar('f')
# A table of 256 unsigned 32-bit values, returned as a tuple.
TT = struct.Struct('!256I').unpack

# Every decoder listed here is applied to a fixed-width payload.
FIXED = [B, CC, C, HH, H, II, I, LL, L, D, F, TT]
# Tag table used when the script is run without "-new".
# Keys are tag numbers; values are (decoder, name) pairs.  When the decoder
# is a dict (O or A) walk_metadata treats the tag as a container and walks
# its payload recursively with that dict as the tag table; any other decoder
# is called on the tag's raw payload.
TAGS.update({
0x00: (O, 'tag_items'),
0x01: (S, 'tag_paths_data_name'),
0x02: (S, 'tag_def_id'),
0x03: (O, 'tag_items_data'),
0x04: (O, 'tag_items_data_item'),
0x05: (S, 'tag_items_data_item_family'),
0x07: (S, 'tag_items_data_item_type'),
0x08: (S, 'tag_items_data_item_symbol'),
0x09: (S, 'tag_items_data_item_variant'),
0x0a: (S, 'tag_items_data_parent_item'),
0x0b: (O, 'tag_items_data_item_is_tuple_struct_ctor'),
0x0c: (O, 'tag_index'),
0x0d: (O, 'tag_index_buckets'),
0x0e: (O, 'tag_index_buckets_bucket'),
0x0f: (L, 'tag_index_buckets_bucket_elt'),
0x10: (TT, 'tag_index_table'),
0x11: (O, 'tag_meta_item_name_value'),
0x12: (S, 'tag_meta_item_name'),
0x13: (S, 'tag_meta_item_value'),
0x14: (O, 'tag_attributes'),
0x15: (O, 'tag_attribute'),
0x16: (O, 'tag_meta_item_word'),
0x17: (O, 'tag_meta_item_list'),
0x18: (O, 'tag_crate_deps'),
0x19: (O, 'tag_crate_dep'),
0x1a: (S, 'tag_crate_hash'),
0x1b: (S, 'tag_crate_crate_name'),
0x1d: (S, 'tag_crate_dep_crate_name'),
0x1e: (S, 'tag_crate_dep_hash'),
0x1f: (S, 'tag_mod_impl'),
0x20: (O, 'tag_item_trait_item'),
0x21: (S, 'tag_item_trait_ref'),
0x22: (S, 'tag_item_super_trait_ref'),
0x23: (S, 'tag_disr_val'),
0x24: (O, 'tag_path'),
0x25: (I, 'tag_path_len'),
0x26: (S, 'tag_path_elem_mod'),
0x27: (S, 'tag_path_elem_name'),
0x28: (O, 'tag_item_field'),
0x29: (S, 'tag_item_field_origin'),
0x2a: (A, 'tag_item_variances'),
0x30: (O, 'tag_item_impl_item'),
0x31: (S, 'tag_item_trait_method_explicit_self'),
0x38: (O, 'tag_items_data_item_reexport'),
0x39: (S, 'tag_items_data_item_reexport_def_id'),
0x3a: (S, 'tag_items_data_item_reexport_name'),
0x40: (A, 'tag_ast'),
0x41: (A, 'tag_tree'),
0x42: (S, 'tag_id_range'),
0x43: (O, 'tag_table'),
0x44: (L, 'tag_table_id'),
0x45: (A, 'tag_table_val'),
0x46: (O, 'tag_table_def'),
0x47: (O, 'tag_table_node_type'),
0x48: (O, 'tag_table_item_subst'),
0x49: (O, 'tag_table_freevars'),
0x4a: (O, 'tag_table_tcache'),
0x4b: (O, 'tag_table_param_defs'),
0x4c: (O, 'tag_table_mutbl'),
0x4d: (O, 'tag_table_last_use'),
0x4e: (O, 'tag_table_spill'),
0x4f: (O, 'tag_table_method_map'),
0x50: (O, 'tag_table_vtable_map'),
0x51: (O, 'tag_table_adjustments'),
0x52: (O, 'tag_table_moves_map'),
0x53: (O, 'tag_table_capture_map'),
0x54: (O, 'tag_table_closure_tys'),
0x55: (O, 'tag_table_closure_kinds'),
0x56: (O, 'tag_table_upvar_capture_map'),
0x57: (O, 'tag_table_capture_modes'),
0x58: (O, 'tag_table_object_cast_map'),
0x59: (O, 'tag_table_const_qualif'),
0x60: (S, 'tag_item_trait_item_sort'),
0x61: (S, 'tag_item_trait_parent_sort'),
0x62: (S, 'tag_item_impl_type_basename'),
0x66: (S, 'tag_crate_triple'),
0x67: (S, 'tag_dylib_dependency_formats'),
0x70: (O, 'tag_lang_items'),
0x71: (O, 'tag_lang_items_item'),
0x72: (I, 'tag_lang_items_item_id'),
0x73: (I, 'tag_lang_items_item_node_id'),
0x74: (I, 'tag_lang_items_missing'),
0x75: (O, 'tag_item_unnamed_field'),
0x76: (S, 'tag_items_data_item_visibility'),
0x79: (S, 'tag_item_method_tps'),
0x7a: (S, 'tag_item_method_fty'),
0x7b: (S, 'tag_mod_child'),
0x7c: (O, 'tag_misc_info'),
0x7d: (O, 'tag_misc_info_crate_items'),
0x7e: (S, 'tag_item_method_provided_source'),
0x7f: (S, 'tag_item_impl_vtables'),
0x80: (O, 'tag_impls'),
0x81: (O, 'tag_impls_impl'),
0x82: (O, 'tag_items_data_item_inherent_impl'),
0x83: (O, 'tag_items_data_item_extension_impl'),
0x87: (O, 'tag_native_libraries'),
0x88: (O, 'tag_native_libraries_lib'),
0x89: (S, 'tag_native_libraries_name'),
0x8a: (I, 'tag_native_libraries_kind'),
0x8b: (S, 'tag_plugin_registrar_fn'),
0x8e: (O, 'tag_method_argument_names'),
0x8f: (S, 'tag_method_argument_name'),
0x90: (O, 'tag_reachable_extern_fns'),
0x91: (I, 'tag_reachable_extern_fn_id'),
0x92: (A, 'tag_items_data_item_stability'),
0x93: (A, 'tag_items_data_item_repr'),
0x99: (O, 'tag_struct_fields'),
0x9a: (O, 'tag_struct_field'),
0x9b: (I, 'tag_struct_field_id'),
0x9c: (B, 'tag_attribute_is_sugared_doc'),
0x9d: (S, 'tag_trait_def_bounds'),
0x9e: (S, 'tag_items_data_region'),
0xa0: (O, 'tag_region_param_def'),
0xa1: (O, 'tag_region_param_def_ident'),
0xa2: (S, 'tag_region_param_def_def_id'),
0xa3: (L, 'tag_region_param_def_space'),
0xa4: (L, 'tag_region_param_def_index'),
0xa5: (S, 'tag_type_param_def'),
0xa6: (O, 'tag_item_generics'),
0xa7: (O, 'tag_method_ty_generics'),
0xa8: (S, 'tag_predicate'),
0xa9: (S, 'tag_predicate_space'),
0xb0: (S, 'tag_predicate_data'),
0xb1: (S, 'tag_unsafety'),
0xb2: (S, 'tag_associated_type_names'),
0xb3: (S, 'tag_associated_type_name'),
0xb4: (S, 'tag_polarity'),
0xb5: (O, 'tag_macro_defs'),
0xb6: (O, 'tag_macro_def'),
0xb7: (S, 'tag_macro_def_body'),
0xb8: (B, 'tag_paren_sugar'),
})
# AUTOTAGS is the tag table used inside "A" (autoserialized) containers.
# It starts as a copy of every TAGS entry ...
AUTOTAGS.update(TAGS)
# ... and then keys 0..27 are replaced, so within an A container those
# numbers decode as the primitive/collection tags below rather than as the
# TAGS entries with the same numbers.
AUTOTAGS.update({
0: (L, 'Uint'), # XXX
1: (L, 'U64'),
2: (I, 'U32'),
3: (H, 'U16'),
4: (C, 'U8'),
5: (LL, 'Int'), # XXX
6: (LL, 'I64'),
7: (II, 'I32'),
8: (HH, 'I16'),
9: (CC, 'I8'),
10: (B, 'Bool'),
11: (I, 'Char'),
12: (S, 'Str'),
13: (D, 'F64'),
14: (F, 'F32'),
15: (S, 'Float'),
16: (A, 'Enum'),
17: (I, 'EnumVid'),
18: (A, 'EnumBody'),
19: (A, 'Vec'),
20: (I, 'VecLen'),
21: (A, 'VecElt'),
22: (A, 'Map'),
23: (I, 'MapLen'),
24: (A, 'MapKey'),
25: (A, 'MapVal'),
26: (S, 'Opaque'),
27: (S, 'Label'),
})
# Tag table used when the script is run with "-new": a single table holding
# both the autoserialization tags and the metadata tags.
NEWTAGS = {}
# Rebind both container aliases to NEWTAGS so every O/A entry below recurses
# into this same table.  Tuples already stored in TAGS/AUTOTAGS captured the
# old dict objects and are unaffected by this rebinding.
O = A = NEWTAGS
# Tags 0x00-0x10 are the ones walk_metadata gives an implicit size in "-new"
# mode; tags >= 0x100 are the ones read_new_tag decodes from two bytes.
NEWTAGS.update({
# auto-serialization
0x00: (L, 'Uint'), # XXX
0x01: (L, 'U64'),
0x02: (I, 'U32'),
0x03: (H, 'U16'),
0x04: (C, 'U8'),
0x05: (LL, 'Int'), # XXX
0x06: (LL, 'I64'),
0x07: (II, 'I32'),
0x08: (HH, 'I16'),
0x09: (CC, 'I8'),
0x0a: (B, 'Bool'),
0x0b: (I, 'Char'),
0x0c: (D, 'F64'),
0x0d: (F, 'F32'),
0x0e: (I, 'EnumVid'),
0x0f: (I, 'VecLen'),
0x10: (I, 'MapLen'),
0x11: (S, 'Str'),
0x12: (A, 'Enum'),
0x13: (A, 'Vec'),
0x14: (A, 'VecElt'),
0x15: (A, 'Map'),
0x16: (A, 'MapKey'),
0x17: (A, 'MapVal'),
0x18: (S, 'Opaque'),
0x19: (S, 'Label'),
# metadata tags (same names as in TAGS, renumbered)
0x100: (O, 'tag_items'),
0x20: (S, 'tag_paths_data_name'),
0x21: (S, 'tag_def_id'),
0x22: (O, 'tag_items_data'),
0x23: (O, 'tag_items_data_item'),
0x24: (S, 'tag_items_data_item_family'),
0x25: (S, 'tag_items_data_item_type'),
0x26: (S, 'tag_items_data_item_symbol'),
0x27: (S, 'tag_items_data_item_variant'),
0x28: (S, 'tag_items_data_parent_item'),
0x29: (O, 'tag_items_data_item_is_tuple_struct_ctor'),
0x2a: (O, 'tag_index'),
0x2b: (O, 'tag_index_buckets'),
0x2c: (O, 'tag_index_buckets_bucket'),
0x2d: (L, 'tag_index_buckets_bucket_elt'),
0x2e: (TT, 'tag_index_table'),
0x2f: (O, 'tag_meta_item_name_value'),
0x30: (S, 'tag_meta_item_name'),
0x31: (S, 'tag_meta_item_value'),
0x101: (O, 'tag_attributes'),
0x32: (O, 'tag_attribute'),
0x33: (O, 'tag_meta_item_word'),
0x34: (O, 'tag_meta_item_list'),
0x102: (O, 'tag_crate_deps'),
0x35: (O, 'tag_crate_dep'),
0x103: (S, 'tag_crate_hash'),
0x104: (S, 'tag_crate_crate_name'),
0x36: (S, 'tag_crate_dep_crate_name'),
0x37: (S, 'tag_crate_dep_hash'),
0x38: (S, 'tag_mod_impl'),
0x39: (O, 'tag_item_trait_item'),
0x3a: (S, 'tag_item_trait_ref'),
0x3b: (S, 'tag_item_super_trait_ref'),
0x3c: (S, 'tag_disr_val'),
0x3d: (O, 'tag_path'),
0x3e: (I, 'tag_path_len'),
0x3f: (S, 'tag_path_elem_mod'),
0x40: (S, 'tag_path_elem_name'),
0x41: (O, 'tag_item_field'),
0x42: (S, 'tag_item_field_origin'),
0x43: (A, 'tag_item_variances'),
0x44: (O, 'tag_item_impl_item'),
0x45: (S, 'tag_item_trait_method_explicit_self'),
0x46: (O, 'tag_items_data_item_reexport'),
0x47: (S, 'tag_items_data_item_reexport_def_id'),
0x48: (S, 'tag_items_data_item_reexport_name'),
0x50: (A, 'tag_ast'),
0x51: (A, 'tag_tree'),
0x52: (S, 'tag_id_range'),
0x53: (O, 'tag_table'),
0x54: (L, 'tag_table_id'),
0x55: (A, 'tag_table_val'),
0x56: (O, 'tag_table_def'),
0x57: (O, 'tag_table_node_type'),
0x58: (O, 'tag_table_item_subst'),
0x59: (O, 'tag_table_freevars'),
0x5a: (O, 'tag_table_tcache'),
0x5b: (O, 'tag_table_param_defs'),
0x5c: (O, 'tag_table_mutbl'),
0x5d: (O, 'tag_table_last_use'),
0x5e: (O, 'tag_table_spill'),
0x5f: (O, 'tag_table_method_map'),
0x60: (O, 'tag_table_vtable_map'),
0x61: (O, 'tag_table_adjustments'),
0x62: (O, 'tag_table_moves_map'),
0x63: (O, 'tag_table_capture_map'),
0x64: (O, 'tag_table_closure_tys'),
0x65: (O, 'tag_table_closure_kinds'),
0x66: (O, 'tag_table_upvar_capture_map'),
0x67: (O, 'tag_table_capture_modes'),
0x68: (O, 'tag_table_object_cast_map'),
0x69: (O, 'tag_table_const_qualif'),
0x70: (S, 'tag_item_trait_item_sort'),
0x71: (S, 'tag_item_trait_parent_sort'),
0x72: (S, 'tag_item_impl_type_basename'),
0x105: (S, 'tag_crate_triple'),
0x106: (S, 'tag_dylib_dependency_formats'),
0x107: (O, 'tag_lang_items'),
0x73: (O, 'tag_lang_items_item'),
0x74: (I, 'tag_lang_items_item_id'),
0x75: (I, 'tag_lang_items_item_node_id'),
0x76: (I, 'tag_lang_items_missing'),
0x77: (O, 'tag_item_unnamed_field'),
0x78: (S, 'tag_items_data_item_visibility'),
0x79: (S, 'tag_item_method_tps'),
0x7a: (S, 'tag_item_method_fty'),
0x7b: (S, 'tag_mod_child'),
0x108: (O, 'tag_misc_info'),
0x7c: (O, 'tag_misc_info_crate_items'),
0x7d: (S, 'tag_item_method_provided_source'),
0x7e: (S, 'tag_item_impl_vtables'),
0x109: (O, 'tag_impls'),
0x7f: (O, 'tag_impls_impl'),
0x80: (O, 'tag_items_data_item_inherent_impl'),
0x81: (O, 'tag_items_data_item_extension_impl'),
0x10a: (O, 'tag_native_libraries'),
0x82: (O, 'tag_native_libraries_lib'),
0x83: (S, 'tag_native_libraries_name'),
0x84: (I, 'tag_native_libraries_kind'),
0x10b: (S, 'tag_plugin_registrar_fn'),
0x85: (O, 'tag_method_argument_names'),
0x86: (S, 'tag_method_argument_name'),
0x10c: (O, 'tag_reachable_extern_fns'),
0x87: (I, 'tag_reachable_extern_fn_id'),
0x88: (A, 'tag_items_data_item_stability'),
0x89: (A, 'tag_items_data_item_repr'),
0x10d: (O, 'tag_struct_fields'),
0x8a: (O, 'tag_struct_field'),
0x8b: (I, 'tag_struct_field_id'),
0x8c: (B, 'tag_attribute_is_sugared_doc'),
0x8d: (S, 'tag_trait_def_bounds'),
0x8e: (S, 'tag_items_data_region'),
0x8f: (O, 'tag_region_param_def'),
0x90: (O, 'tag_region_param_def_ident'),
0x91: (S, 'tag_region_param_def_def_id'),
0x92: (L, 'tag_region_param_def_space'),
0x93: (L, 'tag_region_param_def_index'),
0x94: (S, 'tag_type_param_def'),
0x95: (O, 'tag_item_generics'),
0x96: (O, 'tag_method_ty_generics'),
0x97: (S, 'tag_predicate'),
0x98: (S, 'tag_predicate_space'),
0x99: (S, 'tag_predicate_data'),
0x9a: (S, 'tag_unsafety'),
0x9b: (S, 'tag_associated_type_names'),
0x9c: (S, 'tag_associated_type_name'),
0x9d: (S, 'tag_polarity'),
0x10f: (O, 'tag_macro_defs'),
0x9e: (O, 'tag_macro_def'),
0x9f: (S, 'tag_macro_def_body'),
0xa0: (B, 'tag_paren_sugar'),
})
def metadata_from_ar(f):
    """Extract the ``rust.metadata.bin`` member from a GNU-style ar archive.

    f is a seekable binary file object.  The 8-byte global archive magic is
    skipped by seeking to offset 8, then 60-byte member headers are read one
    after another until the wanted member is found.

    Returns a ``(size, data)`` tuple: the member size taken from its header
    and the member's raw bytes.

    Raises AssertionError when a header is short or lacks the terminating
    magic; running off the end of the archive without finding the member is
    reported the same way (short header).
    """
    f.seek(8)
    names = None
    while True:
        header = f.read(60)
        assert len(header) == 60, 'truncated ar header (rust.metadata.bin not found?)'
        assert header.endswith(b'\x60\x0a')
        name = header[:16]
        size = int(header[48:58])
        # Member bodies are padded to an even length; the padding byte is not
        # counted in the header's size field, so it has to be skipped
        # separately to land on the next header.
        padding = size & 1
        # The name field is always 16 bytes, space padded, so compare it with
        # the padding removed.
        if name.rstrip() == b'//': # name section
            names = f.read(size)
            f.seek(padding, 1)
            continue
        name, sep, more = name.partition(b'/')
        more = more.strip()
        if more:
            # "/<offset>" refers to a '/'-terminated entry in the name section.
            assert name == b''
            nameoff = int(more)
            name = names[nameoff:names.find(b'/', nameoff)]
        if name == b'rust.metadata.bin':
            return size, f.read(size)
        f.seek(size + padding, 1)
def metadata_from_elf(f):
    """Extract and inflate the metadata kept in an ELF ``.note.rustc`` section.

    f is a seekable binary file object holding an ELF image.  The 4-byte
    magic at offset 0 is not re-checked here.

    Returns ``(compressed_size, data)``: the section size minus its 8-byte
    ``rust\\0\\0\\0\\1`` header, and the payload after raw-deflate decompression.

    Raises AssertionError if the ident version byte is not 1, if no
    ``.note.rustc`` section exists, or if the section lacks the expected
    header.  Raises KeyError for an unknown class or data-encoding byte.
    """
    f.seek(4)
    # struct gives ints on both Python 2 and 3 (map(ord, ...) only works on 2).
    bits, endian, ver = struct.unpack('3B', f.read(3))
    assert ver == 1
    typecode = {1: 'I', 2: 'Q'}[bits]
    wordsize = {1: 4, 2: 8}[bits]
    endian = {1: '<', 2: '>'}[endian]

    # The section header table offset is the third address-sized field after
    # the 0x18 bytes of fixed-width header fields.
    f.seek(0x18 + 2 * wordsize)
    shoff, = struct.unpack(endian + typecode, f.read(wordsize))
    # The entry size, entry count and string-table index come 0x0a bytes
    # after those three address-sized fields.
    f.seek(0x18 + 3 * wordsize + 0x0a)
    _, shnum, shstrndx = struct.unpack(endian + 'HHH', f.read(6))

    # One section header: name, type, flags, addr, offset, size, link, info,
    # addralign, entsize.
    shdr = struct.Struct(endian + 'II' + typecode * 4 + 'II' + typecode * 2)
    f.seek(shoff)
    sections = [shdr.unpack(f.read(shdr.size)) for _ in range(shnum)]

    stroff, strsize = sections[shstrndx][4:6]
    f.seek(stroff)
    strpool = f.read(strsize)

    for fields in sections:
        nameoff = fields[0]
        nameend = strpool.find(b'\0', nameoff)
        if nameend < 0:
            # An unterminated last name runs to the end of the pool.
            nameend = len(strpool)
        if strpool[nameoff:nameend] == b'.note.rustc':
            metaoff, metasize = fields[4], fields[5]
            break
    else:
        # Raised explicitly so the check survives "python -O".
        raise AssertionError('no .note.rustc section found')

    f.seek(metaoff)
    meta = f.read(metasize)
    assert meta.startswith(b'rust\0\0\0\1')
    return len(meta) - 8, zlib.decompress(meta[8:], -zlib.MAX_WBITS)
def read_new_tag(meta, i, j):
    """Decode one tag in the "-new" encoding from ``meta[i:j]``.

    A first byte below 0xf1 is the tag itself.  Otherwise the tag is 12 bits
    wide: the low nibble of the first byte followed by the whole next byte.

    meta may be a Python 2 ``str`` or a ``bytes``/``bytearray`` object.

    Returns ``(tag, next_index)``.
    Raises AssertionError if the tag would extend past index j.
    """
    def byte(k):
        # Indexing bytes/bytearray on Python 3 already yields an int.
        b = meta[k]
        return b if isinstance(b, int) else ord(b)

    # Raised explicitly (not via assert) so the checks survive "python -O".
    if i >= j:
        raise AssertionError('read_new_tag out-of-bound %r' % meta[i:j])
    c = byte(i)
    if c < 0xf1:
        return c, i + 1
    if i + 1 >= j:
        raise AssertionError('read_new_tag out-of-bound %r' % meta[i:j])
    return ((c & 0xf) << 8) | byte(i + 1), i + 2
def read_vint(meta, i, j):
    """Decode one variable-length unsigned integer from ``meta[i:j]``.

    The highest set bit of the first byte selects the width; the remaining
    bits of that byte and the following bytes hold the value, big-endian:

        1xxxxxxx  1 byte,   7 value bits
        01xxxxxx  2 bytes, 14 value bits
        001xxxxx  3 bytes, 21 value bits
        0001xxxx  4 bytes, 28 value bits

    meta may be a Python 2 ``str`` or a ``bytes``/``bytearray`` object.

    Returns ``(value, next_index)``.
    Raises AssertionError if the integer would extend past index j, or if
    the first byte is below 0x10 (no supported width).
    """
    def byte(k):
        # Indexing bytes/bytearray on Python 3 already yields an int.
        b = meta[k]
        return b if isinstance(b, int) else ord(b)

    def need(count):
        # Raised explicitly (not via assert) so the check survives "python -O".
        if i + count > j:
            raise AssertionError('read_vint out-of-bound %r' % meta[i:j])

    need(1)
    c = byte(i)
    if c & 0x80:
        return c & 0x7f, i + 1
    if c & 0x40:
        need(2)
        return ((c & 0x3f) << 8) | byte(i + 1), i + 2
    if c & 0x20:
        need(3)
        return ((c & 0x1f) << 16) | (byte(i + 1) << 8) | byte(i + 2), i + 3
    if c & 0x10:
        need(4)
        return ((c & 0x0f) << 24) | (byte(i + 1) << 16) | (byte(i + 2) << 8) | byte(i + 3), i + 4
    raise AssertionError('read_vint too large %r' % meta[i:i+4])


# Tag reader used by walk_metadata; the script rebinds this name to
# read_new_tag when "-new" is given.
read_tag = read_vint
def to_vint(n):
    """Encode the non-negative integer n in the form read_vint decodes.

    Uses the narrowest width this encoder allows for n: one byte below 0x7f
    (the one-byte value 0x7f is reserved, so 0x7f itself takes two bytes),
    two bytes below 0x4000, three below 0x200000, four below 0x10000000.

    Returns a byte string (``str`` on Python 2, ``bytes`` on Python 3).
    Raises AssertionError if n is 0x10000000 or larger.
    """
    if n < 0x7f: # 0x7f is reserved
        octets = (0x80 | n,)
    elif n < 0x4000:
        octets = (0x40 | (n >> 8), n & 0xff)
    elif n < 0x200000:
        octets = (0x20 | (n >> 16), (n >> 8) & 0xff, n & 0xff)
    elif n < 0x10000000:
        octets = (0x10 | (n >> 24), (n >> 16) & 0xff, (n >> 8) & 0xff, n & 0xff)
    else:
        # Raised explicitly so an oversized value cannot slip through and
        # return None under "python -O".
        raise AssertionError('to_vint too large %#x' % n)
    # bytearray -> bytes produces real bytes on Python 3, where '%c'
    # formatting would produce text instead.
    return bytes(bytearray(octets))
def walk_metadata(meta, i=None, j=None, depth=0, tags=TAGS, rewrite=None):
    """Print the tag tree found in meta[i:j], one tag per line.

    meta    -- the metadata byte string.
    i, j    -- bounds of the region to walk; default to 0 and len(meta).
    depth   -- nesting level; one '|' is printed per level as a prefix.
    tags    -- tag table mapping tag number to a (decoder, name) pair.
    rewrite -- optional statistics accumulator.  Entries [0]..[4] are lists
               that receive every leaf payload ([1] skips tags named
               'Label'); [-1] counts occurrences per tag name and [-2]
               accumulates size-field byte counts per tag name.

    Reads the module globals `new` and `read_tag`, which the script sets
    before calling this function.

    Errors raised while decoding a leaf or walking a nested container are
    caught and printed instead of propagating.  Raises AssertionError
    ('walk_metadata overflow') if a tag's payload runs past j.
    """
    if i is None: i = 0
    if j is None: j = len(meta)
    while i < j:
        tag, i = read_tag(meta, i, j)
        # Unknown tags get decoder 0 and their hex number as the name.
        # Calling 0 below fails, so they are reported via the ERROR path.
        tagtype, tagname = tags.get(tag, (0, hex(tag)))
        i_ = i
        if new and tag < 0x11:
            # In "-new" mode tags 0x00-0x10 carry no size field; the payload
            # width is looked up by tag number instead.
            implicit = True
            size = (8, 8, 4, 2, 1, 8, 8, 4, 2, 1, 1, 4, 8, 4, 4, 4, 4)[tag]
        else:
            implicit = False
            size, i = read_vint(meta, i, j)
        # Number of bytes the size field occupied (0 when implicit).
        tagsizelen = i - i_
        # Trailing commas: Python 2 print keeps the output on the same line.
        if depth: print ' '.join(['|']*depth),
        print tagname, '(%d)'%size if implicit else size,
        if rewrite:
            # NOTE(review): sizev is '' for every tag that reaches the
            # rewrite[-2] update below, so that update adds the full
            # tagsizelen.  Whether the condition here was meant to be
            # "not implicit" cannot be told from this file -- confirm.
            sizev = to_vint(size) if implicit else ''
            # Only referenced by the disabled experiments below.
            needssize = tagtype not in FIXED
            #rewrite[0].append(to_vint(tag))
            #rewrite[0].append(sizev)
            #if tagname != 'Label':
            #    rewrite[1].append(to_vint(tag))
            #    rewrite[1].append(sizev)
            #rewrite[2].append(chr(tag))
            #if needssize: rewrite[2].append(sizev)
            #rewrite[3].append(chr(tag))
            #if needssize: rewrite[3].append(struct.pack('!H', size) if size < 0x8000 else struct.pack('!I', 0x80000000 | size))
            #rewrite[4].append(chr(tag))
            #if needssize: rewrite[4].append(struct.pack('!I', size))
            rewrite[-1][tagname] += 1
            if not implicit: rewrite[-2][tagname] += tagsizelen - len(sizev)
        if isinstance(tagtype, dict):
            # Container: finish the current line, then walk the payload with
            # the nested tag table one level deeper.
            print
            try:
                walk_metadata(meta, i, i+size, depth=depth+1, tags=tagtype, rewrite=rewrite)
            except Exception as e:
                print ' '.join(['|']*depth) + ' +-> ERROR:', e
        else:
            # Leaf: decode the payload and print at most 80 characters of
            # its repr on the same line as the tag name.
            try:
                data = meta[i:i+size]
                s = tagtype(data)
                print `s`[:80]
            except Exception as e:
                print '-> ERROR:', e
            if rewrite:
                rewrite[0].append(data)
                if tagname != 'Label':
                    rewrite[1].append(data)
                rewrite[2].append(data)
                rewrite[3].append(data)
                rewrite[4].append(data)
        # Skip the payload whether or not it was walked or decoded.
        i += size
        assert i <= j, 'walk_metadata overflow'
# Script entry: usage is "[-new] FILE", where FILE is an ELF image or an ar
# archive.
# `new` and `read_tag` are module globals that walk_metadata reads.
# NOTE(review): sys.argv[1] is read without first checking that an argument
# was supplied.
new = False
if sys.argv[1] == '-new':
    new = True
    read_tag = read_new_tag
    del sys.argv[1]
with open(sys.argv[1], 'rb') as f:
    # Pick the extractor from the first four bytes of the file.
    magic = f.read(4)
    if magic == '\x7fELF':
        origsize, meta = metadata_from_elf(f)
    elif magic == '!<ar':
        origsize, meta = metadata_from_ar(f)
    else:
        assert False
    # Seek to the end to learn the size of the whole input file.
    f.seek(0, 2)
    totalsize = f.tell()
# The extracted blob starts with a 4-byte big-endian length; the metadata
# proper is the following `metalen` bytes.
metalen, = struct.unpack('!I', meta[:4])
meta = meta[4:4+metalen]
assert len(meta) == metalen
# The first assignment is immediately overwritten; as written, statistics
# are always collected.
rewrite = None
rewrite = [[], [], [], [], [], collections.Counter(), collections.Counter()]
walk_metadata(meta, rewrite=rewrite, tags=NEWTAGS if new else TAGS)
if rewrite:
    # Labels for rewrite[0]..rewrite[4], in order.
    rewritelabels = ['relax', 'no-label', 'size-elide', 'size-elide-2-or-4', 'size-elide-4']
    for i in xrange(0, len(rewrite)-2):
        rewrite[i] = ''.join(rewrite[i])
    print
    print origsize, totalsize
    # Raw sizes, then zlib level-9 compressed sizes, of the metadata and of
    # each collected byte stream.
    print 'orig', len(meta), ' '.join('%s %s' % (l, len(rr)) for l, rr in zip(rewritelabels, rewrite[:-2]))
    print 'orig', len(zlib.compress(meta, 9)), ' '.join('%s %s' % (l, len(zlib.compress(rr, 9))) for l, rr in zip(rewritelabels, rewrite[:-2]))
    print
    # Per tag name: occurrence count, name, accumulated size-field bytes;
    # largest count first.
    for v, k, opt in sorted([(v,k,rewrite[-2][k]) for k, v in rewrite[-1].items()], reverse=True): print v, k, opt
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment