Skip to content

Instantly share code, notes, and snippets.

@Luavis
Created June 22, 2020 03:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Luavis/b3095217bdc5f8d02ab7c5a7546480a1 to your computer and use it in GitHub Desktop.
Save Luavis/b3095217bdc5f8d02ab7c5a7546480a1 to your computer and use it in GitHub Desktop.
Java decompiler
#!/usr/bin/env python
import struct
import re
from io import BytesIO
import sys
from code_attribute import read_code_attribute
# Big-endian decoders for the fixed-width primitives found in a class file.
# Each one takes exactly the number of bytes its struct format requires and
# returns the decoded Python value.
uint8 = lambda x: struct.unpack('>B', x)[0]
uint16 = lambda x: struct.unpack('>H', x)[0]
int32 = lambda x: struct.unpack('>l', x)[0]
uint32 = lambda x: struct.unpack('>L', x)[0]
int64 = lambda x: struct.unpack('>q', x)[0]
float32 = lambda x: struct.unpack('>f', x)[0]
# An 8-byte IEEE 754 double needs '>d'; '>f' only accepts 4 bytes and
# raises struct.error when handed the 8 bytes of a double constant.
float64 = lambda x: struct.unpack('>d', x)[0]

# Constant pool of the class currently being processed.  main() rebinds this
# global after parsing the header, and read_attrs() reads it to resolve
# attribute names.
cp = []
class ContantPool:
    """Indexable view over raw ``(tag, value)`` constant-pool entries.

    Indexing resolves references: an entry that merely points at other
    entries is replaced by the values it points to, recursively.
    """

    def __init__(self, pool):
        # List of (tag, value) pairs as produced by read_contant_pool.
        self.pool = pool

    def __getitem__(self, index):
        """Return the resolved constant stored at *index*."""
        entry_tag, payload = self.pool[index]
        if entry_tag in (9, 10, 11, 12):
            # Two-index entries (class ref / name-and-type pairs): resolve
            # both halves and hand them back as a tuple.
            first, second = payload[0], payload[1]
            return (self[first], self[second])
        if entry_tag in (7, 8):
            # Single-index entries: follow the index to the real value.
            return self[payload]
        # Everything else already holds its literal value.
        return payload
def read_acc_flag(acc_table, acc):
    """Return the names of every flag in *acc_table* whose bit is set in *acc*.

    *acc_table* maps a bit mask to a flag name; the result follows the
    table's iteration order.
    """
    return [label for mask, label in acc_table.items() if acc & mask]
def read_contant_pool(stream, pool_size):
    """Parse the constant pool from *stream* and wrap it in a ContantPool.

    *pool_size* is the ``constant_pool_count`` field of the class file, i.e.
    one more than the highest valid index.  Each parsed entry is stored as a
    ``(tag, value)`` pair at its constant-pool index.

    Parsing stops early (after printing a message) on an unknown tag, because
    the size of an unknown entry cannot be determined.
    """
    # Constant pool indices start at 1, so slot 0 is a placeholder.
    pool = [None]
    while len(pool) < pool_size:
        tag = uint8(stream.read(1))
        if tag == 1:
            # Utf8: u2 byte length followed by the encoded text.
            size = uint16(stream.read(2))
            const = stream.read(size).decode('utf-8')
        elif tag == 3:
            const = int32(stream.read(4))
        elif tag == 4:
            const = float32(stream.read(4))
        elif tag == 5:
            const = int64(stream.read(8))
        elif tag == 6:
            const = float64(stream.read(8))
        elif tag in (7, 8):
            # Class / String: a single index of a Utf8 entry.
            const = uint16(stream.read(2))
        elif tag in (9, 10, 11, 12):
            # class ref, name and type
            const = (uint16(stream.read(2)), uint16(stream.read(2)))
        elif tag == 15:
            # MethodHandle: u1 reference kind + u2 reference index.
            const = (uint8(stream.read(1)), uint16(stream.read(2)))
        elif tag in (16, 19, 20):
            # MethodType / Module / Package: one u2 index, kept unresolved.
            const = uint16(stream.read(2))
        elif tag in (17, 18):
            # Dynamic / InvokeDynamic: u2 bootstrap-method index + u2
            # name-and-type index, kept unresolved.
            const = (uint16(stream.read(2)), uint16(stream.read(2)))
        else:
            print('not found cp tag', tag)
            break
        pool.append((tag, const))
        if tag in (5, 6):
            # Long and Double constants take up two pool slots; the second
            # one is unusable.  Without this placeholder every later index
            # would be shifted by one and the loop would over-read.
            pool.append(None)
    return ContantPool(pool)
def read_attrs(stream, size):
    """Read *size* attributes from *stream* into a ``{name: raw_bytes}`` dict.

    Attribute names are resolved through the module-level ``cp``.  When two
    attributes share a name, the later one wins.
    """
    def next_attr():
        # Layout: u2 name index, u4 payload length, then the payload itself.
        attr_name = cp[uint16(stream.read(2))]
        payload_len = uint32(stream.read(4))
        return attr_name, stream.read(payload_len)

    return dict(next_attr() for _ in range(size))
def read_fields(stream, cp, size):
    """Read *size* field entries from *stream*.

    Returns a list of ``(access_flag_names, name, descriptor, attributes)``
    tuples, with name and descriptor resolved through *cp*.
    """
    field_flag_names = {
        0x0001: 'public',
        0x0002: 'private',
        0x0004: 'protected',
        0x0008: 'static',
        0x0010: 'final',
        0x0040: 'volatile',
        0x0080: 'transient',
        0x1000: 'synthetic',
        0x4000: 'enum',
    }

    def read_one():
        # Layout: u2 flags, u2 name index, u2 descriptor index, u2 attribute
        # count, then the attributes.
        flag_bits = uint16(stream.read(2))
        flags = read_acc_flag(field_flag_names, flag_bits)
        field_name = cp[uint16(stream.read(2))]
        field_descriptor = cp[uint16(stream.read(2))]
        attributes = read_attrs(stream, uint16(stream.read(2)))
        return (flags, field_name, field_descriptor, attributes)

    return [read_one() for _ in range(size)]
def read_methods(stream, cp, size):
    """Read *size* method entries from *stream*.

    Returns a list of ``(access_flag_names, name, descriptor, attributes)``
    tuples; ``attributes`` maps attribute names to their raw bytes.
    """
    method_flag_names = {
        0x0001: 'public',
        0x0002: 'private',
        0x0004: 'protected',
        0x0008: 'static',
        0x0010: 'final',
        0x0020: 'synchronized',
        0x0040: 'bridge',
        0x0080: 'varargs',
        0x0100: 'native',
        0x0400: 'abstract',
        0x0800: 'strict',
        0x1000: 'synthetic',
    }
    parsed = []
    remaining = size
    while remaining > 0:
        remaining -= 1
        flags = read_acc_flag(method_flag_names, uint16(stream.read(2)))
        method_name = cp[uint16(stream.read(2))]
        method_descriptor = cp[uint16(stream.read(2))]
        attribute_total = uint16(stream.read(2))
        attributes = read_attrs(stream, attribute_total)
        parsed.append((flags, method_name, method_descriptor, attributes))
    return parsed
def read_header(stream):
    """Read the version numbers and constant pool that follow the magic.

    The caller must already have consumed the 4-byte magic number.

    Returns ``(version, major, minor, cp)`` where ``version`` is a
    human-readable platform name for ``major`` (``None`` when the major
    version is older than anything known) and ``cp`` is the parsed
    constant pool.
    """
    version_table = {
        0x3A: 'Java SE 14',
        0x39: 'Java SE 13',
        0x38: 'Java SE 12',
        0x37: 'Java SE 11',
        0x36: 'Java SE 10',
        0x35: 'Java SE 9',
        0x34: 'Java SE 8',
        0x33: 'Java SE 7',
        0x32: 'Java SE 6.0',
        0x31: 'Java SE 5.0',
        0x30: 'JDK 1.4',
        0x2F: 'JDK 1.3',
        0x2E: 'JDK 1.2',
        0x2D: 'JDK 1.1',
    }
    # The class file stores the minor version before the major version.
    minor = uint16(stream.read(2))
    major = uint16(stream.read(2))
    cp_size = uint16(stream.read(2))
    cp = read_contant_pool(stream, cp_size)
    version = version_table.get(major)
    if version is None and major > 0x3A:
        # Every release after the table's last entry keeps the same fixed
        # offset between class-file major version and Java SE release
        # number (58 -> 14, 59 -> 15, ...).
        version = f'Java SE {major - 44}'
    return (version, major, minor, cp)
def read_body(stream, cp):
    """Read everything that follows the constant pool in a class file.

    Returns ``(acc_flags, class_name, super_name, fields, methods, attrs)``.
    The implemented interfaces are consumed from the stream but not
    returned, so the shape of the result is unchanged.
    """
    acc_table = {
        0x0001: 'public',
        0x0010: 'final',
        0x0020: 'super',
        0x0200: 'interface',
        0x0400: 'abstract',
        0x1000: 'synthetic',
        0x2000: 'annotation',
        0x4000: 'enum',
    }
    access_flags = uint16(stream.read(2))
    class_idx = uint16(stream.read(2))
    super_idx = uint16(stream.read(2))
    interface_size = uint16(stream.read(2))
    # Each implemented interface is one u2 constant-pool index.  They must be
    # skipped even though they are not reported: otherwise the first
    # interface index is mistaken for the field count and everything after
    # it is misparsed.
    stream.read(2 * interface_size)
    field_size = uint16(stream.read(2))
    fields = read_fields(stream, cp, field_size)
    method_size = uint16(stream.read(2))
    methods = read_methods(stream, cp, method_size)
    attr_count = uint16(stream.read(2))
    attrs = read_attrs(stream, attr_count)
    acc_flags = read_acc_flag(acc_table, access_flags)
    class_name = cp[class_idx]
    super_name = cp[super_idx]
    return (
        acc_flags, class_name, super_name,
        fields, methods, attrs,
    )
def format_method_description(desc):
    """Render a JVM method descriptor as ``(param, param, ...): return``.

    Every parameter is listed: primitives by their one-letter code, object
    types as ``Lpkg/Name`` (without the trailing ``;``) and arrays with their
    leading ``[`` markers.  The return type is shown exactly as written in
    the descriptor.

    Raises:
        ValueError: if *desc* is not of the form ``(<params>)<return>``.
    """
    matched = re.match(r'^\(([^\)]*)\)([\S]+)$', desc)
    if matched is None:
        raise ValueError(f'malformed method descriptor: {desc!r}')
    # Tokenise the parameter list one field descriptor at a time.  Splitting
    # on ';' would lose primitive parameters, which carry no terminator.
    param_types = re.findall(r'\[*(?:L[^;]+;|[BCDFIJSZ])', matched.group(1))
    params = [param.rstrip(';') for param in param_types]
    ret = matched.group(2)
    return f"({', '.join(params)}): {ret}"
def main(name, cmd, params):
    """Inspect ``./<name>.class`` according to *cmd*.

    ``methods`` prints a numbered list of method declarations; ``method``
    prints the declaration selected by ``params[0]`` followed by its
    disassembled bytecode.  Any other command prints nothing.

    NOTE(review): the original indentation was lost; this assumes everything
    after the magic-number check was nested under it, so a file with the
    wrong magic is silently ignored.
    """
    global cp
    with open(f'./{name}.class', 'rb') as class_file:
        if class_file.read(4) != b'\xca\xfe\xba\xbe':
            return
        version, major, minor, cp = read_header(class_file)
        # print(f"{version} version: {major}.{minor}")
        parsed = read_body(class_file, cp)
        acc_flags, class_name, super_name, fields, methods, attrs = parsed
        # Both listings are lazy: nothing is formatted until it is iterated.
        field_decls = (
            f"{' '.join(field[0])} {field[2]} {field[1]};" for field in fields
        )
        method_decls = (
            f"{' '.join(method[0])} {method[1]} "
            f"{format_method_description(method[2])}"
            for method in methods
        )
        if cmd == 'methods':
            for i, method_decl in enumerate(method_decls):
                print(f'{i}: {method_decl}')
        elif cmd == 'method':
            index = int(params[0])
            print(list(method_decls)[index])
            codes = read_code_attribute(cp, methods[index][3]['Code'])
            print('\n'.join(f'{op.offset}: {op}' for op in codes))
if __name__ == '__main__':
    # Usage: <script> <class name without ".class"> <command> [params...]
    name, cmd = sys.argv[1], sys.argv[2]
    main(name, cmd, sys.argv[3:])
from io import BytesIO
import struct
def uint8(x):
    """Decode one byte as an unsigned integer."""
    (value,) = struct.unpack('>B', x)
    return value


def int16(x):
    """Decode two big-endian bytes as a signed integer."""
    (value,) = struct.unpack('>h', x)
    return value


def uint16(x):
    """Decode two big-endian bytes as an unsigned integer."""
    (value,) = struct.unpack('>H', x)
    return value


def uint32(x):
    """Decode four big-endian bytes as an unsigned integer."""
    (value,) = struct.unpack('>L', x)
    return value
class OpParser:
    """Describes how to decode one opcode: its mnemonic and its operands."""

    def __init__(self, mnemonic, param_size=0, reader=None):
        self.mnemonic = mnemonic
        # Number of operand bytes that follow the opcode byte.
        self.param_size = param_size
        # Callable (parser, cp, data) -> list of operands, or None.
        self.reader = reader

    def parse(self, stream, cp, offset):
        """Decode this instruction's operands from *stream*.

        The opcode byte itself must already have been consumed.  Returns
        ``(op, next_offset)`` where ``next_offset`` is *offset* advanced past
        the opcode byte and its operands.
        """
        if self.param_size != 0:
            data = stream.read(self.param_size)
        # NOTE: a parser that has a reader is expected to declare a non-zero
        # param_size; otherwise ``data`` is unbound here.
        params = None if self.reader is None else self.reader(self, cp, data)
        op = Op(self.mnemonic, params, offset)
        return op, offset + 1 + self.param_size
class Op:
    """One decoded instruction: mnemonic, operands and byte offset."""

    def __init__(self, mnemonic, params, offset):
        self.mnemonic = mnemonic
        # List of decoded operands, or None when the instruction has none.
        self.params = params
        self.offset = offset

    def __repr__(self):
        """Render as the mnemonic followed by space-separated operands."""
        if self.params is not None:
            rendered = ' '.join(str(param) for param in self.params)
            return f"{self.mnemonic} {rendered}"
        return self.mnemonic
def index_reader(builder, cp, data):
    """Decode a constant-pool index operand and return the constant it names.

    A two-byte operand yields ``[cp[index]]``.  A one-byte operand (``ldc``)
    yields the constant as UTF-8 bytes when it is text and unchanged
    otherwise.  Any other operand width yields ``None``.
    """
    if builder.param_size == 2:
        index = uint16(data)
        return [cp[index]]
    elif builder.param_size == 1:
        index = uint8(data)
        const = cp[index]
        # ldc can load numeric constants as well as strings; only text has
        # an .encode(), so numbers must be passed through untouched.
        if isinstance(const, str):
            const = const.encode('utf-8')
        return [const]
    return None
def uint8_reader(builder, cp, data):
    """Decode a one-byte unsigned operand into a single-element list."""
    value = uint8(data)
    return [value]
def uint16_reader(builder, cp, data):
    """Decode a two-byte unsigned operand into a single-element list."""
    value = uint16(data)
    return [value]
def int16_reader(builder, cp, data):
    """Decode a two-byte signed operand into a single-element list."""
    value = int16(data)
    return [value]
def invoke_interface_reader(builder, cp, data):
    """Decode the four operand bytes of ``invokeinterface``.

    Returns the constant named by the leading two-byte index, followed by
    the third and fourth operand bytes as plain integers.
    """
    target = cp[uint16(data[:2])]
    return [target, data[2], data[3]]
def iinc_reader(builder, cp, data):
    """Decode the operands of ``iinc``: ``[local_index, increment]``.

    The local-variable index is an unsigned byte.  The increment is a
    *signed* byte, so ``i--`` decodes as ``-1`` rather than ``255``.
    """
    index, increment = struct.unpack('>Bb', data)
    return [index, increment]
def primitive_type_reader(builder, cp, data):
    """Decode the one-byte array-type code of ``newarray`` into its name.

    Raises KeyError for a code that is not in the table.
    """
    type_names = {
        4: 'BOOLEAN',
        5: 'CHAR',
        6: 'FLOAT',
        7: 'DOUBLE',
        8: 'BYTE',
        9: 'SHORT',
        10: 'INT',
        11: 'LONG',
    }
    return [type_names[uint8(data)]]
def _int8_reader(builder, cp, data):
    """Decode a one-byte signed immediate (the operand of ``bipush``)."""
    return [struct.unpack('>b', data)[0]]


def _build_code_table():
    """Build the ``opcode byte -> OpParser`` lookup used by read_byte_code.

    Covers the fixed-width JVM instructions.  Still unsupported, because
    they need operand layouts no reader here provides: tableswitch (0xaa),
    lookupswitch (0xab), invokedynamic (0xba), wide (0xc4),
    multianewarray (0xc5), goto_w (0xc8) and jsr_w (0xc9).
    """
    table = {}

    def add(first_opcode, mnemonics, param_size=0, reader=None):
        # Register a run of consecutive opcodes that share an operand layout.
        for opcode, mnemonic in enumerate(mnemonics.split(), first_opcode):
            table[bytes([opcode])] = OpParser(mnemonic, param_size, reader)

    # Constants.
    add(0x00, 'nop aconst_null iconst_m1 iconst_0 iconst_1 iconst_2 iconst_3 '
              'iconst_4 iconst_5 lconst_0 lconst_1 fconst_0 fconst_1 fconst_2 '
              'dconst_0 dconst_1')
    add(0x10, 'bipush', 1, _int8_reader)
    add(0x11, 'sipush', 2, int16_reader)
    add(0x12, 'ldc', 1, index_reader)
    add(0x13, 'ldc_w ldc2_w', 2, index_reader)

    # Loads.
    add(0x15, 'iload lload fload dload aload', 1, uint8_reader)
    add(0x1a, 'iload_0 iload_1 iload_2 iload_3 '
              'lload_0 lload_1 lload_2 lload_3 '
              'fload_0 fload_1 fload_2 fload_3 '
              'dload_0 dload_1 dload_2 dload_3 '
              'aload_0 aload_1 aload_2 aload_3 '
              'iaload laload faload daload aaload baload caload saload')

    # Stores.
    add(0x36, 'istore lstore fstore dstore astore', 1, uint8_reader)
    add(0x3b, 'istore_0 istore_1 istore_2 istore_3 '
              'lstore_0 lstore_1 lstore_2 lstore_3 '
              'fstore_0 fstore_1 fstore_2 fstore_3 '
              'dstore_0 dstore_1 dstore_2 dstore_3 '
              'astore_0 astore_1 astore_2 astore_3 '
              'iastore lastore fastore dastore aastore bastore castore sastore')

    # Operand-stack manipulation.
    add(0x57, 'pop pop2 dup dup_x1 dup_x2 dup2 dup2_x1 dup2_x2 swap')

    # Arithmetic, shifts and bitwise logic.
    add(0x60, 'iadd ladd fadd dadd isub lsub fsub dsub '
              'imul lmul fmul dmul idiv ldiv fdiv ddiv '
              'irem lrem frem drem ineg lneg fneg dneg '
              'ishl lshl ishr lshr iushr lushr '
              'iand land ior lor ixor lxor')
    add(0x84, 'iinc', 2, iinc_reader)

    # Conversions and comparisons.
    add(0x85, 'i2l i2f i2d l2i l2f l2d f2i f2l f2d d2i d2l d2f i2b i2c i2s '
              'lcmp fcmpl fcmpg dcmpl dcmpg')

    # Branches: the operand is a signed 16-bit offset.
    add(0x99, 'ifeq ifne iflt ifge ifgt ifle '
              'if_icmpeq if_icmpne if_icmplt if_icmpge if_icmpgt if_icmple '
              'if_acmpeq if_acmpne goto jsr', 2, int16_reader)
    add(0xa9, 'ret', 1, uint8_reader)
    add(0xc6, 'ifnull ifnonnull', 2, int16_reader)

    # Returns.
    add(0xac, 'ireturn lreturn freturn dreturn areturn return')

    # Field access and method invocation.
    add(0xb2, 'getstatic putstatic getfield putfield '
              'invokevirtual invokespecial invokestatic', 2, index_reader)
    add(0xb9, 'invokeinterface', 4, invoke_interface_reader)

    # Object and array creation, casts, monitors.
    add(0xbb, 'new', 2, index_reader)
    add(0xbc, 'newarray', 1, primitive_type_reader)
    add(0xbd, 'anewarray', 2, index_reader)
    add(0xbe, 'arraylength athrow')
    add(0xc0, 'checkcast instanceof', 2, index_reader)
    add(0xc2, 'monitorenter monitorexit')
    return table


code_table = _build_code_table()
def read_byte_code(cp, stream):
    """Disassemble *stream* into a list of Op objects.

    Decoding stops at the end of the stream, or at the first opcode missing
    from ``code_table`` (a message is printed and the instructions decoded so
    far are returned).
    """
    decoded = []
    position = 0
    # iter() with a sentinel ends the loop once read() returns b'' (EOF).
    for op_code in iter(lambda: stream.read(1), b''):
        parser = code_table.get(op_code)
        if parser is None:
            print(f"code not found {hex(uint8(op_code))}")
            print()
            break
        op, position = parser.parse(stream, cp, position)
        decoded.append(op)
    return decoded
def read_code_attribute(cp, code_attribute):
    """Disassemble the bytecode held in a raw ``Code`` attribute payload.

    Only the instruction bytes are decoded; whatever follows them in the
    attribute is ignored.
    """
    attr_stream = BytesIO(code_attribute)
    # max_stack and max_locals are decoded to step over them; neither value
    # is used afterwards.
    uint16(attr_stream.read(2))
    uint16(attr_stream.read(2))
    bytecode = attr_stream.read(uint32(attr_stream.read(4)))
    return read_byte_code(cp, BytesIO(bytecode))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment