Skip to content

Instantly share code, notes, and snippets.

@impiaaa
Last active December 15, 2018 00:49
Show Gist options
  • Save impiaaa/65d27f54d3a4f5e6899d4f3862c3b374 to your computer and use it in GitHub Desktop.
Save impiaaa/65d27f54d3a4f5e6899d4f3862c3b374 to your computer and use it in GitHub Desktop.
import dis, inspect, typing
import math, sys
from operator import or_
from functools import reduce
#def putchar(n: int) -> int: pass
def fact(n: int) -> int:
if n <= 1: return 1
else: return n*fact(n-1)
def test1(n: int):
if n == 1: putchar(49)
elif n == 2: putchar(50)
putchar(63)
def test2(n: int):
i = 33
while i < n:
putchar(i)
i = i+1
def test3(n: int) -> int:
x = 14 if n%2 == 0 else 0
x = x+2
return x
alwaysjump = {dis.opmap['JUMP_FORWARD'], dis.opmap['JUMP_ABSOLUTE']}
hasjump = set(dis.hasjrel+dis.hasjabs)
def splitIntoBasicBlocks(instructions):
currentBlock = []
for inst in instructions:
if inst.is_jump_target:
if len(currentBlock) > 0: yield currentBlock
currentBlock = []
currentBlock.append(inst)
if inst.opcode in hasjump:
if len(currentBlock) > 0: yield currentBlock
currentBlock = []
if len(currentBlock) > 0: yield currentBlock
def findSourcesDestinations(basicBlocks):
destinations = [None]*len(basicBlocks)
sources = [set() for block in basicBlocks]
for i, blockInsts in enumerate(basicBlocks):
nextDest = None
instDest = None
if i < len(basicBlocks)-1 and blockInsts[-1].opcode not in alwaysjump:
nextDest = i+1
sources[i+1].add(i)
if blockInsts[-1].opcode in hasjump:
addr = blockInsts[-1].argval
for j, otherBlockInsts in enumerate(basicBlocks):
if otherBlockInsts[0].offset == addr:
instDest = j
sources[j].add(i)
break
destinations[i] = (instDest, nextDest)
return list([(sources[i], destinations[i]) for i in range(len(basicBlocks))])
def printInstruction(inst, rwidth):
print(inst.opname, end='')
if inst.arg is None:
print()
else:
print(str(inst.arg).rjust(rwidth-len(inst.opname)), end='')
if len(inst.argrepr) == 0:
print()
else:
print(' (%s)'%inst.argrepr)
def printInstructions(insts):
maxwidth = max([len(inst.opname) for inst in insts])+max([len(str(inst.arg)) for inst in insts])+1
for inst in insts: printInstruction(inst, maxwidth)
typeMap = {int: "i"+str(int(math.log2(sys.maxsize+1)+1)), float: "double", bool: "i1"}
class StackItem:
def fullName(self, defaultType="void"):
return "%s %s"%(self.typeName(defaultType), self)
def typeName(self, defaultType="void"):
return typeMap.get(self.type, defaultType)+('*'*self.pointer)
def __hash__(self):
return hash((self.name, self.type, self.pointer))
def __eq__(self, other):
return (self.name, self.type, self.pointer) == (other.name, other.type, other.pointer)
class Constant(StackItem):
def __init__(self, val):
self.name = repr(val)
self.type = type(val)
self.pointer = 0
def __str__(self):
return "%s"%self.name
class Global(StackItem):
def __init__(self, name):
self.name = name
self.type = None
self.pointer = 0
def __str__(self):
return "@%s"%self.name
class Register(StackItem):
def __init__(self, name, type=None, pointer=0):
self.name = str(name)
self.type = type
self.pointer = pointer
def __str__(self):
return "%%%s"%self.name
instTranslations = [None,
(None, 0, 1), # POP_TOP
('', 2, 2), # ROT_TWO TODO
('', 3, 3), # ROT_THREE TODO
('', 2, 1), # DUP_TOP
('', 4, 2), # DUP_TOP_TWO
None,
None,
None,
('nop', 0, 0), # NOP
('mul 1', 1, 1), # UNARY_POSITIVE
('mul -1', 1, 1), # UNARY_NEGATIVE
('icmp eq 0', 1, 1), # UNARY_NOT
None,
None,
('xor -1', 1, 1), # UNARY_INVERT
('', 1, 2), # BINARY_MATRIX_MULTIPLY TODO
('', 2, 2), # INPLACE_MATRIX_MULTIPLY TODO
None,
('powi', 1, 2), # BINARY_POWER
('mul', 1, 2), # BINARY_MULTIPLY
None,
('srem', 1, 2), # BINARY_MODULO TODO
('add', 1, 2), # BINARY_ADD
('sub', 1, 2), # BINARY_SUBTRACT
('extractvalue', 1, 2), # BINARY_SUBSCR
('sdiv', 1, 2), # BINARY_FLOOR_DIVIDE
('fdiv', 1, 2), # BINARY_TRUE_DIVIDE
('sdiv', 2, 2), # INPLACE_FLOOR_DIVIDE
('fdiv', 2, 2), # INPLACE_TRUE_DIVIDE
None, None, None, None,
None, None, None, None,
None, None, None, None,
None, None, None, None,
None, None, None, None,
('call get_awaittable iter',
1, 1), # GET_AITER
('call get_awaittable next',
1, 1), # GET_ANEXT
('', 2, 1), # BEFORE_ASYNC_WITH TODO
None,
None,
('add', 2, 2), # INPLACE_ADD
('sub', 2, 2), # INPLACE_SUBTRACT
('mul', 2, 2), # INPLACE_MULTIPLY
None,
('srem', 2, 2), # INPLACE_MODULO TODO
('insertvalue', 1, 2), # STORE_SUBSCR
('', 0, 2), # DELETE_SUBSCR TODO
('shl', 1, 2), # BINARY_LSHIFT
('lshr', 1, 2), # BINARY_RSHIFT
('and', 1, 2), # BINARY_AND
('xor', 1, 2), # BINARY_XOR
('or', 1, 2), # BINARY_OR
('powi', 2, 2), # INPLACE_POWER
'GET_ITER',
'GET_YIELD_FROM_ITER',
'PRINT_EXPR',
'LOAD_BUILD_CLASS',
'YIELD_FROM',
'GET_AWAITABLE',
None,
'INPLACE_LSHIFT',
'INPLACE_RSHIFT',
'INPLACE_AND',
'INPLACE_XOR',
'INPLACE_OR',
'BREAK_LOOP',
'WITH_CLEANUP_START',
'WITH_CLEANUP_FINISH',
('ret', 0, 1), # RETURN_VALUE
'IMPORT_STAR',
None,
'YIELD_VALUE',
'POP_BLOCK',
'END_FINALLY',
'POP_EXCEPT',
'STORE_NAME',
'DELETE_NAME',
'UNPACK_SEQUENCE',
'FOR_ITER',
'UNPACK_EX',
'STORE_ATTR',
'DELETE_ATTR',
'STORE_GLOBAL',
'DELETE_GLOBAL',
None,
('', 0, 0), # LOAD_CONST
'LOAD_NAME',
'BUILD_TUPLE',
'BUILD_LIST',
'BUILD_SET',
'BUILD_MAP',
'LOAD_ATTR',
('icmp', 1, 2), # COMPARE_OP
'IMPORT_NAME',
'IMPORT_FROM',
('br', 0, 0), # JUMP_FORWARD
'JUMP_IF_FALSE_OR_POP',
'JUMP_IF_TRUE_OR_POP',
('br', 0, 0), # JUMP_ABSOLUTE
('br', 0, 1), # POP_JUMP_IF_FALSE TODO
('br', 0, 1), # POP_JUMP_IF_TRUE
('', 0, 0), # LOAD_GLOBAL TODO
None,
None,
'CONTINUE_LOOP',
('br', 0, 0), # SETUP_LOOP
'SETUP_EXCEPT',
'SETUP_FINALLY',
None,
('load', 1, 0), # LOAD_FAST
('store', 0, 1), # STORE_FAST
'DELETE_FAST',
None,
None,
None,
'RAISE_VARARGS',
('call', 0, 0), # CALL_FUNCTION TODO
'MAKE_FUNCTION',
'BUILD_SLICE',
'MAKE_CLOSURE',
'LOAD_CLOSURE',
'LOAD_DEREF',
'STORE_DEREF',
'DELETE_DEREF',
None,
'CALL_FUNCTION_VAR',
'CALL_FUNCTION_KW',
'CALL_FUNCTION_VAR_KW',
'SETUP_WITH',
'EXTENDED_ARG',
'LIST_APPEND',
'SET_ADD',
'MAP_ADD',
'LOAD_CLASSDEREF',
'BUILD_LIST_UNPACK',
'BUILD_MAP_UNPACK',
'BUILD_MAP_UNPACK_WITH_CALL',
'BUILD_TUPLE_UNPACK',
'BUILD_SET_UNPACK',
'SETUP_ASYNC_WITH']
def translateBlock(insts, destinations):
global lastReg
stackDebt = []
stack = []
asm = []
names = set()
for inst in insts:
x = instTranslations[inst.opcode]
if isinstance(x, str):
asm.append(inst.opname)
continue
instTranslation, pushCount, popCount = x
if inst.opname == 'CALL_FUNCTION': popCount = inst.argval+1
args = []
instType = None
for i in range(popCount):
if len(stack) > 0:
arg = stack.pop()
args.append(arg)
if arg.type is not None:
instType = arg.type
else:
arg = Register(lastReg)
stackDebt.append(arg)
args.append(arg)
lastReg += 1
args.reverse()
ret = []
if inst.opname == 'COMPARE_OP':
instType = bool
for i in range(pushCount):
r = Register(lastReg, instType)
stack.append(r)
ret.append(r)
lastReg += 1
if instTranslation is not None:
extarg = inst.argrepr
argtype = typeMap.get(type(inst.argval), "void")
if inst.opname == 'COMPARE_OP':
extarg = ('slt', 'sle', 'eq', 'ne', 'sgt', 'sge')[inst.arg]
if inst.opname == 'LOAD_CONST':
stack.append(Constant(inst.argval))
elif inst.opname == 'LOAD_GLOBAL':
stack.append(Global(extarg))
elif inst.opcode in hasjump:
if len(args) > 0:
labels = ', '.join(['label %%label%d'%x for x in (reversed(destinations) if 'FALSE' in inst.opname else destinations) if x is not None])
asm.append('%s %s, %s'%(instTranslation,
', '.join([x.fullName('i1') for x in args]),
labels))
else:
asm.append('%s label %%label%d'%(instTranslation, [d for d in destinations if d][-1]))
elif inst.opname == 'CALL_FUNCTION':
fn = args.pop(0)
stored = []
for arg in args:
asm.append('%%%d = alloca %s'%(lastReg, arg.typeName()))
asm.append('store %s, %s* %%%d'%(arg.fullName(), arg.typeName(), lastReg))
stored.append(Register(lastReg, type=arg.type, pointer=arg.pointer+1))
lastReg += 1
if isinstance(fn, Global) and fn.name in globals():
returnType = inspect.signature(globals()[fn.name]).return_annotation
typeName = typeMap.get(returnType, 'void')
if returnType == inspect.Signature.empty:
# still need to push 'none' to the stack
ret = [Register(lastReg)]
else:
ret = [Register(lastReg, returnType)]
lastReg += 1
else:
returnType = None
ret = [Register(lastReg)]
lastReg += 1
typeName = ret[0].typeName()
stack.extend(ret)
if returnType == inspect.Signature.empty:
asm.append('%s %s %s(%s)'%(instTranslation,
typeName,
fn,
', '.join([x.fullName() for x in stored])))
else:
asm.append('%s = %s %s %s(%s)'%(ret[0],
instTranslation,
typeName,
fn,
', '.join([x.fullName() for x in stored])))
elif inst.opname == 'LOAD_FAST':
asm.append('%s = %s %s, %s* %%%s'%(', '.join(map(str, ret)),
instTranslation,
ret[0].typeName(),
argtype,
extarg))
elif inst.opname == 'STORE_FAST':
n = Register(extarg, type=type(inst.argval), pointer=1)
asm.append('%s %s, %s'%(instTranslation,
', '.join([x.fullName() for x in args]),
n.fullName()))
names.add(n)
elif inst.opname == 'RETURN_VALUE' and args[0].type == type(None):
asm.append('ret void')
break
elif len(ret) == 0:
asm.append('%s %s %s'%(instTranslation,
extarg,
', '.join([x.fullName() for x in args])))
if inst.opname == 'RETURN_VALUE': break
else:
asm.append('%s = %s %s %s %s'%(', '.join(map(str, ret)),
instTranslation,
extarg,
args[0].typeName(),
', '.join(map(str, args))))
if insts[-1].opcode not in hasjump and insts[-1].opname != 'RETURN_VALUE':
asm.append('br label %label'+str(destinations[1]))
return asm, names, stackDebt, stack
for func in (fact, test1, test2, test3):
lastReg = 0
blocks = list(splitIntoBasicBlocks(dis.get_instructions(func)))
srcdest = findSourcesDestinations(blocks)
#print(func.__name__)
#for i, (block, (sources, destinations)) in enumerate(zip(blocks, srcdest)):
# print(i, 'comes from', ', '.join(map(str, sources)))
# printInstructions(block)
# print('goes to', ', '.join(map(str, destinations)))
# print()
sig = inspect.signature(func)
types = typing.get_type_hints(func)
assembly = [translateBlock(block, destinations) for block, (sources, destinations) in zip(blocks, srcdest)]
print('define %s @%s(%s) {'%(typeMap.get(sig.return_annotation, "void"),
func.__name__,
', '.join(['%s* %%%s'%(typeMap.get(types.get(x, None), "void"), x) for x in sig.parameters])))
allnames = reduce(or_, [names for asm, names, stackDebt, stack in assembly])
if len(allnames) > 0:
print('alloca:')
for name in allnames:
name.pointer -= 1
print(' %s = alloca %s'%(name, name.typeName()))
print(' br label %label0')
print()
for asmIdx, (asm, names, stackDebt, stack) in enumerate(assembly):
print('label'+str(asmIdx)+':')
for phiIdx, phiReg in enumerate(stackDebt):
if phiReg.type is None:
phiReg.type = assembly[list(srcdest[asmIdx][0])[0]][3][phiIdx].type
print(' '+str(phiReg), '= phi '+phiReg.typeName(), end=' ')
print(', '.join(['[ %s, %%label%d ]'%(assembly[sourceIdx][3][phiIdx], sourceIdx) for sourceIdx in srcdest[asmIdx][0]]))
print('\n'.join([' '+line for line in asm]))
print()
print('}')
print()
declare i32 @putchar(i32 %c)
define i64 @fact(i64* %n) {
label0:
%0 = load i64, i64* %n
%1 = icmp sle i64 %0, 1
br i1 %1, label %label1, label %label2
label1:
ret i64 1
label2:
%2 = load i64, i64* %n
%3 = load i64, i64* %n
%4 = sub i64 %3, 1
%5 = alloca i64
store i64 %4, i64* %5
%6 = call i64 @fact(i64* %5)
%7 = mul i64 %2, %6
ret i64 %7
}
define void @test1(i64* %n) {
label0:
%0 = load i64, i64* %n
%1 = icmp eq i64 %0, 1
br i1 %1, label %label1, label %label2
label1:
%2 = call i32 @putchar(i32 49)
br label %label4
label2:
%3 = load i64, i64* %n
%4 = icmp eq i64 %3, 2
br i1 %4, label %label3, label %label4
label3:
%5 = call i32 @putchar(i32 50)
br label %label4
label4:
%6 = call i32 @putchar(i32 63)
ret void
}
define void @test2(i64* %n) {
alloca:
%i = alloca i64
br label %label0
label0:
store i64 33, i64* %i
br label %label1
label1:
%0 = load i64, i64* %i
%1 = load i64, i64* %n
%2 = icmp slt i64 %0, %1
br i1 %2, label %label2, label %label3
label2:
%3 = load i64, i64* %i
%4 = trunc i64 %3 to i32
%5 = call i32 @putchar(i32 %4)
%6 = load i64, i64* %i
%7 = add i64 %6, 1
store i64 %7, i64* %i
br label %label1
label3:
br label %label4
label4:
ret void
}
define i64 @test3(i64* %n) {
alloca:
%x = alloca i64
br label %label0
label0:
%0 = load i64, i64* %n
%1 = srem i64 %0, 2
%2 = icmp eq i64 %1, 0
br i1 %2, label %label1, label %label2
label1:
br label %label3
label2:
br label %label3
label3:
%3 = phi i64 [ 14, %label1 ], [ 0, %label2 ]
store i64 %3, i64* %x
%4 = load i64, i64* %x
%5 = add i64 %4, 2
store i64 %5, i64* %x
%6 = load i64, i64* %x
ret i64 %6
}
#
# This file was generated by the Retargetable Decompiler
# Website: https://retdec.com
# Copyright (c) 2018 Retargetable Decompiler <info@retdec.com>
#
# ------------------------ Functions -------------------------
def fact(a1):
if a1 <= 1:
return 1
return a1 * fact(a1 - 1)
def test1(a1):
if *a1 == 1:
putchar(49)
else:
if *a1 == 2:
putchar(50)
putchar(63)
def test2(a1):
c = 33
while c < *a1:
putchar(c)
c += 1
def test3(a1):
if *a1 % 2 == 0:
v1 = 14
else:
v1 = 0
return v1 + 2
# --------------------- Meta-Information ---------------------
# Detected functions: 4
# Decompilation date: 2018-07-02 18:12:22
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment