Skip to content

Instantly share code, notes, and snippets.

@ivg
Last active August 12, 2020 15:41
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ivg/7c8f12d9e6ed9164c9c4fd7727529105 to your computer and use it in GitHub Desktop.
Save ivg/7c8f12d9e6ed9164c9c4fd7727529105 to your computer and use it in GitHub Desktop.
Lifting a toy bytecode using Core Theory
open Core_kernel
open Bap_core_theory
open Bap.Std
open KB.Syntax
include Self()
let package = "bytoy"
type name = string [@@deriving equal,sexp]
type oper = Reg of int | Imm of int [@@deriving equal,sexp]
type insn = name * int * oper [@@deriving equal,sexp]
type code = insn array [@@deriving equal,sexp]
type ctxt = {proc : int; pc: int} [@@deriving equal, sexp]
type proc = {name : string; code : code} [@@deriving equal, sexp]
type prog = proc array [@@deriving equal,sexp]
let byte = Theory.Bitv.define 8
let word = Theory.Bitv.define 32
let bool = Theory.Bool.t
let heap = Theory.Mem.define word byte (* global byte-addressable memory *)
let vars = Theory.Mem.define word word (* local word addressable variables *)
module Word = Bitvec.M32
module Byte = Bitvec.M8
let pp_insn ppf x = Sexp.pp_hum ppf (sexp_of_insn x)
let r = Array.init 32 ~f:(fun i ->
Theory.Var.define word (sprintf "R%d" i))
let mem = Theory.Var.define heap "heap"
let local = Theory.Var.define vars "locals"
let fp = Theory.Var.define word "FP"
let frame_size = 128
module Language(Core : Theory.Core) = struct
open Core
let reg i = var r.(i)
let imm x = int word x
let op : oper -> _ Theory.Bitv.t Theory.pure = function
| Reg x -> reg x
| Imm x -> imm (Word.int x)
let loads x = signed word @@ load (var mem) x
let loadb x = unsigned word @@ load (var mem) x
let loadw x = loadw word b1 (var mem) x
let saveb p x = store (var mem) p (low byte x)
let savew p x = storew b1 (var mem) p x
let get x = load (var local) (add (var fp) x)
let put p x = store (var local) (add (var fp) p) x
let case_binop name ~matches:ok no = match name with
| "add" -> ok add
| "sub" -> ok sub
| "mul" -> ok mul
| "div" -> ok div
| _ -> no ()
let case_loadw name ~matches:ok no = match name with
| "lds" -> ok loads
| "ldb" -> ok loadb
| "ldw" -> ok loadw
| _ -> no ()
let case_savew name ~matches:ok no = match name with
| "stb" -> ok saveb
| "stw" -> ok savew
| _ -> no ()
let pass = perform Theory.Effect.Sort.bot
let skip = perform Theory.Effect.Sort.bot
let block seq data ctrl =
Theory.Label.for_addr (Word.int seq) >>= fun label ->
blk label data ctrl
let data seq data = block seq data skip
let ctrl seq ctrl = block seq pass ctrl
let const x = int word (Word.int x)
let new_frame pc = seq
(set fp (add (var fp) (const (frame_size+1))))
(set local (put (const ~-1) (const pc)))
let del_frame =
set fp (sub (var fp) (const (frame_size+1)))
let jumps prog {proc; pc} name r x = match x with
| Reg _ -> failwith "ill-formed program: non-const jump"
| Imm dst ->
match name with
| "call" ->
Theory.Label.for_name prog.(dst).name >>= fun dst ->
KB.provide Theory.Label.is_subroutine dst (Some true) >>=
fun () ->
block pc (new_frame pc) (goto dst)
| "ret" ->
block pc del_frame (jmp (get (const frame_size)))
| "jmp" ->
Theory.Label.for_addr (Word.int (pc+dst)) >>= fun dst ->
ctrl pc @@ goto dst
| "beq" ->
Theory.Label.for_addr (Word.int (pc+dst)) >>= fun dst ->
ctrl pc @@ branch (non_zero (reg r))
(goto dst)
skip
| opcode -> invalid_argf "unknown opcode %S" opcode ()
let insn : prog -> ctxt -> insn -> _ = fun prog {proc;pc} (name,d,s) ->
case_binop name ~matches:(fun f ->
data pc @@ set r.(d) (f (reg d) (op s))) @@ fun () ->
case_loadw name ~matches:(fun load ->
data pc @@ set r.(d) (load (op s))) @@ fun () ->
case_savew name ~matches:(fun save ->
data pc @@ set mem (save (reg d) (op s))) @@ fun () ->
match name with
| "get" ->
data pc @@ set r.(d) (get (op s))
| "set" ->
data pc @@ set local (put (reg d) (op s))
| _ -> jumps prog {proc; pc} name d s
end
let def_only blk = Term.length jmp_t blk = 0
let is_unconditional jmp = match Jmp.cond jmp with
| Bil.Int _ -> true
| _ -> false
let is_last_jump_unconditional blk =
match Term.last jmp_t blk with
| None -> false
| Some jmp -> is_unconditional jmp
let fall_if_possible dst blk =
if is_last_jump_unconditional blk
then blk
else
Term.append jmp_t blk @@
Jmp.reify ~dst ()
let localize_if_call pc blk = match Term.last jmp_t blk with
| None -> KB.return blk
| Some jmp -> match Jmp.alt jmp with
| None -> KB.return blk
| Some alt ->
Theory.Label.for_addr (Word.int (pc+1)) >>| fun dst ->
Term.update jmp_t blk @@
Jmp.reify ()
~tid:(Term.tid jmp)
?cnd:(Jmp.guard jmp)
~dst:(Jmp.resolved dst)
~alt
(* concat two def-only blocks *)
let append_def_only b1 b2 =
let b = Blk.Builder.init ~same_tid:true ~copy_defs:true b1 in
Term.enum def_t b2 |> Seq.iter ~f:(Blk.Builder.add_def b);
Term.enum jmp_t b2 |> Seq.iter ~f:(Blk.Builder.add_jmp b);
Blk.Builder.result b
(* [append xs ys]
pre: the first block of [xs] is the exit block;
pre: the first block of [ys] is the entry block;
pre: the last block of [ys] is the exit block;
post: the first block of [append xs ys] is the new exit block. *)
let append xs ys = match xs, ys with
| [],xs | xs,[] -> xs
| x :: xs, y :: ys when def_only x ->
List.rev_append ys (append_def_only x y :: xs)
| x::xs, y::_ ->
let fall = Jmp.resolved @@ Term.tid y in
let x = fall_if_possible fall x in
List.rev_append ys (x::xs)
let add_blks sub blks = List.fold blks ~init:sub ~f:(Term.append blk_t)
let clone ~tid blk =
let b = Blk.Builder.create ~tid () in
Term.enum phi_t blk |> Seq.iter ~f:(Blk.Builder.add_phi b);
Term.enum def_t blk |> Seq.iter ~f:(Blk.Builder.add_def b);
Term.enum jmp_t blk |> Seq.iter ~f:(Blk.Builder.add_jmp b);
Blk.Builder.result b
let mark_entry_blk pc blks =
Theory.Label.for_addr (Word.int pc) >>| fun tid ->
match blks with
| [] -> [Blk.create ~tid ()]
| blk :: blks -> clone ~tid blk :: blks
let update_call pc blks =
match blks with
| [] -> KB.return []
| last :: blks ->
localize_if_call pc last >>| fun last ->
last :: blks
let build_program lift source =
let prog = Program.create () in
let ctxt = {proc=0; pc=0} in
Seq.of_array source |>
KB.Seq.fold ~init:(ctxt,prog) ~f:(fun (ctxt,prog) proc ->
Seq.of_array proc.code |>
KB.Seq.fold ~init:(ctxt,[]) ~f:(fun (ctxt,blks) insn ->
lift source ctxt insn >>=
mark_entry_blk ctxt.pc >>= fun blks' ->
update_call ctxt.pc (append blks blks') >>| fun blks ->
{ctxt with pc = ctxt.pc+1},blks) >>= fun (ctxt,blks) ->
Theory.Label.for_name proc.name >>| fun tid ->
let sub = Sub.create ~tid ~name:proc.name () in
let sub = add_blks sub (List.rev blks) in
let ctxt = {ctxt with proc = ctxt.proc + 1 } in
(ctxt,Term.append sub_t prog sub)) >>| snd
let load filename =
Sexp.load_sexps filename |>
Array.of_list_map ~f:(function
| Sexp.List (Atom "defun" :: Atom name :: _ :: code) ->
let code = code_of_sexp (Sexp.List code) in
{name; code}
| _ -> failwith "Parser error, expects (defun <name> () code)")
let lifter = KB.Class.declare "bytoy-lifter" ()
let program_t = KB.Domain.optional "program_t" ~equal:Program.equal
let program = KB.Class.property lifter "program"
~public:true
~package
program_t
let bytecode_domain = KB.Domain.optional "bytecode"
~equal:[%equal: prog*ctxt*insn]
~inspect:(fun (_,_,insn) -> sexp_of_insn insn)
let bytecode = KB.Class.property Theory.Program.cls "bytecode"
~public:true
~package
bytecode_domain
let provide_semantics () =
KB.promise Theory.Program.Semantics.slot @@ fun insn ->
Theory.(instance>=>require) () >>= fun (module Core) ->
let module Lift = Language(Core) in
KB.collect bytecode insn >>= function
| None -> KB.return Insn.empty
| Some (prog,ctxt,code) -> Lift.insn prog ctxt code
let lift prog =
let collect_bir prog ctxt code =
Theory.Label.for_addr (Word.int ctxt.pc) >>= fun insn ->
KB.provide bytecode insn (Some (prog,ctxt,code)) >>= fun () ->
KB.collect Theory.Program.Semantics.slot insn >>| fun sema ->
KB.Value.get Term.slot sema in
build_program collect_bir prog
let run_lifter prog =
let obj =
KB.Object.create lifter >>= fun lifter ->
lift prog >>= fun prog ->
KB.provide program lifter (Some prog) >>| fun () ->
lifter in
match KB.run lifter obj (Toplevel.current ()) with
| Error conflict ->
error "ill-formed program: %a" KB.Conflict.pp conflict;
invalid_arg "ill-formed program"
| Ok (value,state) ->
Toplevel.set state;
match KB.Value.get program value with
| None -> assert false
| Some prog -> prog
let () = Project.Input.register_loader "bytoy" @@ fun filename ->
let empty = Memmap.empty in
let source = load filename in
provide_semantics ();
let prog = run_lifter source in
Project.Input.create `unknown filename ~code:empty ~data:empty
~finish:(fun proj -> Project.with_program proj prog)
@ivg
Copy link
Author

ivg commented Apr 20, 2020

Building

Put this file into a separate folder and issue the following command:

touch bytoy.mli # needed only once
bapbuild bytoy.plugin && bapbundle install bytoy.plugin

Using

Using the following example program (put it into the example.bytoy file):

(defun main ()
  (call 0 (imm 1))
  (mul 2 (imm 0))
  (add 1 (reg 2))
  (beq 1 (imm 2))
  (jmp 0 (imm 2))
  (call 0 (imm 2))
  (call 0 (imm 3)))

(defun hello-world ()
  (get 1 (imm 0))
  (get 2 (imm 1))
  (add 1 (reg 2))
  (ret 0 (imm 0)))

(defun is-zero ()
  (ret 0 (imm 0)))

(defun abort ()
  (ret 0 (imm 0)))

We can get lift and gets its graph with the following commands:

bap example.bytoy --loader=bytoy -dcfg > example.dot

You can then view the generated graph either using xdot example.dot or translate it to an image, e.g., with dot example.dot -Tpng -o example.png, the sample output is provided below:

example

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment