Last active
August 27, 2018 14:11
-
-
Save totem3/fbe292bd0d91287adfc80dbe91b8e1df to your computer and use it in GitHub Desktop.
brainf*ck jit
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
extern crate libc; | |
use std::env; | |
use std::fs::{self, File}; | |
use std::io::Read; | |
use std::mem; | |
use std::ops::{Index, IndexMut}; | |
#[derive(Debug)] | |
pub struct Inst { | |
kind: InstKind, | |
arg: usize, | |
} | |
#[derive(Debug, PartialEq)] | |
enum InstKind { | |
MoveRight, // > | |
MoveLeft, // < | |
Incr, //+ | |
Decr, //- | |
Output, //. | |
Input, //, | |
JumpIfZero, //[ | |
JumpIfNonZero, //] | |
LoopSetToZero, //[-] | |
LoopIncrPtr, //[>>>] | |
LoopDecrPtr, //[<<<] | |
LoopMoveDataRight, //[->>>+<<<] | |
LoopMoveDataLeft, //[-<<<+>>>] | |
} | |
impl InstKind { | |
fn try_from(f: u8) -> Option<Self> { | |
match f { | |
b'>' => Some(InstKind::MoveRight), | |
b'<' => Some(InstKind::MoveLeft), | |
b'+' => Some(InstKind::Incr), | |
b'-' => Some(InstKind::Decr), | |
b'.' => Some(InstKind::Output), | |
b',' => Some(InstKind::Input), | |
b'[' => Some(InstKind::JumpIfZero), | |
b']' => Some(InstKind::JumpIfNonZero), | |
_ => None, | |
} | |
} | |
} | |
fn optimize_loop(insts: &Vec<Inst>, loop_start: usize) -> Vec<Inst> { | |
let mut new_insts = vec![]; | |
if insts.len() - loop_start == 2 { | |
let inst = &insts[loop_start + 1]; | |
match inst.kind { | |
InstKind::Incr | InstKind::Decr => { | |
new_insts.push(Inst { | |
kind: InstKind::LoopSetToZero, | |
arg: 0, | |
}); | |
} | |
InstKind::MoveRight => { | |
new_insts.push(Inst { | |
kind: InstKind::LoopIncrPtr, | |
arg: inst.arg, | |
}); | |
} | |
InstKind::MoveLeft => { | |
new_insts.push(Inst { | |
kind: InstKind::LoopDecrPtr, | |
arg: inst.arg, | |
}); | |
} | |
_ => {} | |
} | |
} else if insts.len() - loop_start == 5 { | |
if insts[loop_start + 1].kind == InstKind::Decr | |
&& insts[loop_start + 3].kind == InstKind::Incr | |
&& insts[loop_start + 1].arg == 1 | |
&& insts[loop_start + 3].arg == 1 | |
{ | |
if insts[loop_start + 2].kind == InstKind::MoveRight | |
&& insts[loop_start + 4].kind == InstKind::MoveLeft | |
&& insts[loop_start + 2].arg == insts[loop_start + 4].arg | |
{ | |
new_insts.push(Inst { | |
kind: InstKind::LoopMoveDataRight, | |
arg: insts[loop_start + 2].arg, // size to move pointer | |
}) | |
} else if insts[loop_start + 2].kind == InstKind::MoveLeft | |
&& insts[loop_start + 4].kind == InstKind::MoveRight | |
&& insts[loop_start + 2].arg == insts[loop_start + 4].arg | |
{ | |
new_insts.push(Inst { | |
kind: InstKind::LoopMoveDataLeft, | |
arg: insts[loop_start + 2].arg, | |
}); | |
} | |
} | |
} | |
new_insts | |
} | |
fn translate_program(program: &[u8]) -> Vec<Inst> { | |
let mut insts = Vec::with_capacity(program.len()); | |
let mut idx = 0; | |
let mut bracket_stack = Vec::new(); | |
while idx < program.len() { | |
let p = program[idx]; | |
match InstKind::try_from(p) { | |
Some(v) => match v { | |
InstKind::JumpIfZero => { | |
bracket_stack.push(insts.len()); | |
let inst = Inst { | |
kind: InstKind::JumpIfZero, | |
arg: 0, | |
}; | |
insts.push(inst); | |
idx += 1; | |
} | |
InstKind::JumpIfNonZero => { | |
let open_idx = match bracket_stack.pop() { | |
Some(v) => v, | |
None => panic!("unmatched bracket"), | |
}; | |
let new_insts = optimize_loop(&insts, open_idx); | |
if new_insts.is_empty() { | |
insts[open_idx].arg = insts.len(); | |
let inst = Inst { | |
kind: InstKind::JumpIfNonZero, | |
arg: open_idx, | |
}; | |
insts.push(inst); | |
} else { | |
insts.drain(open_idx..); | |
for i in new_insts { | |
insts.push(i); | |
} | |
} | |
idx += 1; | |
} | |
kind => { | |
let start = idx; | |
while program[idx] == p { | |
idx += 1; | |
} | |
let cnt = idx - start; | |
let inst = Inst { kind, arg: cnt }; | |
insts.push(inst); | |
} | |
}, | |
None => { | |
idx += 1; | |
} | |
}; | |
} | |
insts | |
} | |
struct JitMemory { | |
program: *mut u8, | |
position: usize, | |
} | |
const PAGE_SIZE: usize = 4096; | |
impl JitMemory { | |
fn new(size: usize) -> Self { | |
let program: *mut u8; | |
let size = size * PAGE_SIZE; | |
let mut page: *mut libc::c_void; | |
unsafe { | |
page = mem::zeroed(); | |
libc::posix_memalign(&mut page, PAGE_SIZE, size); | |
libc::mprotect( | |
page, | |
size, | |
libc::PROT_EXEC | libc::PROT_READ | libc::PROT_WRITE, | |
); | |
program = mem::transmute(page); | |
} | |
JitMemory { | |
program, | |
position: 0, | |
} | |
} | |
fn emit_byte(&mut self, b: u8) { | |
unsafe { | |
*(self.program.offset(self.position as isize)) = b; | |
} | |
self.position += 1; | |
} | |
fn emit_bytes(&mut self, bytes: &[u8]) { | |
let mut i = 0; | |
for b in bytes { | |
unsafe { | |
*(self.program.offset(self.position as isize + i)) = *b; | |
} | |
i += 1; | |
} | |
self.position += bytes.len(); | |
} | |
fn emit_u64(&mut self, u: u64) { | |
let bytes: [u8; 8] = unsafe { mem::transmute(u) }; | |
self.emit_bytes(&bytes); | |
} | |
fn emit_u32(&mut self, u: u32) { | |
let bytes: [u8; 4] = unsafe { mem::transmute(u) }; | |
self.emit_bytes(&bytes); | |
} | |
fn emit_u16(&mut self, u: u16) { | |
let bytes: [u8; 2] = unsafe { mem::transmute(u) }; | |
self.emit_bytes(&bytes); | |
} | |
} | |
impl Index<usize> for JitMemory { | |
type Output = u8; | |
fn index(&self, index: usize) -> &Self::Output { | |
unsafe { &*self.program.offset(index as isize) } | |
} | |
} | |
impl IndexMut<usize> for JitMemory { | |
fn index_mut(&mut self, index: usize) -> &mut Self::Output { | |
unsafe { &mut *self.program.offset(index as isize) } | |
} | |
} | |
fn main() -> Result<(), String> { | |
let mut args = env::args(); | |
let mut jit_mem = JitMemory::new(20); | |
jit_mem.emit_bytes(&[0x49, 0xbd]); | |
let memory: *mut u8; | |
let _memory = unsafe { libc::malloc(50000) }; | |
memory = unsafe { mem::transmute(_memory) }; | |
jit_mem.emit_u64(memory as u64); | |
let path = match args.nth(1) { | |
Some(v) => v, | |
None => { | |
panic!("no file given"); | |
} | |
}; | |
let mut file = match File::open(&path) { | |
Ok(f) => f, | |
Err(e) => return Err(format!("path: {}, {}", path, e)), | |
}; | |
let meta = match fs::metadata(&path) { | |
Ok(v) => v, | |
Err(e) => return Err(format!("{}", e)), | |
}; | |
let len = meta.len(); | |
let mut buf = Vec::with_capacity(len as usize); | |
let _ = file.read_to_end(&mut buf).unwrap(); | |
let insts = translate_program(&buf); | |
let mut pc = 0; | |
let mut bracket_stack = vec![]; | |
while pc < insts.len() { | |
let inst = &insts[pc]; | |
match inst.kind { | |
InstKind::MoveRight => { | |
if inst.arg < 128 { | |
jit_mem.emit_bytes(&[0x49, 0x83, 0xc5, inst.arg as u8]); | |
} else { | |
jit_mem.emit_bytes(&[0x49, 0x81, 0xc5]); | |
jit_mem.emit_u32(inst.arg as u32); | |
} | |
} | |
InstKind::MoveLeft => { | |
if inst.arg < 128 { | |
jit_mem.emit_bytes(&[0x49, 0x83, 0xed, inst.arg as u8]); | |
} else { | |
jit_mem.emit_bytes(&[0x49, 0x81, 0xed]); | |
jit_mem.emit_u32(inst.arg as u32); | |
} | |
} | |
InstKind::Incr => { | |
if inst.arg < 256 { | |
jit_mem.emit_bytes(&[0x41, 0x80, 0x45, 0x00, inst.arg as u8]); | |
} else if inst.arg < 65536 { | |
jit_mem.emit_bytes(&[0x66, 0x41, 0x81, 0x45, 0x00]); | |
jit_mem.emit_u16(inst.arg as u16); | |
} else { | |
panic!(); | |
} | |
} | |
InstKind::Decr => { | |
if inst.arg < 256 { | |
jit_mem.emit_bytes(&[0x41, 0x80, 0x6d, 0x00, inst.arg as u8]); | |
} else if inst.arg < 65536 { | |
jit_mem.emit_bytes(&[0x66, 0x41, 0x81, 0x6D, 0x00]); | |
jit_mem.emit_u16(inst.arg as u16); | |
} else { | |
panic!(); | |
} | |
} | |
InstKind::Output => { | |
jit_mem.emit_bytes(&[0x48, 0xc7, 0xc0, 0x01, 0x00, 0x00, 0x00]); | |
jit_mem.emit_bytes(&[0x48, 0xc7, 0xc7, 0x01, 0x00, 0x00, 0x00]); | |
jit_mem.emit_bytes(&[0x4c, 0x89, 0xee]); | |
jit_mem.emit_bytes(&[0x48, 0xc7, 0xc2]); | |
jit_mem.emit_u32(inst.arg as u32); | |
jit_mem.emit_bytes(&[0x0f, 0x05]); | |
} | |
InstKind::Input => { | |
jit_mem.emit_bytes(&[0x48, 0xc7, 0xc0, 0x00, 0x00, 0x00, 0x00]); | |
jit_mem.emit_bytes(&[0x48, 0xc7, 0xc7, 0x00, 0x00, 0x00, 0x00]); | |
jit_mem.emit_bytes(&[0x4c, 0x89, 0xee]); | |
jit_mem.emit_bytes(&[0x48, 0xc7, 0xc2, 0x01, 0x00, 0x00, 0x00]); | |
jit_mem.emit_bytes(&[0x0f, 0x05]); | |
} | |
InstKind::JumpIfZero => { | |
jit_mem.emit_bytes(&[0x41, 0x80, 0x7d, 0x00, 0x00]); | |
bracket_stack.push(jit_mem.position); | |
jit_mem.emit_bytes(&[0x0f, 0x84]); | |
jit_mem.emit_u32(0); | |
} | |
InstKind::JumpIfNonZero => { | |
let open_pos = bracket_stack.pop().expect("open bracket stack is empty"); | |
jit_mem.emit_bytes(&[0x41, 0x80, 0x7d, 0x00, 0x00]); | |
let jump_back_from = jit_mem.position + 6; | |
let jump_back_to = open_pos + 6; | |
let pcrel_offset_back: u32 = if jump_back_to >= jump_back_from { | |
(jump_back_to - jump_back_from) as u32 | |
} else { | |
(!(jump_back_from - jump_back_to) + 1) as u32 | |
}; | |
jit_mem.emit_bytes(&[0x0f, 0x85]); | |
jit_mem.emit_u32(pcrel_offset_back); | |
let jump_forward_from = open_pos + 6; | |
let jump_forward_to = jit_mem.position; | |
let pcrel_offset_forward: u32 = if jump_forward_to >= jump_back_from { | |
(jump_forward_to - jump_forward_from) as u32 | |
} else { | |
(!(jump_forward_from - jump_forward_to) + 1) as u32 | |
}; | |
jit_mem[open_pos + 2] = (pcrel_offset_forward & 0xff) as u8; | |
jit_mem[open_pos + 3] = ((pcrel_offset_forward >> 8) & 0xff) as u8; | |
jit_mem[open_pos + 4] = ((pcrel_offset_forward >> 16) & 0xff) as u8; | |
jit_mem[open_pos + 5] = ((pcrel_offset_forward >> 24) & 0xff) as u8; | |
} | |
InstKind::LoopSetToZero => { | |
jit_mem.emit_bytes(&[0x41, 0xC6, 0x45, 0x00, 0x00]); | |
} | |
InstKind::LoopIncrPtr => { | |
jit_mem.emit_bytes(&[0x41, 0x80, 0x7d, 0x00, 0x00]); | |
jit_mem.emit_bytes(&[0x0f, 0x84]); | |
jit_mem.emit_u32(0x12); | |
jit_mem.emit_bytes(&[0x49, 0x81, 0xc5]); | |
jit_mem.emit_u32(inst.arg as u32); | |
jit_mem.emit_bytes(&[0x41, 0x80, 0x7d, 0x00, 0x00]); | |
jit_mem.emit_bytes(&[0x0f, 0x85]); | |
jit_mem.emit_u32(0xffffffee); | |
} | |
InstKind::LoopDecrPtr => { | |
jit_mem.emit_bytes(&[0x41, 0x80, 0x7d, 0x00, 0x00]); | |
jit_mem.emit_bytes(&[0x0f, 0x84]); | |
jit_mem.emit_u32(0x12); | |
jit_mem.emit_bytes(&[0x49, 0x81, 0xed]); | |
jit_mem.emit_u32(inst.arg as u32); | |
jit_mem.emit_bytes(&[0x41, 0x80, 0x7d, 0x00, 0x00]); | |
jit_mem.emit_bytes(&[0x0f, 0x85]); | |
jit_mem.emit_u32(0xffffffee); | |
} | |
InstKind::LoopMoveDataRight => { | |
// mov al,BYTE PTR [r13+0x0] | |
jit_mem.emit_bytes(&[0x41, 0x8A, 0x45, 0x00]); | |
// addb [r13+inst.arg], rax | |
jit_mem.emit_bytes(&[0x41, 0x00, 0x85]); | |
jit_mem.emit_u32(inst.arg as u32); | |
// movb rax, 0 | |
jit_mem.emit_bytes(&[0x41, 0xC6, 0x45, 0x00, 0x00]); | |
} | |
InstKind::LoopMoveDataLeft => { | |
// mov al,BYTE PTR [r13+0x0] | |
jit_mem.emit_bytes(&[0x41, 0x8A, 0x45, 0x00]); | |
// addq [r13+inst.arg], rax | |
jit_mem.emit_bytes(&[0x41, 0x00, 0x85]); | |
jit_mem.emit_u32((!(inst.arg) + 1) as u32); | |
// movb rax, 0 | |
jit_mem.emit_bytes(&[0x41, 0xC6, 0x45, 0x00, 0x00]); | |
} | |
} | |
pc += 1; | |
} | |
jit_mem.emit_byte(0xc3); | |
unsafe { | |
let f: fn() -> i64 = mem::transmute(jit_mem.program); | |
f(); | |
} | |
Ok(()) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment