Last active
June 22, 2023 13:23
-
-
Save oxalica/5cf57758c50ba3dc042fd34916e0b2e8 to your computer and use it in GitHub Desktop.
Interpreter benchmark
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use anyhow::{bail, Context, Result}; | |
use criterion::{black_box, criterion_group, criterion_main, Criterion}; | |
use dynasm::dynasm; | |
use dynasmrt::{AssemblyOffset, DynasmApi, DynasmLabelApi, ExecutableBuffer}; | |
criterion_group!(benches, quine); | |
criterion_main!(benches); | |
/// Ref: https://gist.github.com/blahgeek/dd58a29a37e97fdf29f0 | |
const QUINE_SRC: &str = ">>>>++>++>++>++++>+++>+++++>++>++++>++++>+++>+>+++>+>++>++>++++++>+++++>+++>++++>++>+>+++>++++++>+++++>++>++++>++>+++++>++>++++>++>+++++>+>++++>+++>+++++>+++>++++>+++>+++++>++>++++++>++++>++++>++>+++++>++>++++>++>+++++>+++>+>++++>+++>+++++>+++>++++>+++>+++++>++>++++++>+++++>++>+++++>+++>+++>++++++>++>++>++>++++>++++>++>+++++>++>++++>++>+++++>++>+>+>+>+>+>+>+>+>++++>+++>+>+>+>+>+>++>++++++>+++++>+++>+>+>+>++++>+++>+++++>+++>++++>+++>+++++>++>++++++>++++>++++>++>+++++>++>++++>++>+++++>+++>++>+>+>+>+>++++>+++>+>+>+>+>++>++++++>+++++>+++>+>+>+>++++>+++>+++++>+++>++++>+++>+++++>++>++++++>++++>++++>++>+++++>++>++++>++>+++++>+++>++++++>++++++>++++>+++>+++++>+++>++++>+++>+++++>++>++++++>++++>++++>++>+++++>++>++++>++>+++++>+++>++>+>+>+>+>+>++++>+++>+>+>+>+>+>+>++>++++++>+++++>+++>+>++++>+++>+++++>+++>++++>+++>+++++>++>++++++>++++>++++>++>+++++>++>++++>++>+++++>+++>+>+>++++>+++>+++++>+++>++++>+++>+++++>++>++++++>++++>++++>++>+++++>++>++++>++>+++++>+++>++>+>+>+>+>+>+>+>++++>+++>++++++>++++++>++++++>++++++>++++++>++++++>++++++>++>++++++>+++++>+++>+>++++>+++>+++++>+++>++++>+++>+++++>++>++++++>++++>++++>++>+++++>++>++++>++>+++++>+++>+>++++>+++>+++++>+++>++++>+++>+++++>++>++++++>+++++>+++++>+++++>+++++>+++++>+++++>++>+++++>+>+>+>++++>++>++++>+++>+++>+>++>++>++++++>+++++>+++>++++>++>+>+++>++++++>+++++>+++>+++++++>++>++>++++++>+++++>+++>++++>+++>+++++>++++++>+++>+>++++>+++>+>+++++>+++>++++>+++>+++++>++>++++>+++>+++>+>+>+>+>+>+>+>+>++++>++>+>+>+>+>+>+>+>+>+++>++++++>+++++>++>++++++>++++++>+++++++>+++>+>+>+>+>++++>++>++++++>++++++>++++++>++++++>+++>++++++>+++++>++>++++++>++++++>++++++>++>++++>+++>+++++++>++>++++++>+++++>+++>++++>++++++>+++++>++>++>+++++>+>++++>++>+>+++++>++>++>++++>+++++++>++>+++++[<]>[[<+<+>>-]<[>+<-]>[>]>[>]+[<]<[<]>-[[>]>[>]<+[<]<[<]>-]>]<<->>>[[>]>[>]>++++++++[<+++++>-]<+++[<]<[<]>-[[>]>[>]<>++++[<++++>-]<+++[<]<[<]>-[[>]>[>]<--[<]<[<]>-[[>]>[>]<>+++++[<++++++>-]<+[<]<[<]>-[[>]>[>]<++[<]<[<]>-[[>]>[>]<>+++++++[<------->-]<+[<]<[<]>-[[>]>[>]<+[<]<[<]>-]]]]]]>]+++[>[<<+>>-]<[>+<-]<.>>-]<[<]-<+[<+]<[<]>[<<++++++++[>++++++++<-]>--.<++++[>----<-]>--->[<.>-]<[-]>>]+[>+]>>[.>]"; | |
pub fn quine(c: &mut Criterion) { | |
const MEMORY_LEN: usize = 8 << 10; | |
const OUTPUT_LEN: usize = 4 << 10; | |
let code = black_box(parse(QUINE_SRC).unwrap()); | |
c.bench_function("memset", |b| { | |
let mut memory = [0u8; MEMORY_LEN]; | |
let mut output = [0u8; OUTPUT_LEN]; | |
b.iter(|| { | |
memory.fill(0); | |
output.fill(0); | |
black_box(drop as fn(_))((&mut memory, &mut output)); | |
}); | |
}); | |
c.bench_function("quine-simple", |b| { | |
let mut memory = [0u8; MEMORY_LEN]; | |
let mut output = [0u8; OUTPUT_LEN]; | |
// Correctness. | |
assert_eq!( | |
run_simple(&code, &mut memory, &[], &mut output), | |
QUINE_SRC.len() | |
); | |
assert_eq!(&output[..QUINE_SRC.len()], QUINE_SRC.as_bytes()); | |
b.iter(|| { | |
memory.fill(0); | |
output.fill(0); | |
run_simple(&code, &mut memory, &[], &mut output) | |
}); | |
}); | |
c.bench_function("quine-trampoline", |b| { | |
let code = trampoline::compile_ops(&code); | |
let mut memory = [0u8; MEMORY_LEN]; | |
let mut output = [0u8; OUTPUT_LEN]; | |
// Correctness. | |
assert_eq!( | |
trampoline::run(&code, &mut memory, &[], &mut output), | |
QUINE_SRC.len() | |
); | |
assert_eq!(&output[..QUINE_SRC.len()], QUINE_SRC.as_bytes()); | |
b.iter(|| { | |
memory.fill(0); | |
output.fill(0); | |
trampoline::run(&code, &mut memory, &[], &mut output) | |
}); | |
}); | |
c.bench_function("quine-direct-threading", |b| { | |
let code = direct_threading::compile_ops(&code); | |
let mut memory = [0u8; MEMORY_LEN]; | |
let mut output = [0u8; OUTPUT_LEN]; | |
// Correctness. | |
assert_eq!( | |
direct_threading::run(&code, &mut memory, &[], &mut output), | |
QUINE_SRC.len() | |
); | |
assert_eq!(&output[..QUINE_SRC.len()], QUINE_SRC.as_bytes()); | |
b.iter(|| { | |
memory.fill(0); | |
output.fill(0); | |
direct_threading::run(&code, &mut memory, &[], &mut output) | |
}); | |
}); | |
c.bench_function("quine-jit", |b| { | |
let code = jit::compile(&code); | |
let mut memory = [0u8; MEMORY_LEN]; | |
let mut output = [0u8; OUTPUT_LEN]; | |
// Correctness. | |
assert_eq!( | |
jit::run(&code, &mut memory, &[], &mut output), | |
QUINE_SRC.len() | |
); | |
assert_eq!(&output[..QUINE_SRC.len()], QUINE_SRC.as_bytes()); | |
b.iter(|| { | |
memory.fill(0); | |
output.fill(0); | |
jit::run(&code, &mut memory, &[], &mut output) | |
}); | |
}); | |
} | |
fn parse(src: &str) -> Result<Vec<Op>> { | |
let mut ops = Vec::with_capacity(src.len()); | |
let mut loop_stack = Vec::new(); | |
for b in src.bytes() { | |
let op = match b { | |
b'<' => Op::Left, | |
b'>' => Op::Right, | |
b'+' => Op::Inc, | |
b'-' => Op::Dec, | |
b'.' => Op::Output, | |
b',' => Op::Input, | |
b'[' => { | |
loop_stack.push(ops.len()); | |
Op::LoopStart(0) | |
} | |
b']' => { | |
let i = loop_stack.pop().context("unpaired `]`")?; | |
ops[i] = Op::LoopStart(ops.len() + 1); | |
Op::LoopEnd(i + 1) | |
} | |
_ => bail!("unknow opcode: {:?}", b as char), | |
}; | |
ops.push(op); | |
} | |
Ok(ops) | |
} | |
#[derive(Debug, Clone, Copy)] | |
pub enum Op { | |
Left, | |
Right, | |
Inc, | |
Dec, | |
Output, | |
Input, | |
LoopStart(usize), | |
LoopEnd(usize), | |
} | |
fn run_simple(code: &[Op], memory: &mut [u8], mut input: &[u8], output: &mut [u8]) -> usize { | |
let mut ip @ mut ptr @ mut output_pos = 0usize; | |
while ip < code.len() { | |
match code[ip] { | |
Op::Left => ptr = ptr.wrapping_sub(1), | |
Op::Right => ptr = ptr.wrapping_add(1), | |
Op::Inc => memory[ptr] = memory[ptr].wrapping_add(1), | |
Op::Dec => memory[ptr] = memory[ptr].wrapping_sub(1), | |
Op::Output => { | |
output[output_pos] = memory[ptr]; | |
output_pos += 1; | |
} | |
Op::Input => { | |
memory[ptr] = input[0]; | |
input = &input[1..]; | |
} | |
Op::LoopStart(outside) => { | |
if memory[ptr] == 0 { | |
ip = outside; | |
continue; | |
} | |
} | |
Op::LoopEnd(inside) => { | |
if memory[ptr] != 0 { | |
ip = inside; | |
continue; | |
} | |
} | |
} | |
ip += 1; | |
} | |
output_pos | |
} | |
mod trampoline { | |
use super::*; | |
pub struct State<'m, 'i, 'o> { | |
ip: usize, | |
memory: &'m mut [u8], | |
ptr: usize, | |
input: &'i [u8], | |
output: &'o mut [u8], | |
output_pos: usize, | |
} | |
type OpFunc = fn(&mut State, usize); | |
type OpClosure = (OpFunc, usize); | |
pub fn compile_ops(ops: &[Op]) -> Vec<OpClosure> { | |
ops.iter() | |
.map(|op| match *op { | |
Op::Left => (op_left as _, 0), | |
Op::Right => (op_right as _, 0), | |
Op::Inc => (op_inc as _, 0), | |
Op::Dec => (op_dec as _, 0), | |
Op::Output => (op_output as _, 0), | |
Op::Input => (op_input as _, 0), | |
Op::LoopStart(x) => (op_loop_start as _, x), | |
Op::LoopEnd(x) => (op_loop_end as _, x), | |
}) | |
.collect() | |
} | |
fn op_left(s: &mut State, _: usize) { | |
s.ptr = s.ptr.wrapping_sub(1); | |
s.ip += 1; | |
} | |
fn op_right(s: &mut State, _: usize) { | |
s.ptr = s.ptr.wrapping_add(1); | |
s.ip += 1; | |
} | |
fn op_inc(s: &mut State, _: usize) { | |
s.memory[s.ptr] = s.memory[s.ptr].wrapping_add(1); | |
s.ip += 1; | |
} | |
fn op_dec(s: &mut State, _: usize) { | |
s.memory[s.ptr] = s.memory[s.ptr].wrapping_sub(1); | |
s.ip += 1; | |
} | |
fn op_output(s: &mut State, _: usize) { | |
s.output[s.output_pos] = s.memory[s.ptr]; | |
s.output_pos += 1; | |
s.ip += 1; | |
} | |
fn op_input(s: &mut State, _: usize) { | |
s.memory[s.ptr] = s.input[0]; | |
s.input = &s.input[1..]; | |
s.ip += 1; | |
} | |
fn op_loop_start(s: &mut State, outside: usize) { | |
if s.memory[s.ptr] != 0 { | |
s.ip += 1; | |
} else { | |
s.ip = outside; | |
} | |
} | |
fn op_loop_end(s: &mut State, inside: usize) { | |
if s.memory[s.ptr] != 0 { | |
s.ip = inside; | |
} else { | |
s.ip += 1; | |
} | |
} | |
pub fn run(code: &[OpClosure], memory: &mut [u8], input: &[u8], output: &mut [u8]) -> usize { | |
let mut s = State { | |
ip: 0, | |
memory, | |
ptr: 0, | |
input, | |
output, | |
output_pos: 0, | |
}; | |
while s.ip < code.len() { | |
let (f, data) = code[s.ip]; | |
f(&mut s, data); | |
} | |
s.output_pos | |
} | |
} | |
mod direct_threading { | |
use super::*; | |
pub struct State<'c, 'm, 'i, 'o> { | |
code: &'c [OpClosure], | |
ip: usize, | |
memory: &'m mut [u8], | |
ptr: usize, | |
input: &'i [u8], | |
output: &'o mut [u8], | |
output_pos: usize, | |
} | |
type OpFunc = fn(&mut State, usize); | |
type OpClosure = (OpFunc, usize); | |
pub fn compile_ops(ops: &[Op]) -> Vec<OpClosure> { | |
ops.iter() | |
.map(|op| match *op { | |
Op::Left => (op_left as _, 0), | |
Op::Right => (op_right as _, 0), | |
Op::Inc => (op_inc as _, 0), | |
Op::Dec => (op_dec as _, 0), | |
Op::Output => (op_output as _, 0), | |
Op::Input => (op_input as _, 0), | |
Op::LoopStart(x) => (op_loop_start as _, x), | |
Op::LoopEnd(x) => (op_loop_end as _, x), | |
}) | |
.collect() | |
} | |
fn op_left(s: &mut State, _: usize) { | |
s.ptr = s.ptr.wrapping_sub(1); | |
s.ip += 1; | |
s.goto_next(); | |
} | |
fn op_right(s: &mut State, _: usize) { | |
s.ptr = s.ptr.wrapping_add(1); | |
s.ip += 1; | |
s.goto_next(); | |
} | |
fn op_inc(s: &mut State, _: usize) { | |
s.memory[s.ptr] = s.memory[s.ptr].wrapping_add(1); | |
s.ip += 1; | |
s.goto_next(); | |
} | |
fn op_dec(s: &mut State, _: usize) { | |
s.memory[s.ptr] = s.memory[s.ptr].wrapping_sub(1); | |
s.ip += 1; | |
s.goto_next(); | |
} | |
fn op_output(s: &mut State, _: usize) { | |
s.output[s.output_pos] = s.memory[s.ptr]; | |
s.output_pos += 1; | |
s.ip += 1; | |
s.goto_next(); | |
} | |
fn op_input(s: &mut State, _: usize) { | |
s.memory[s.ptr] = s.input[0]; | |
s.input = &s.input[1..]; | |
s.ip += 1; | |
s.goto_next(); | |
} | |
fn op_loop_start(s: &mut State, outside: usize) { | |
if s.memory[s.ptr] != 0 { | |
s.ip += 1; | |
} else { | |
s.ip = outside; | |
} | |
s.goto_next(); | |
} | |
fn op_loop_end(s: &mut State, inside: usize) { | |
if s.memory[s.ptr] != 0 { | |
s.ip = inside; | |
} else { | |
s.ip += 1; | |
} | |
s.goto_next(); | |
} | |
impl State<'_, '_, '_, '_> { | |
#[inline(always)] | |
fn goto_next(&mut self) { | |
if self.ip < self.code.len() { | |
let (f, data) = self.code[self.ip]; | |
f(self, data); | |
} | |
} | |
} | |
pub fn run(code: &[OpClosure], memory: &mut [u8], input: &[u8], output: &mut [u8]) -> usize { | |
let mut s = State { | |
code, | |
ip: 0, | |
memory, | |
ptr: 0, | |
input, | |
output, | |
output_pos: 0, | |
}; | |
s.goto_next(); | |
s.output_pos | |
} | |
} | |
mod jit { | |
use super::*; | |
type Func = extern "sysv64" fn( | |
memory: *mut u8, // rdi | |
memory_len: usize, // rsi | |
input: *const u8, // rdx | |
input_len: usize, // rcx | |
output: *mut u8, // r8 | |
output_len: usize, // r9 | |
) -> usize; | |
pub struct Program { | |
code: ExecutableBuffer, | |
start: AssemblyOffset, | |
} | |
pub fn compile(ops: &[Op]) -> Program { | |
let mut stack = Vec::new(); | |
let mut asm = dynasmrt::x64::Assembler::new().unwrap(); | |
let start = asm.offset(); | |
dynasm!(asm | |
; .arch x64 | |
; xor r10, r10 // ptr | |
; xor r11, r11 // output_pos | |
); | |
let fail = asm.new_dynamic_label(); | |
for op in ops { | |
match *op { | |
Op::Left => dynasm!(asm | |
; .arch x64 | |
; dec r10 | |
; js =>fail | |
), | |
Op::Right => dynasm!(asm | |
; .arch x64 | |
; inc r10 | |
; cmp r10, rsi | |
; jae =>fail | |
), | |
Op::Inc => dynasm!(asm | |
; .arch x64 | |
; inc BYTE [rdi + r10] | |
), | |
Op::Dec => dynasm!(asm | |
; .arch x64 | |
; dec BYTE [rdi + r10] | |
), | |
Op::Output => dynasm!(asm | |
; .arch x64 | |
; cmp r11, r9 | |
; jae =>fail | |
; movzx eax, BYTE [rdi + r10] | |
; mov BYTE [r8 + r11], al | |
; inc r11 | |
), | |
Op::Input => dynasm!(asm | |
; .arch x64 | |
; dec rcx | |
; js =>fail | |
; movzx eax, BYTE [rdx] | |
; inc rdx | |
; mov BYTE [rdi + r10], al | |
), | |
Op::LoopStart(_) => { | |
let fwd = asm.new_dynamic_label(); | |
let bkd = asm.new_dynamic_label(); | |
stack.push((fwd, bkd)); | |
dynasm!(asm | |
; .arch x64 | |
; cmp BYTE [rdi + r10], 0 | |
; je =>fwd | |
;=>bkd | |
); | |
} | |
Op::LoopEnd(_) => { | |
let (fwd, bkd) = stack.pop().unwrap(); | |
dynasm!(asm | |
; .arch x64 | |
; cmp BYTE [rdi + r10], 0 | |
; jne =>bkd | |
;=>fwd | |
); | |
} | |
} | |
} | |
dynasm!(asm | |
; .arch x64 | |
; mov rax, r11 | |
; ret | |
;=>fail | |
; mov rax, -1 | |
; ret | |
); | |
let code = asm.finalize().unwrap(); | |
Program { code, start } | |
} | |
pub fn run(program: &Program, memory: &mut [u8], input: &[u8], output: &mut [u8]) -> usize { | |
unsafe { | |
let func: Func = std::mem::transmute(program.code.ptr(program.start)); | |
let ret = func( | |
memory.as_mut_ptr(), | |
memory.len(), | |
input.as_ptr(), | |
input.len(), | |
output.as_mut_ptr(), | |
output.len(), | |
); | |
assert_ne!(ret, !0usize); | |
ret | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Platform: AMD Ryzen 7 5700G | |
memset time: [133.92 ns 134.00 ns 134.13 ns] | |
quine-simple time: [5.4988 ms 5.5014 ms 5.5040 ms] | |
quine-trampoline time: [18.395 ms 18.413 ms 18.433 ms] | |
quine-direct-threading time: [17.044 ms 17.053 ms 17.063 ms] | |
quine-jit time: [837.99 µs 838.57 µs 839.25 µs] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment