Skip to content

Instantly share code, notes, and snippets.

@oxalica
Last active June 22, 2023 13:23
Show Gist options
  • Save oxalica/5cf57758c50ba3dc042fd34916e0b2e8 to your computer and use it in GitHub Desktop.
Save oxalica/5cf57758c50ba3dc042fd34916e0b2e8 to your computer and use it in GitHub Desktop.
Interpreter benchmark
use anyhow::{bail, Context, Result};
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use dynasm::dynasm;
use dynasmrt::{AssemblyOffset, DynasmApi, DynasmLabelApi, ExecutableBuffer};
criterion_group!(benches, quine);
criterion_main!(benches);
/// Ref: https://gist.github.com/blahgeek/dd58a29a37e97fdf29f0
const QUINE_SRC: &str = ">>>>++>++>++>++++>+++>+++++>++>++++>++++>+++>+>+++>+>++>++>++++++>+++++>+++>++++>++>+>+++>++++++>+++++>++>++++>++>+++++>++>++++>++>+++++>+>++++>+++>+++++>+++>++++>+++>+++++>++>++++++>++++>++++>++>+++++>++>++++>++>+++++>+++>+>++++>+++>+++++>+++>++++>+++>+++++>++>++++++>+++++>++>+++++>+++>+++>++++++>++>++>++>++++>++++>++>+++++>++>++++>++>+++++>++>+>+>+>+>+>+>+>+>++++>+++>+>+>+>+>+>++>++++++>+++++>+++>+>+>+>++++>+++>+++++>+++>++++>+++>+++++>++>++++++>++++>++++>++>+++++>++>++++>++>+++++>+++>++>+>+>+>+>++++>+++>+>+>+>+>++>++++++>+++++>+++>+>+>+>++++>+++>+++++>+++>++++>+++>+++++>++>++++++>++++>++++>++>+++++>++>++++>++>+++++>+++>++++++>++++++>++++>+++>+++++>+++>++++>+++>+++++>++>++++++>++++>++++>++>+++++>++>++++>++>+++++>+++>++>+>+>+>+>+>++++>+++>+>+>+>+>+>+>++>++++++>+++++>+++>+>++++>+++>+++++>+++>++++>+++>+++++>++>++++++>++++>++++>++>+++++>++>++++>++>+++++>+++>+>+>++++>+++>+++++>+++>++++>+++>+++++>++>++++++>++++>++++>++>+++++>++>++++>++>+++++>+++>++>+>+>+>+>+>+>+>++++>+++>++++++>++++++>++++++>++++++>++++++>++++++>++++++>++>++++++>+++++>+++>+>++++>+++>+++++>+++>++++>+++>+++++>++>++++++>++++>++++>++>+++++>++>++++>++>+++++>+++>+>++++>+++>+++++>+++>++++>+++>+++++>++>++++++>+++++>+++++>+++++>+++++>+++++>+++++>++>+++++>+>+>+>++++>++>++++>+++>+++>+>++>++>++++++>+++++>+++>++++>++>+>+++>++++++>+++++>+++>+++++++>++>++>++++++>+++++>+++>++++>+++>+++++>++++++>+++>+>++++>+++>+>+++++>+++>++++>+++>+++++>++>++++>+++>+++>+>+>+>+>+>+>+>+>++++>++>+>+>+>+>+>+>+>+>+++>++++++>+++++>++>++++++>++++++>+++++++>+++>+>+>+>+>++++>++>++++++>++++++>++++++>++++++>+++>++++++>+++++>++>++++++>++++++>++++++>++>++++>+++>+++++++>++>++++++>+++++>+++>++++>++++++>+++++>++>++>+++++>+>++++>++>+>+++++>++>++>++++>+++++++>++>+++++[<]>[[<+<+>>-]<[>+<-]>[>]>[>]+[<]<[<]>-[[>]>[>]<+[<]<[<]>-]>]<<->>>[[>]>[>]>++++++++[<+++++>-]<+++[<]<[<]>-[[>]>[>]<>++++[<++++>-]<+++[<]<[<]>-[[>]>[>]<--[<]<[<]>-[[>]>[>]<>+++++[<++++++>-]<+[<]<[<]>-[[>]>[>]<++[<]<[<]>-[[>]>[>]<>+++++++[<------->-]<+[<]<[<]>-[[>]>[>]<+[<]<[<]>-]]]]]]>]+++[>[<<+>>-]<[>+<-]<.>>-]<[<]-<+[<+]<[<]>[<<++++++++[>++++++++<-]>--.<++++[>----<-]>--->[<.>-]<[-]>>]+[>+]>>[.>]";
pub fn quine(c: &mut Criterion) {
const MEMORY_LEN: usize = 8 << 10;
const OUTPUT_LEN: usize = 4 << 10;
let code = black_box(parse(QUINE_SRC).unwrap());
c.bench_function("memset", |b| {
let mut memory = [0u8; MEMORY_LEN];
let mut output = [0u8; OUTPUT_LEN];
b.iter(|| {
memory.fill(0);
output.fill(0);
black_box(drop as fn(_))((&mut memory, &mut output));
});
});
c.bench_function("quine-simple", |b| {
let mut memory = [0u8; MEMORY_LEN];
let mut output = [0u8; OUTPUT_LEN];
// Correctness.
assert_eq!(
run_simple(&code, &mut memory, &[], &mut output),
QUINE_SRC.len()
);
assert_eq!(&output[..QUINE_SRC.len()], QUINE_SRC.as_bytes());
b.iter(|| {
memory.fill(0);
output.fill(0);
run_simple(&code, &mut memory, &[], &mut output)
});
});
c.bench_function("quine-trampoline", |b| {
let code = trampoline::compile_ops(&code);
let mut memory = [0u8; MEMORY_LEN];
let mut output = [0u8; OUTPUT_LEN];
// Correctness.
assert_eq!(
trampoline::run(&code, &mut memory, &[], &mut output),
QUINE_SRC.len()
);
assert_eq!(&output[..QUINE_SRC.len()], QUINE_SRC.as_bytes());
b.iter(|| {
memory.fill(0);
output.fill(0);
trampoline::run(&code, &mut memory, &[], &mut output)
});
});
c.bench_function("quine-direct-threading", |b| {
let code = direct_threading::compile_ops(&code);
let mut memory = [0u8; MEMORY_LEN];
let mut output = [0u8; OUTPUT_LEN];
// Correctness.
assert_eq!(
direct_threading::run(&code, &mut memory, &[], &mut output),
QUINE_SRC.len()
);
assert_eq!(&output[..QUINE_SRC.len()], QUINE_SRC.as_bytes());
b.iter(|| {
memory.fill(0);
output.fill(0);
direct_threading::run(&code, &mut memory, &[], &mut output)
});
});
c.bench_function("quine-jit", |b| {
let code = jit::compile(&code);
let mut memory = [0u8; MEMORY_LEN];
let mut output = [0u8; OUTPUT_LEN];
// Correctness.
assert_eq!(
jit::run(&code, &mut memory, &[], &mut output),
QUINE_SRC.len()
);
assert_eq!(&output[..QUINE_SRC.len()], QUINE_SRC.as_bytes());
b.iter(|| {
memory.fill(0);
output.fill(0);
jit::run(&code, &mut memory, &[], &mut output)
});
});
}
fn parse(src: &str) -> Result<Vec<Op>> {
let mut ops = Vec::with_capacity(src.len());
let mut loop_stack = Vec::new();
for b in src.bytes() {
let op = match b {
b'<' => Op::Left,
b'>' => Op::Right,
b'+' => Op::Inc,
b'-' => Op::Dec,
b'.' => Op::Output,
b',' => Op::Input,
b'[' => {
loop_stack.push(ops.len());
Op::LoopStart(0)
}
b']' => {
let i = loop_stack.pop().context("unpaired `]`")?;
ops[i] = Op::LoopStart(ops.len() + 1);
Op::LoopEnd(i + 1)
}
_ => bail!("unknow opcode: {:?}", b as char),
};
ops.push(op);
}
Ok(ops)
}
#[derive(Debug, Clone, Copy)]
pub enum Op {
Left,
Right,
Inc,
Dec,
Output,
Input,
LoopStart(usize),
LoopEnd(usize),
}
fn run_simple(code: &[Op], memory: &mut [u8], mut input: &[u8], output: &mut [u8]) -> usize {
let mut ip @ mut ptr @ mut output_pos = 0usize;
while ip < code.len() {
match code[ip] {
Op::Left => ptr = ptr.wrapping_sub(1),
Op::Right => ptr = ptr.wrapping_add(1),
Op::Inc => memory[ptr] = memory[ptr].wrapping_add(1),
Op::Dec => memory[ptr] = memory[ptr].wrapping_sub(1),
Op::Output => {
output[output_pos] = memory[ptr];
output_pos += 1;
}
Op::Input => {
memory[ptr] = input[0];
input = &input[1..];
}
Op::LoopStart(outside) => {
if memory[ptr] == 0 {
ip = outside;
continue;
}
}
Op::LoopEnd(inside) => {
if memory[ptr] != 0 {
ip = inside;
continue;
}
}
}
ip += 1;
}
output_pos
}
mod trampoline {
use super::*;
pub struct State<'m, 'i, 'o> {
ip: usize,
memory: &'m mut [u8],
ptr: usize,
input: &'i [u8],
output: &'o mut [u8],
output_pos: usize,
}
type OpFunc = fn(&mut State, usize);
type OpClosure = (OpFunc, usize);
pub fn compile_ops(ops: &[Op]) -> Vec<OpClosure> {
ops.iter()
.map(|op| match *op {
Op::Left => (op_left as _, 0),
Op::Right => (op_right as _, 0),
Op::Inc => (op_inc as _, 0),
Op::Dec => (op_dec as _, 0),
Op::Output => (op_output as _, 0),
Op::Input => (op_input as _, 0),
Op::LoopStart(x) => (op_loop_start as _, x),
Op::LoopEnd(x) => (op_loop_end as _, x),
})
.collect()
}
fn op_left(s: &mut State, _: usize) {
s.ptr = s.ptr.wrapping_sub(1);
s.ip += 1;
}
fn op_right(s: &mut State, _: usize) {
s.ptr = s.ptr.wrapping_add(1);
s.ip += 1;
}
fn op_inc(s: &mut State, _: usize) {
s.memory[s.ptr] = s.memory[s.ptr].wrapping_add(1);
s.ip += 1;
}
fn op_dec(s: &mut State, _: usize) {
s.memory[s.ptr] = s.memory[s.ptr].wrapping_sub(1);
s.ip += 1;
}
fn op_output(s: &mut State, _: usize) {
s.output[s.output_pos] = s.memory[s.ptr];
s.output_pos += 1;
s.ip += 1;
}
fn op_input(s: &mut State, _: usize) {
s.memory[s.ptr] = s.input[0];
s.input = &s.input[1..];
s.ip += 1;
}
fn op_loop_start(s: &mut State, outside: usize) {
if s.memory[s.ptr] != 0 {
s.ip += 1;
} else {
s.ip = outside;
}
}
fn op_loop_end(s: &mut State, inside: usize) {
if s.memory[s.ptr] != 0 {
s.ip = inside;
} else {
s.ip += 1;
}
}
pub fn run(code: &[OpClosure], memory: &mut [u8], input: &[u8], output: &mut [u8]) -> usize {
let mut s = State {
ip: 0,
memory,
ptr: 0,
input,
output,
output_pos: 0,
};
while s.ip < code.len() {
let (f, data) = code[s.ip];
f(&mut s, data);
}
s.output_pos
}
}
mod direct_threading {
use super::*;
pub struct State<'c, 'm, 'i, 'o> {
code: &'c [OpClosure],
ip: usize,
memory: &'m mut [u8],
ptr: usize,
input: &'i [u8],
output: &'o mut [u8],
output_pos: usize,
}
type OpFunc = fn(&mut State, usize);
type OpClosure = (OpFunc, usize);
pub fn compile_ops(ops: &[Op]) -> Vec<OpClosure> {
ops.iter()
.map(|op| match *op {
Op::Left => (op_left as _, 0),
Op::Right => (op_right as _, 0),
Op::Inc => (op_inc as _, 0),
Op::Dec => (op_dec as _, 0),
Op::Output => (op_output as _, 0),
Op::Input => (op_input as _, 0),
Op::LoopStart(x) => (op_loop_start as _, x),
Op::LoopEnd(x) => (op_loop_end as _, x),
})
.collect()
}
fn op_left(s: &mut State, _: usize) {
s.ptr = s.ptr.wrapping_sub(1);
s.ip += 1;
s.goto_next();
}
fn op_right(s: &mut State, _: usize) {
s.ptr = s.ptr.wrapping_add(1);
s.ip += 1;
s.goto_next();
}
fn op_inc(s: &mut State, _: usize) {
s.memory[s.ptr] = s.memory[s.ptr].wrapping_add(1);
s.ip += 1;
s.goto_next();
}
fn op_dec(s: &mut State, _: usize) {
s.memory[s.ptr] = s.memory[s.ptr].wrapping_sub(1);
s.ip += 1;
s.goto_next();
}
fn op_output(s: &mut State, _: usize) {
s.output[s.output_pos] = s.memory[s.ptr];
s.output_pos += 1;
s.ip += 1;
s.goto_next();
}
fn op_input(s: &mut State, _: usize) {
s.memory[s.ptr] = s.input[0];
s.input = &s.input[1..];
s.ip += 1;
s.goto_next();
}
fn op_loop_start(s: &mut State, outside: usize) {
if s.memory[s.ptr] != 0 {
s.ip += 1;
} else {
s.ip = outside;
}
s.goto_next();
}
fn op_loop_end(s: &mut State, inside: usize) {
if s.memory[s.ptr] != 0 {
s.ip = inside;
} else {
s.ip += 1;
}
s.goto_next();
}
impl State<'_, '_, '_, '_> {
#[inline(always)]
fn goto_next(&mut self) {
if self.ip < self.code.len() {
let (f, data) = self.code[self.ip];
f(self, data);
}
}
}
pub fn run(code: &[OpClosure], memory: &mut [u8], input: &[u8], output: &mut [u8]) -> usize {
let mut s = State {
code,
ip: 0,
memory,
ptr: 0,
input,
output,
output_pos: 0,
};
s.goto_next();
s.output_pos
}
}
mod jit {
use super::*;
type Func = extern "sysv64" fn(
memory: *mut u8, // rdi
memory_len: usize, // rsi
input: *const u8, // rdx
input_len: usize, // rcx
output: *mut u8, // r8
output_len: usize, // r9
) -> usize;
pub struct Program {
code: ExecutableBuffer,
start: AssemblyOffset,
}
pub fn compile(ops: &[Op]) -> Program {
let mut stack = Vec::new();
let mut asm = dynasmrt::x64::Assembler::new().unwrap();
let start = asm.offset();
dynasm!(asm
; .arch x64
; xor r10, r10 // ptr
; xor r11, r11 // output_pos
);
let fail = asm.new_dynamic_label();
for op in ops {
match *op {
Op::Left => dynasm!(asm
; .arch x64
; dec r10
; js =>fail
),
Op::Right => dynasm!(asm
; .arch x64
; inc r10
; cmp r10, rsi
; jae =>fail
),
Op::Inc => dynasm!(asm
; .arch x64
; inc BYTE [rdi + r10]
),
Op::Dec => dynasm!(asm
; .arch x64
; dec BYTE [rdi + r10]
),
Op::Output => dynasm!(asm
; .arch x64
; cmp r11, r9
; jae =>fail
; movzx eax, BYTE [rdi + r10]
; mov BYTE [r8 + r11], al
; inc r11
),
Op::Input => dynasm!(asm
; .arch x64
; dec rcx
; js =>fail
; movzx eax, BYTE [rdx]
; inc rdx
; mov BYTE [rdi + r10], al
),
Op::LoopStart(_) => {
let fwd = asm.new_dynamic_label();
let bkd = asm.new_dynamic_label();
stack.push((fwd, bkd));
dynasm!(asm
; .arch x64
; cmp BYTE [rdi + r10], 0
; je =>fwd
;=>bkd
);
}
Op::LoopEnd(_) => {
let (fwd, bkd) = stack.pop().unwrap();
dynasm!(asm
; .arch x64
; cmp BYTE [rdi + r10], 0
; jne =>bkd
;=>fwd
);
}
}
}
dynasm!(asm
; .arch x64
; mov rax, r11
; ret
;=>fail
; mov rax, -1
; ret
);
let code = asm.finalize().unwrap();
Program { code, start }
}
pub fn run(program: &Program, memory: &mut [u8], input: &[u8], output: &mut [u8]) -> usize {
unsafe {
let func: Func = std::mem::transmute(program.code.ptr(program.start));
let ret = func(
memory.as_mut_ptr(),
memory.len(),
input.as_ptr(),
input.len(),
output.as_mut_ptr(),
output.len(),
);
assert_ne!(ret, !0usize);
ret
}
}
}
Platform: AMD Ryzen 7 5700G
memset time: [133.92 ns 134.00 ns 134.13 ns]
quine-simple time: [5.4988 ms 5.5014 ms 5.5040 ms]
quine-trampoline time: [18.395 ms 18.413 ms 18.433 ms]
quine-direct-threading time: [17.044 ms 17.053 ms 17.063 ms]
quine-jit time: [837.99 µs 838.57 µs 839.25 µs]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment