Skip to content

Instantly share code, notes, and snippets.

@DmitrySoshnikov
Last active March 23, 2022 07:09
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save DmitrySoshnikov/afda459222e96e6002ac to your computer and use it in GitHub Desktop.
Save DmitrySoshnikov/afda459222e96e6002ac to your computer and use it in GitHub Desktop.
Educational Stack-based VM with recursion example
/**
* Educational Stack-based VM.
*
* See also:
* - A simplified example without recursion: https://gist.github.com/DmitrySoshnikov/76e1cfb930df8d36145c
* - Register-based VM example: https://gist.github.com/DmitrySoshnikov/6407781
*
* by Dmitry Soshnikov <dmitry.soshnikov@gmail.com>
* http://dmitrysoshnikov.com
* MIT Stye License (C) 2015
*/
var _stack = []; // main evaluation stack
var _pc = 0; // program counter
var _regs = {$a: 0, $b: 0, $c: 0, $d: 0}; // generic registers
var _zf; // zero flag (used in `cmp` instruction)
var $sp = 0; // stack pointer
var $fp = 0; // frame pointer
var _running = false; // whether the machine is running
// Checks whether a value is a user-level register.
function isReg(v) {
return v[0] === '$';
}
// Gets the address of the label, or just returns
// the address if it's an actual address.
function toAddr(v) {
if (typeof v === 'string') {
return _labels[v + ':'];
}
return v;
}
// ---------------------------------------------------------------
// 1. Opcodes (directly in assembly mnemonics for brevity).
//
// In real machine this would be actual binary codes of
// instructions corresponding to the assembly mnemonics.
// ---------------------------------------------------------------
var ops = {
// Stops the execution.
halt: function() {
_running = false;
},
// Moves contents to a register.
mov: function(r, v) {
_regs[r] = eval(v);
},
// Sums two registers, result is in the first one.
add: function(r1, r2) {
_regs[r1] = _regs[r1] + _regs[r2];
},
// Pushes on the stack advancing the stack pointer.
push: function(v) {
// If a register is pushed, get its value.
if (isReg(v)) {
v = _regs[v];
}
_stack.push(eval(v));
$sp = _stack.length - 1;
},
// Pops a value from top of the stack. If the register
// is passed, stores value there.
pop: function(r) {
var v = _stack.pop();
$sp = _stack.length - 1;
// Pop may provide a register to store the value.
if (r) {
_regs[r] = v;
}
return v;
},
/**
* Calls a procedure.
* Caller pushes params on the stack, and the
* `call` in addition does the routine work by saving
* the frame pointer (`$fp`), and saving the return address.
* The return value is always passed in the `$a` (accumulator) register.
* All together it's an Activation Record (AR, aka a "stack frame"):
*
* AR:
*
* -----------
* param1
* param2 Caller's responsibility
* ...
* paramN
* -----------
* fp Callee's responsibility
* ret addr (pc + 1)
* -----------
*/
call: function(proc) {
this.push($fp); // Save caller's frame pointer
$fp = $sp - 1; // And set it to proc params
this.push(_pc + 1); // Save the return address (follows the `call`)
this.jmp(toAddr(proc)); // actual control transfer to procedure
},
/**
* This version uses "Callee clean-up" convention, meaning the callee
* removes all passed params from the stack. The `n` param says how many
* params (bytes in real machines) to remove from the stack. As the result
* of this instruction the whole AR is popped from the stack, and the control is
* transferred back to the caller.
*
* Notice, since the callee cleans everything up, this convention doesn't
* support passing of variant number of params.
*/
ret: function(n) {
n = n || 0; // number of params (bytes) to pop
// Return address is restored.
_pc = this.pop();
// Don't advance PC in main cycle since we just changed it.
_shouldAdvancePc = false;
// Params unwind.
while (n > 0) {
this.pop();
n--;
}
// Caller's fp is restored.
$fp = this.pop();
},
// Unconditional jump to a specific address.
jmp: function(addr) {
_pc = addr;
// Tell CPU don't advance pc since we just changed it in `jmp`.
_shouldAdvancePc = false;
},
// Compares two operands and updates `_zf` (zero flag).
cmp: function(v1, v2) {
v1 = isReg(v1) ? _regs[v1] : eval(v1);
v2 = isReg(v2) ? _regs[v2] : eval(v2);
// Compare is just updating the zero flag:
// We support here only the simplest version:
// "equal/not-euqal", without "greater", etc ops.
// 1 - operands are equal
// 0 - not equal
_zf = (v1 === v2) ? 1 : 0;
},
// Conditional jump if previously compared operands were equal.
je: function(addr) {
if (_zf === 1) {
this.jmp(toAddr(addr));
}
},
// The same as `je`, but if compared operands were not equal.
jne: function(addr) {
if (_zf === 0) {
this.jmp(toAddr(addr));
}
},
};
// ---------------------------------------------------------------
// 2. Assembler. In real world has two passes: at first collects all labels,
// in the second replaces labels with addresses and actually translates
// to machine code. Here we only will collect the labels, and remove them
// from the program, skipping the translation to byte-code for brevity
// (the program runner will work directly with assembly mnemonics)
// ---------------------------------------------------------------
// A map from label id to address in the program.
var _labels = {};
function assemble(program) {
var translated = [];
var labelsCount = 0;
for (var i = 0; i < program.length; i++) {
// if it's a label, record the address.
if (program[i].slice(-1) === ':') {
_labels[program[i]] = i - labelsCount;
labelsCount++;
continue;
}
translated.push(program[i]);
}
return translated;
}
// ---------------------------------------------------------------
// 3. CPU
// Main CPU cycle of running the program: "fetch, decode, eval".
// ---------------------------------------------------------------
var _shouldAdvancePc = true;
var CPU = {
execute: function(program) {
//return;
_pc = _labels['main:']; // jump to the entry point of the program
_running = true;
while (_running) {
var instr = program[_pc]; // "fetch!"
_shouldAdvancePc = true;
// ...
// here should be decode step, but we skip it
// for brevity since work directly with assembler
// ...
ops[instr[0]].apply(ops, instr.slice(1)); // "eval!"
// If it's not unconditional jump (which modifies the pc itself)
// just go to the next instruction.
if (_shouldAdvancePc) {
_pc++;
}
}
}
};
// ---------------------------------------------------------------
// 4. Test program
// ---------------------------------------------------------------
var program = [
// The `sum` function (label).
'sum:',
['mov', '$a', '_stack[$fp]'],
['mov', '$b', '_stack[$fp - 1]'],
['add', '$a', '$b'],
['cmp', '$a', 40], // on first run `$a` is 30, so this is false
['je', '.sumret'], // return from recursive call, when `$a` is 40
['push', '$a'], // otherwise, push again params,
['push', '$b'],
['call', 'sum'], // and do the recusive `sum` call
'.sumret:',
['ret', 2],
// Main entry point.
'main:',
// Call the `sum` function.
// Params are passed (pushed onto the stack)
['push', 10],
['push', 20],
['call', 'sum'],
// Exit.
['halt']
];
// Debug output of a program.
function dumpProgram(program) {
program.forEach(function(instr) {
if (Array.isArray(instr)) { // actual instruction
console.log(' ', instr[0], instr.slice(1).join(', '));
} else if (instr[0] == '.') { // local label
console.log(instr);
} else {
console.log('\n', instr); // label
}
});
}
// Print the example program.
console.log('Execute the program:');
dumpProgram(program);
// Run the program.
CPU.execute(assemble(program));
// Check the result in the accumulator, 40.
console.log('\nAccumulator result ($a):', _regs.$a, '\n');
// Execute the program:
//
// sum:
// mov $a, _stack[$fp]
// mov $b, _stack[$fp - 1]
// add $a, $b
// cmp $a, 40
// je .sumret
// push $a
// push $b
// call sum
// .sumret:
// ret 2
//
// main:
// push 10
// push 20
// call sum
// halt
//
// Accumulator result ($a): 40
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment