Skip to content

Instantly share code, notes, and snippets.

@kkestell
Last active August 29, 2015 14:10
Show Gist options
  • Save kkestell/26bc92f99d027ee24dd8 to your computer and use it in GitHub Desktop.
Save kkestell/26bc92f99d027ee24dd8 to your computer and use it in GitHub Desktop.

TinyVM

TinyVM is a simple register based virtual machine implemented in C (tinyvm.c). The bytecode assembler is written in Python (tas.py). TinyVM has 4 registers ($0 - $3) and 64k of memory in a 32-bit address space (0x00000000 - 0x0000FFFF).

Each instruction is encoded in a single 64-bit word. Register count and memory are defined at compile time, but due to only having 32 bits available for addressing and 8 bits for registers, allocating more than 4GB of memory or 256 registers is pointless.

The following instructions (loosely based on MIPS) have been implemented:

No.    | Keyword | Instruction                   | Example            | Description
-------|---------|-------------------------------|--------------------|------------
0x00   | `halt`  | Halt                          | `halt`             | Terminate program
0x01   | `nop`   | No Operation                  | `nop`              | Do nothing
0x02   | `li`    | Load Immediate                | `li $0 0x00000000` | Load `0x00000000` into `$0`
0x03   | `lw`    | Load Word                     | `lw $0 $1`         | Load the contents of the memory location pointed to by `$1` into `$0`
0x04   | `sw`    | Store Word                    | `sw $0 $1`         | Store the contents of `$1` in the memory location pointed to by `$0`
0x05   | `add`   | Add                           | `add $2 $0 $1`     | Add `$0` to `$1` and store the result in `$2`
0x06   | `sub`   | Subtract                      | `sub $2 $0 $1`     | Subtract `$1` from `$0` and store the result in `$2`
0x07   | `mult`  | Multiply                      | `mult $2 $0 $1`    | Multiply `$0` by `$1` and store the result in `$2`
0x08   | `div`   | Divide                        | `div $2 $0 $1`     | Divide `$0` by `$1` and store the result in `$2`
0x09   | `j`     | Unconditional Jump            | `j 0x00000000`     | Jump to memory location `0x00000000`
0x0A   | `jr`    | Unconditional Jump (Register) | `jr $0`            | Jump to memory location stored in `$0`
0x0B   | `beq`   | Branch if Equal               | `bne $0 $1 $2`     | Branch to memory location stored in `$2` if `$0` and `$1` are equal
0x0C   | `bne`   | Branch if Not Equal           | `beq $0 $1 $2`     | Branch to memory location stored in `$2` if `$0` and `$1` are not equal
0x0D   | `inc`   | Increment Register            | `inc $0`           | Increment `$0`
0x0E   | `dec`   | Decrement Register            | `dec $0`           | Decrement `$0`

Bytecode Format

Each instruction is a single 64-bit word composed of eight 8-bit octets (little endian).

The first octet contains the instruction number, while the second, third, and fourth octets contain register numbers. The lower 32 bits are either an unsigned memory location or a signed immediate value.

Instruction | Register 0 | Register 1 | Register 2 | Immediate Value
------------|------------|------------|------------|----------------
0000 0000   | 0000 0000  | 0000 0000  | 0000 0000  | 0000 0000 0000 0000 0000 0000 0000 0000

Assembly            | Hex                  | Binary
--------------------|----------------------|-------
`li  $2 0x7fffffff` | `0x102000007fffffff` | 0001 0000 0010 0000 0000 0000 0000 0000 0111 1111 1111 1111 1111 1111 1111 1111
`add $2 $0 $1`      | `0x4020001000000000` | 0100 0000 0010 0000 0000 0000 0001 0000 0000 0000 0000 0000 0000 0000 0000 0000

Assembly Language

Comments

Lines which begin with a semicolon are treated as comments.

Labels

Lines which end with a colon are treated as labels. The assembler works in two passes so there is no need to forward declare your labels.

TODO

TODO

Example Assembly

; fill up 64k of memory

; counter
li $1 0x00000000

; end
li $2 0x0000FFFF

; memory location of loop start
li $3 loop

loop:
  ; store the value of the counter in the memory location contained in the counter.
  sw $1 $1

  ; increment the counter
  inc $1

  ; loop if the counter hasn't yet reached the end
  bne $1 $2 $3

  ; end program
  halt

Example

Assuming the assembly source above...

Assemble, generate bytecode

$ python tas.py test.asm test.tvm
0201000000000000 li $1 0x00000000
020200000000ffff li $2 0x0000FFFF
0203000000000003 li $3 loop
0401010000000000 sw $1 $1
0d01000000000000 inc $1
0c01020300000000 bne $1 $2 $3
0000000000000000 halt

Inspect bytecode

$ od -x test.tvm
0000000          00000000        02010000        0000ffff        02020000
0000020          00000003        02030000        00000000        04010100
0000040          00000000        0d010000        00000000        0c010203
0000060          00000000        00000000
0000070

Compile TinyVM

$ clang tinyvm.c -o tinyvm

Execute bytecode

Each cycle, TinyVM prints the value of the program counter, the next instruction, executes that instruction, and then prints the values of the registers, with each line representing a single CPU cycle.

$ ./tinyvm test.tvm
00000001 02000000ffffffff ffffffff 00000000 00000000 00000000
00000002 0201000012345678 ffffffff 12345678 00000000 00000000
00000003 0202000000012ac0 ffffffff 12345678 00012ac0 00000000
00000004 0503010200000000 ffffffff 12345678 00012ac0 12358138
00000005 0600010200000000 12332bb8 12345678 00012ac0 12358138
00000006 0000000000000000 12332bb8 12345678 00012ac0 12358138
from struct import *
import sys
if len(sys.argv) < 3:
print 'USAGE: as.py INFILE OUTFILE'
sys.exit(1)
lines = [line.strip() for line in open(sys.argv[1]) if line.strip() != '' and line.strip()[0] != ';']
executable_lines = [line for line in lines if line[-1] != ':']
jump_table = {}
pc = 0
for line in lines:
if line[-1] == ':':
jump_table[line[:-1]] = pc
pc += 1
instructions = {
'halt': ['xxxx', 0x0],
'nop' : ['xxxx', 0x1],
'li' : ['rxxi', 0x2],
'lw' : ['rrxx', 0x3],
'sw' : ['rrxx', 0x4],
'add' : ['rrrx', 0x5],
'sub' : ['rrrx', 0x6],
'mult': ['rrrx', 0x7],
'div' : ['rrrx', 0x8],
'j' : ['xxxi', 0x9],
'jr' : ['rxxx', 0xA],
'beq' : ['rrrx', 0xB],
'bne' : ['rrrx', 0xC],
'inc' : ['rxxx', 0xD],
'dec' : ['rxxx', 0xE]
}
def resolve(atom):
if atom in jump_table:
return jump_table[atom]
else:
return int(atom, 16)
of = open(sys.argv[2], 'wb')
for line in executable_lines:
tokens = [token.strip() for token in line.split(' ')]
if tokens[0] not in instructions:
print 'undefined instruction'
continue
fmt, instr = instructions[tokens[0]]
opcode = instr << 56
if fmt == 'rxxx':
opcode += (int(tokens[1][1]) << 48)
elif fmt == 'rxxi':
opcode += (int(tokens[1][1]) << 48) + resolve(tokens[2])
elif fmt == 'rrxx':
opcode += (int(tokens[1][1]) << 48) + (int(tokens[2][1]) << 40)
elif fmt == 'rrrx':
opcode += (int(tokens[1][1]) << 48) + (int(tokens[2][1]) << 40) + (int(tokens[3][1]) << 32)
elif fmt == 'xxxi':
opcode += resolve(tokens[1])
print format(opcode, '016x'), line
of.write(pack('<Q', opcode))
of.close
#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#define NUM_REGS 4
#define RAM_SIZE 65535
unsigned pc = 0;
unsigned long long *program = NULL;
int *ram = NULL;
int *r = NULL;
bool running = true;
unsigned long long fetch() {
return program[pc++];
}
void debug_registers() {
for(int i = 0; i < NUM_REGS; i++) printf("%08x ", r[i]);
printf("\n");
}
void debug_ram() {
printf("ram\n");
for(int i = 0; i <= RAM_SIZE; i++) {
printf("%08x ", ram[i]);
}
printf("\n");
}
void cycle() {
char op = 0;
unsigned r0 = 0;
unsigned r1 = 0;
unsigned r2 = 0;
unsigned im = 0;
unsigned long long instr = fetch();
printf("%08x %016llx ", pc, instr);
/*
0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000
^^^^^^^^^ ^^^^^^^^^ ^^^^^^^^^ ^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
instr. r0 r1 r2 immediate value
*/
op = (instr & 0xFF00000000000000) >> 56;
r0 = (instr & 0x00FF000000000000) >> 48;
r1 = (instr & 0x0000FF0000000000) >> 40;
r2 = (instr & 0x000000FF00000000) >> 32;
im = (instr & 0x00000000FFFFFFFF);
switch(op) {
case 0x0: // halt
running = false;
break;
case 0x1: // nop
break;
case 0x2: // li
r[r0] = im;
break;
case 0x3: // lw
r[r0] = ram[r[r1]];
break;
case 0x4: // sw
ram[r[r1]] = r[r0];
break;
case 0x5: // add
r[r0] = r[r1] + r[r2];
break;
case 0x6: // sub
r[r0] = r[r1] - r[r2];
break;
case 0x7: // mult
r[r0] = r[r1] * r[r2];
break;
case 0x8: // div
r[r0] = r[r1] / r[r2];
break;
case 0x9: // j
pc = im;
break;
case 0xA: // jr
pc = r[r0];
break;
case 0xB: // beq
if(r[r0] == r[r1]) pc = r[r2];
break;
case 0xC: // bne
if(r[r0] != r[r1]) pc = r[r2];
break;
case 0xD: // inc
r[r0]++;
break;
case 0xE: // dec
r[r0]--;
break;
}
debug_registers();
}
bool load_program(const char *filename) {
FILE *fp;
long length;
fp = fopen(filename, "r");
if(fp == NULL) return false;
fseek(fp, 0L, SEEK_END);
length = ftell(fp) / sizeof(int);
fseek(fp, 0L, SEEK_SET);
program = (unsigned long long*)calloc(length, sizeof(unsigned long long));
if(program == NULL) return false;
fread(program, sizeof(unsigned long long), length, fp);
fclose(fp);
return true;
}
bool allocate_ram() {
ram = (int*)calloc(RAM_SIZE, sizeof(int));
return ram == NULL ? false : true;
}
bool allocate_registers() {
r = (int*)calloc(NUM_REGS, sizeof(int));
return r == NULL ? false : true;
}
int main(int argc, const char *argv[]) {
if(!allocate_registers()) {
printf("error allocating registers\n");
return EXIT_FAILURE;
}
if(!allocate_ram()) {
printf("error allocating ram\n");
return EXIT_FAILURE;
}
if(!load_program(argv[1])) {
printf("error loading program\n");
return EXIT_FAILURE;
}
while(running) {
cycle();
}
//debug_ram();
return EXIT_SUCCESS;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment