Skip to content

Instantly share code, notes, and snippets.

@viluon
Created November 19, 2016 11:51
Show Gist options
  • Save viluon/646da7c011cc4b6247ad44b34ee0b5de to your computer and use it in GitHub Desktop.
Save viluon/646da7c011cc4b6247ad44b34ee0b5de to your computer and use it in GitHub Desktop.
FIKS VM
// viluon's simple language
// Test modulo
mod 10 11
local args = { ... }
if #args ~= 1 then
error( "Expected 1 argument, the file to open!", 0 )
end
local token_types
local operations
local emit
-- Global for the whole parser instance, yeah... I get that it's suboptimal, but cba
local defined_functions = {}
--- Serialise a table
-- @param tbl The table to pretty-print
-- @param indent The indentation level to use
-- @return nil
local function serialise( tbl, indent )
indent = indent or 0
for k, v in pairs( tbl ) do
if type( v ) == "table" then
print( string.rep( " ", indent ) .. tostring( k ) .. " = {" )
serialise( v, indent + 2 )
print( string.rep( " ", indent ) .. "}" )
elseif type( v ) == "string" then
print( string.rep( " ", indent ) .. tostring( k ) .. " = '" .. tostring( v ) .. "'" )
else
print( string.rep( " ", indent ) .. tostring( k ) .. " = " .. tostring( v ) )
end
end
end
--- Pretty-print the registers
-- @param tbl The register table to pretty-print
-- @return nil
local function print_registers( tbl )
local line_1 = "|"
local line_2 = "|"
local size = 5
for i, v in ipairs( tbl ) do
line_1 = line_1 .. string.rep( " ", size - #tostring( i ) ) .. tostring( i ) .. "|"
line_2 = line_2 .. string.rep( " ", size - #tostring( v ) ) .. tostring( v ) .. "|"
end
print( line_1 )
print( line_2 )
end
--- Push an operation to the end of output
-- @param output The output stream to use
-- @param op The operation type
-- @param ... Arguments to the operation
-- @return nil
local function push( output, op, ... )
output[ #output + 1 ] = {
op = op;
args = { ... };
}
end
--- Create a new function
-- @param name The name for the function
-- @param fn_tokens The tokens of the function's body
-- @return nil
local function define_function( name, fn_tokens )
print( "DEF " .. name )
local ops = emit( fn_tokens, 2 )
print( "END " .. name )
defined_functions[ name ] = ops
ops.digest = function( tokens, ii, token, output )
for _, t in ipairs( ops ) do
output[ #output + 1 ] = t
end
return ii + #ops
end
end
--- Generate graphviz code from FIKS instructions
-- @param FIKS_tokens The instruction array to generate the graph from
-- @return The graphviz description code (string)
local function generate_graph( FIKS_tokens )
local output = [[digraph FIKS {
"END" [ label = "END", color = red, penwidth = 3.0, rank = "max", fontsize = 18.0 ];
"START" [ label = "START", color = green, penwidth = 3.0, rank = "min", fontsize = 18.0 ];
"START" -> "1";
]]
local indent = 1
for i, instruction in ipairs( FIKS_tokens ) do
local prefix = "\n" .. string.rep( "\t", indent )
local label = ""
if instruction.op == "jmp" then
label = "JMP " .. instruction.args[ 1 ]
elseif instruction.op == "check" then
label = "?K" .. instruction.args[ 1 ] .. ":" .. instruction.args[ 2 ]
elseif instruction.op == "add" then
label = "K" .. instruction.args[ 1 ] .. "++"
elseif instruction.op == "sub" then
label = "K" .. instruction.args[ 1 ] .. "--"
end
output = output .. prefix .. '"' .. i .. '" [ label = "' .. label .. '" ];'
-- Where does this node point to?
local point = i + 1
if instruction.op == "jmp" then
point = i + instruction.args[ 1 ] + 1
elseif instruction.op == "check" then
local jump_point = i + instruction.args[ 2 ] + 1
if not FIKS_tokens[ jump_point ] then
jump_point = "END"
end
-- Jumps from checks are fancily blueish
output = output .. prefix .. '"' .. i .. '" -> "' .. jump_point .. '" [ color = blue ];'
end
if not FIKS_tokens[ point ] then
point = "END"
end
output = output .. prefix .. '"' .. i .. '" -> "' .. point .. '";\n'
end
output = output .. "\n}"
return output
end
--- Emit FIKS source code from a token stream
-- @param tokens The tokens to process
-- @param level The indentation level to use for debug output
-- @return An array of FIKS instructions
function emit( tokens, level )
level = level or 0
local indent = string.rep( " ", level )
-- Emit code
print( indent .. "Emitting code..." )
local i = 1
local output = {}
local constant_pool = {}
while i <= #tokens do
local token = tokens[ i ]
local ttype = token_types[ token.str ] or defined_functions[ token.str ]
if not ttype then
error( "Unexpected constant at " .. token.line .. ":" .. token.pos, 0 )
end
local ok, offset_or_err = pcall( ttype.digest, tokens, i, token, output, constant_pool )
if not ok then
error( "Syntax error at " .. token.line .. ":" .. token.pos .. ":\n\t" .. offset_or_err, 0 )
end
i = i + ( offset_or_err or error( "META: No return value for i\n\tfrom " .. token.str, 2 ) ) + 1
end
print( "Serialization of output tables is disabled." )
--serialise( output, level )
local source_out = indent
-- Loop through the output, one instruction at a time
for i, inst in ipairs( output ) do
source_out = source_out .. operations[ inst.op ].emit( unpack( inst.args ) ) .. "\n" .. indent
end
print( indent .. "FIKS source code:" )
print( indent .. "===" )
print( source_out )
print( indent .. "===" )
return output
end
token_types = {
add = {
digest = function( tokens, i, token, output )
local register1 = tokens[ i + 1 ]
local register2 = tokens[ i + 2 ]
if not register1 or not register2 then
error( "Expected add <register1> <register2>", 0 )
end
register1 = tonumber( register1.str ) or error( "register1 is not a number", 0 )
register2 = tonumber( register2.str ) or error( "register2 is not a number", 0 )
push( output, "check", register2, 3 )
push( output, "add", register1 )
push( output, "sub", register2 )
push( output, "jmp", -4 )
return 2
end;
};
def = {
digest = function( tokens, i, token, output )
local name = tokens[ i + 1 ]
if not name then
error( "Expected def <name> end", 0 )
end
name = name.str
local fn_tokens = {}
local ii = i + 2
-- Keep track of defs in defs
local branch = 1
while true do
if not tokens[ ii ] then
error( "Unifnished function definition", 0 )
end
if tokens[ ii ].str == "end" then
if branch == 1 then
break
end
branch = branch - 1
elseif tokens[ ii ].str == "def" then
branch = branch + 1
end
fn_tokens[ #fn_tokens + 1 ] = tokens[ ii ]
ii = ii + 1
end
define_function( name, fn_tokens )
return 0
end;
};
[ "end" ] = {
digest = function( tokens, i, token, output )
-- Ends should only be handled by def
error( "Nothing to end", 0 )
end;
};
set = {
digest = function( tokens, i, token, output, constant_pool )
local register = tokens[ i + 1 ]
local constant = tokens[ i + 2 ]
register = register and tonumber( register.str )
constant = constant and tonumber( constant.str )
if not register or not constant then
error( "Expected set <register> <constant>", 0 )
end
push( output, "check", register, 2 )
push( output, "sub", register )
push( output, "jmp", -3 )
for i = 1, constant do
push( output, "add", register )
end
--[[
-- Constant mustn't interfere with the constant_pool array part
constant = tostring( constant )
if not constant_pool[ constant ] then
constant_pool[ #constant_pool + 1 ] = constant
constant_pool[ constant ] = #constant_pool
end
token_types.copy.digest( {
{ str = tostring( ) };
{ str = tostring( constant_pool[ constant ] ) };
}, 0, {}, output )
--]]
return 2
end;
};
copy = {
size = 12;
digest = function( tokens, i, token, output )
local register1 = tokens[ i + 1 ]
local register2 = tokens[ i + 2 ]
if not register1 or not register2 then
error( "Expected copy <register1> <register2>", 0 )
end
register1 = tonumber( register1.str ) or error( "register1 is not a number", 0 )
register2 = tonumber( register2.str ) or error( "register2 is not a number", 0 )
-- Preparation, ensure that 1 is 0
push( output, "check", 1, 2 )
push( output, "sub", 1 )
push( output, "jmp", -3 )
-- Move the value to the working register
push( output, "check",
register2, 3 )
push( output, "add", 1 )
push( output, "sub", register2 )
push( output, "jmp", -4 )
-- Move it back to the register which is now 0 and also to the target register
push( output, "check", 1, 4 )
push( output, "add", register2 )
push( output, "add", register1 )
push( output, "sub", 1 )
push( output, "jmp", -5 )
return 2
end;
};
free = {
digest = function( tokens, i, token, output )
local reg = tokens[ i + 1 ]
if not reg then
error( "Expected free <register>", 0 )
end
reg = tonumber( reg.str ) or error( "Register is not a number", 0 )
push( output, "check", reg, 2 )
push( output, "sub", reg )
push( output, "jmp", -3 )
return 1
end
};
div = {
digest = function( tokens, i, token, output )
local register1 = tokens[ i + 1 ]
local register2 = tokens[ i + 2 ]
if not register1 or not register2 then
error( "Expected div <register1> <register2>", 0 )
end
register1 = tonumber( register1.str ) or error( "register1 is not a number", 0 )
register2 = tonumber( register2.str ) or error( "register2 is not a number", 0 )
local copy_size = token_types.copy.size
-- Preparation
-- Ensure our buffers are free
token_types.free.digest(
{
{ str = "2" };
},
0,
{},
output
)
token_types.free.digest(
{
{ str = "3" };
},
0,
{},
output
)
-- add *moves* the second register, so we'll have 0 at register1
token_types.add.digest(
{
-- 2 is one of the working registers for div
{ str = "2"; };
{ str = tostring( register1 ); };
},
0,
{},
output
)
-- We need a working copy of the divisor, too
token_types.copy.digest(
{
-- 3 is another one of the working registers for div
{ str = "3"; };
{ str = tostring( register2 ); };
},
0,
{},
output
)
-- Check if divident is 0
push( output, "check", 2, 6 )
-- Check if we have removed a full divisor
push( output, "check", 3, 3 )
push( output, "sub", 2 )
push( output, "sub", 3 )
-- Return to the checks
push( output, "jmp", -5 )
-- Increment the result
push( output, "add", register1 )
-- Jump backward to reset the working divisor copy and go again
push( output, "jmp", -7 - copy_size )
-- Check if the result was exact
push( output, "check", 3, 1 )
push( output, "jmp", 1 )
-- It was, increment the result
push( output, "add", register1 )
-- Done!
return 2
end;
};
mod = {
digest = function( tokens, i, token, output )
local register1 = tokens[ i + 1 ]
local register2 = tokens[ i + 2 ]
if not register1 or not register2 then
error( "Expected mod <register1> <register2>", 0 )
end
register1 = tonumber( register1.str ) or error( "register1 is not a number", 0 )
register2 = tonumber( register2.str ) or error( "register2 is not a number", 0 )
local copy_size = token_types.copy.size
-- Preparation
-- Ensure our buffers are free
token_types.free.digest(
{
{ str = "2" };
},
0,
{},
output
)
token_types.free.digest(
{
{ str = "3" };
},
0,
{},
output
)
token_types.add.digest(
{
{ str = "2"; };
{ str = tostring( register1 ); };
},
0,
{},
output
)
token_types.copy.digest(
{
{ str = "3"; };
{ str = tostring( register2 ); };
},
0,
{},
output
)
-- Check if divident is 0
push( output, "check", 2, 8 + copy_size )
-- Check if we have removed a full divisor
push( output, "check", 3, 3 )
push( output, "sub", 2 )
push( output, "sub", 3 )
push( output, "jmp", -5 )
-- Backup
--- Free register1
push( output, "check", register1, 2 )
push( output, "sub", register1 )
push( output, "jmp", -3 )
--- Copy contents of 2 to register1
token_types.copy.digest(
{
{ str = tostring( register1 ); };
{ str = "2"; };
},
0,
{},
output
)
-- Reset 3
push( output, "jmp", -9 - copy_size * 2 )
-- In case it was a match (like 8%4), return 0 (not 4)
push( output, "check", 3, 1 )
push( output, "jmp", 3 )
push( output, "check", register1, 2 )
push( output, "sub", register1 )
push( output, "jmp", -3 )
return 2
end;
};
mul = {
digest = function( tokens, i, token, output )
local register1 = tokens[ i + 1 ]
local register2 = tokens[ i + 2 ]
if not register1 or not register2 then
error( "Expected mul <register1> <register2>", 0 )
end
register1 = tonumber( register1.str ) or error( "register1 is not a number", 0 )
register2 = tonumber( register2.str ) or error( "register2 is not a number", 0 )
-- Preparation
local copy_size = token_types.copy.size
-- Ensure our buffers are free
token_types.free.digest(
{
{ str = "2" };
},
0,
{},
output
)
token_types.free.digest(
{
{ str = "3" };
},
0,
{},
output
)
token_types.add.digest(
{
{ str = "2"; };
{ str = tostring( register1 ); };
},
0,
{},
output
)
token_types.copy.digest(
{
{ str = "3"; };
{ str = tostring( register2 ); };
},
0,
{},
output
)
push( output, "check", 2, copy_size + 6 )
push( output, "check", 3, 3 )
push( output, "sub", 3 )
push( output, "add", register1 )
push( output, "jmp", -4 )
--- Copy contents of register2 to 3 again
token_types.copy.digest(
{
{ str = "3"; };
{ str = tostring( register2 ); };
},
0,
{},
output
)
push( output, "sub", 2 )
push( output, "jmp", -copy_size - 7 )
return 2
end;
};
}
operations = {
add = {
exec = function( registers, arg )
registers[ arg ] = registers[ arg ] + 1
return 0
end;
emit = function( arg )
return "K" .. arg .. "++"
end;
};
sub = {
exec = function( registers, arg )
registers[ arg ] = registers[ arg ] - 1
return 0
end;
emit = function( arg )
return "K" .. arg .. "--"
end;
};
check = {
exec = function( registers, i, jump )
if registers[ i ] == 0 then
return jump
end
return 0
end;
emit = function( i, jump )
return "?K" .. i .. " " .. jump
end;
};
jmp = {
exec = function( registers, arg )
return arg
end;
emit = function( arg )
return "JMP " .. arg
end;
};
lbl = {
exec = function()
return 0
end;
emit = function( ... )
return ";; " .. table.concat( { ... }, ", " )
end
}
}
-- Load source code
local f = io.open( args[ 1 ], "r" )
if not f then
error( "Failed to open file for reading!", 0 )
end
local source = " " .. f:read( "*a" ) .. " "
f:close()
-- Parse into tokens
print( "Parsing..." )
local tokens = {}
local i = 1
local line = 1
local char_pos = 1
local start_pos = false
local current_string = ""
while i < #source do
local char = string.sub( source, i, i )
if char == "/" and string.sub( source, i + 1, i + 1 ) == "/" then
local _, end_pos = string.find( source, "[^\n]*", i + 2 )
if end_pos then
i = end_pos
end
elseif string.match( char, "%s" ) then
-- Whitespace
if string.match( current_string, "%S" ) then
-- current_string isn't just whitespace
-- => We've got a token ready, end it
tokens[ #tokens + 1 ] = {
str = current_string;
line = line;
pos = start_pos;
}
start_pos = false
current_string = ""
end
if char == "\n" then
-- New line!
char_pos = 0
line = line + 1
end
else
if not start_pos then
start_pos = char_pos
end
current_string = current_string .. char
end
char_pos = char_pos + 1
i = i + 1
end
print( "Processed " .. i .. " characters in " .. line .. " lines." )
serialise( tokens )
local output = emit( tokens )
print( "Generating graph..." )
local graph = generate_graph( output )
local file = io.open( args[ 1 ] .. ".graph", "w" )
file:write( graph )
file:close()
-- Execute it!
local pc = 1
local registers = {
-- First 9 are working registers (would be 10 but Lua arrays count from 1)
0, 0, 0, 0, 0, 0, 0, 0, 0,
-- The rest (10 currently) are user-space registers
7, 3, 0, 0, 0, 0, 0, 0, 0, 0,
}
print( "Registers before execution:" )
serialise( registers )
local start_time = os.clock()
while pc <= #output do
-- The current instruction
local inst = output[ pc ]
print( inst.op, table.concat( inst.args, " " ) )
print_registers( registers )
pc = pc + operations[ inst.op ].exec( registers, unpack( inst.args ) ) + 1
end
print( "Time taken: " .. os.clock() - start_time )
print( "Registers after execution:" )
serialise( registers )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment