Created
November 19, 2016 11:51
-
-
Save viluon/646da7c011cc4b6247ad44b34ee0b5de to your computer and use it in GitHub Desktop.
FIKS VM
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// viluon's simple language | |
// Test modulo | |
mod 10 11 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
local args = { ... } | |
if #args ~= 1 then | |
error( "Expected 1 argument, the file to open!", 0 ) | |
end | |
local token_types | |
local operations | |
local emit | |
-- Global for the whole parser instance, yeah... I get that it's suboptimal, but cba | |
local defined_functions = {} | |
--- Serialise a table | |
-- @param tbl The table to pretty-print | |
-- @param indent The indentation level to use | |
-- @return nil | |
local function serialise( tbl, indent ) | |
indent = indent or 0 | |
for k, v in pairs( tbl ) do | |
if type( v ) == "table" then | |
print( string.rep( " ", indent ) .. tostring( k ) .. " = {" ) | |
serialise( v, indent + 2 ) | |
print( string.rep( " ", indent ) .. "}" ) | |
elseif type( v ) == "string" then | |
print( string.rep( " ", indent ) .. tostring( k ) .. " = '" .. tostring( v ) .. "'" ) | |
else | |
print( string.rep( " ", indent ) .. tostring( k ) .. " = " .. tostring( v ) ) | |
end | |
end | |
end | |
--- Pretty-print the registers | |
-- @param tbl The register table to pretty-print | |
-- @return nil | |
local function print_registers( tbl ) | |
local line_1 = "|" | |
local line_2 = "|" | |
local size = 5 | |
for i, v in ipairs( tbl ) do | |
line_1 = line_1 .. string.rep( " ", size - #tostring( i ) ) .. tostring( i ) .. "|" | |
line_2 = line_2 .. string.rep( " ", size - #tostring( v ) ) .. tostring( v ) .. "|" | |
end | |
print( line_1 ) | |
print( line_2 ) | |
end | |
--- Push an operation to the end of output | |
-- @param output The output stream to use | |
-- @param op The operation type | |
-- @param ... Arguments to the operation | |
-- @return nil | |
local function push( output, op, ... ) | |
output[ #output + 1 ] = { | |
op = op; | |
args = { ... }; | |
} | |
end | |
--- Create a new function | |
-- @param name The name for the function | |
-- @param fn_tokens The tokens of the function's body | |
-- @return nil | |
local function define_function( name, fn_tokens ) | |
print( "DEF " .. name ) | |
local ops = emit( fn_tokens, 2 ) | |
print( "END " .. name ) | |
defined_functions[ name ] = ops | |
ops.digest = function( tokens, ii, token, output ) | |
for _, t in ipairs( ops ) do | |
output[ #output + 1 ] = t | |
end | |
return ii + #ops | |
end | |
end | |
--- Generate graphviz code from FIKS instructions | |
-- @param FIKS_tokens The instruction array to generate the graph from | |
-- @return The graphviz description code (string) | |
local function generate_graph( FIKS_tokens ) | |
local output = [[digraph FIKS { | |
"END" [ label = "END", color = red, penwidth = 3.0, rank = "max", fontsize = 18.0 ]; | |
"START" [ label = "START", color = green, penwidth = 3.0, rank = "min", fontsize = 18.0 ]; | |
"START" -> "1"; | |
]] | |
local indent = 1 | |
for i, instruction in ipairs( FIKS_tokens ) do | |
local prefix = "\n" .. string.rep( "\t", indent ) | |
local label = "" | |
if instruction.op == "jmp" then | |
label = "JMP " .. instruction.args[ 1 ] | |
elseif instruction.op == "check" then | |
label = "?K" .. instruction.args[ 1 ] .. ":" .. instruction.args[ 2 ] | |
elseif instruction.op == "add" then | |
label = "K" .. instruction.args[ 1 ] .. "++" | |
elseif instruction.op == "sub" then | |
label = "K" .. instruction.args[ 1 ] .. "--" | |
end | |
output = output .. prefix .. '"' .. i .. '" [ label = "' .. label .. '" ];' | |
-- Where does this node point to? | |
local point = i + 1 | |
if instruction.op == "jmp" then | |
point = i + instruction.args[ 1 ] + 1 | |
elseif instruction.op == "check" then | |
local jump_point = i + instruction.args[ 2 ] + 1 | |
if not FIKS_tokens[ jump_point ] then | |
jump_point = "END" | |
end | |
-- Jumps from checks are fancily blueish | |
output = output .. prefix .. '"' .. i .. '" -> "' .. jump_point .. '" [ color = blue ];' | |
end | |
if not FIKS_tokens[ point ] then | |
point = "END" | |
end | |
output = output .. prefix .. '"' .. i .. '" -> "' .. point .. '";\n' | |
end | |
output = output .. "\n}" | |
return output | |
end | |
--- Emit FIKS source code from a token stream | |
-- @param tokens The tokens to process | |
-- @param level The indentation level to use for debug output | |
-- @return An array of FIKS instructions | |
function emit( tokens, level ) | |
level = level or 0 | |
local indent = string.rep( " ", level ) | |
-- Emit code | |
print( indent .. "Emitting code..." ) | |
local i = 1 | |
local output = {} | |
local constant_pool = {} | |
while i <= #tokens do | |
local token = tokens[ i ] | |
local ttype = token_types[ token.str ] or defined_functions[ token.str ] | |
if not ttype then | |
error( "Unexpected constant at " .. token.line .. ":" .. token.pos, 0 ) | |
end | |
local ok, offset_or_err = pcall( ttype.digest, tokens, i, token, output, constant_pool ) | |
if not ok then | |
error( "Syntax error at " .. token.line .. ":" .. token.pos .. ":\n\t" .. offset_or_err, 0 ) | |
end | |
i = i + ( offset_or_err or error( "META: No return value for i\n\tfrom " .. token.str, 2 ) ) + 1 | |
end | |
print( "Serialization of output tables is disabled." ) | |
--serialise( output, level ) | |
local source_out = indent | |
-- Loop through the output, one instruction at a time | |
for i, inst in ipairs( output ) do | |
source_out = source_out .. operations[ inst.op ].emit( unpack( inst.args ) ) .. "\n" .. indent | |
end | |
print( indent .. "FIKS source code:" ) | |
print( indent .. "===" ) | |
print( source_out ) | |
print( indent .. "===" ) | |
return output | |
end | |
token_types = { | |
add = { | |
digest = function( tokens, i, token, output ) | |
local register1 = tokens[ i + 1 ] | |
local register2 = tokens[ i + 2 ] | |
if not register1 or not register2 then | |
error( "Expected add <register1> <register2>", 0 ) | |
end | |
register1 = tonumber( register1.str ) or error( "register1 is not a number", 0 ) | |
register2 = tonumber( register2.str ) or error( "register2 is not a number", 0 ) | |
push( output, "check", register2, 3 ) | |
push( output, "add", register1 ) | |
push( output, "sub", register2 ) | |
push( output, "jmp", -4 ) | |
return 2 | |
end; | |
}; | |
def = { | |
digest = function( tokens, i, token, output ) | |
local name = tokens[ i + 1 ] | |
if not name then | |
error( "Expected def <name> end", 0 ) | |
end | |
name = name.str | |
local fn_tokens = {} | |
local ii = i + 2 | |
-- Keep track of defs in defs | |
local branch = 1 | |
while true do | |
if not tokens[ ii ] then | |
error( "Unifnished function definition", 0 ) | |
end | |
if tokens[ ii ].str == "end" then | |
if branch == 1 then | |
break | |
end | |
branch = branch - 1 | |
elseif tokens[ ii ].str == "def" then | |
branch = branch + 1 | |
end | |
fn_tokens[ #fn_tokens + 1 ] = tokens[ ii ] | |
ii = ii + 1 | |
end | |
define_function( name, fn_tokens ) | |
return 0 | |
end; | |
}; | |
[ "end" ] = { | |
digest = function( tokens, i, token, output ) | |
-- Ends should only be handled by def | |
error( "Nothing to end", 0 ) | |
end; | |
}; | |
set = { | |
digest = function( tokens, i, token, output, constant_pool ) | |
local register = tokens[ i + 1 ] | |
local constant = tokens[ i + 2 ] | |
register = register and tonumber( register.str ) | |
constant = constant and tonumber( constant.str ) | |
if not register or not constant then | |
error( "Expected set <register> <constant>", 0 ) | |
end | |
push( output, "check", register, 2 ) | |
push( output, "sub", register ) | |
push( output, "jmp", -3 ) | |
for i = 1, constant do | |
push( output, "add", register ) | |
end | |
--[[ | |
-- Constant mustn't interfere with the constant_pool array part | |
constant = tostring( constant ) | |
if not constant_pool[ constant ] then | |
constant_pool[ #constant_pool + 1 ] = constant | |
constant_pool[ constant ] = #constant_pool | |
end | |
token_types.copy.digest( { | |
{ str = tostring( ) }; | |
{ str = tostring( constant_pool[ constant ] ) }; | |
}, 0, {}, output ) | |
--]] | |
return 2 | |
end; | |
}; | |
copy = { | |
size = 12; | |
digest = function( tokens, i, token, output ) | |
local register1 = tokens[ i + 1 ] | |
local register2 = tokens[ i + 2 ] | |
if not register1 or not register2 then | |
error( "Expected copy <register1> <register2>", 0 ) | |
end | |
register1 = tonumber( register1.str ) or error( "register1 is not a number", 0 ) | |
register2 = tonumber( register2.str ) or error( "register2 is not a number", 0 ) | |
-- Preparation, ensure that 1 is 0 | |
push( output, "check", 1, 2 ) | |
push( output, "sub", 1 ) | |
push( output, "jmp", -3 ) | |
-- Move the value to the working register | |
push( output, "check", | |
register2, 3 ) | |
push( output, "add", 1 ) | |
push( output, "sub", register2 ) | |
push( output, "jmp", -4 ) | |
-- Move it back to the register which is now 0 and also to the target register | |
push( output, "check", 1, 4 ) | |
push( output, "add", register2 ) | |
push( output, "add", register1 ) | |
push( output, "sub", 1 ) | |
push( output, "jmp", -5 ) | |
return 2 | |
end; | |
}; | |
free = { | |
digest = function( tokens, i, token, output ) | |
local reg = tokens[ i + 1 ] | |
if not reg then | |
error( "Expected free <register>", 0 ) | |
end | |
reg = tonumber( reg.str ) or error( "Register is not a number", 0 ) | |
push( output, "check", reg, 2 ) | |
push( output, "sub", reg ) | |
push( output, "jmp", -3 ) | |
return 1 | |
end | |
}; | |
div = { | |
digest = function( tokens, i, token, output ) | |
local register1 = tokens[ i + 1 ] | |
local register2 = tokens[ i + 2 ] | |
if not register1 or not register2 then | |
error( "Expected div <register1> <register2>", 0 ) | |
end | |
register1 = tonumber( register1.str ) or error( "register1 is not a number", 0 ) | |
register2 = tonumber( register2.str ) or error( "register2 is not a number", 0 ) | |
local copy_size = token_types.copy.size | |
-- Preparation | |
-- Ensure our buffers are free | |
token_types.free.digest( | |
{ | |
{ str = "2" }; | |
}, | |
0, | |
{}, | |
output | |
) | |
token_types.free.digest( | |
{ | |
{ str = "3" }; | |
}, | |
0, | |
{}, | |
output | |
) | |
-- add *moves* the second register, so we'll have 0 at register1 | |
token_types.add.digest( | |
{ | |
-- 2 is one of the working registers for div | |
{ str = "2"; }; | |
{ str = tostring( register1 ); }; | |
}, | |
0, | |
{}, | |
output | |
) | |
-- We need a working copy of the divisor, too | |
token_types.copy.digest( | |
{ | |
-- 3 is another one of the working registers for div | |
{ str = "3"; }; | |
{ str = tostring( register2 ); }; | |
}, | |
0, | |
{}, | |
output | |
) | |
-- Check if divident is 0 | |
push( output, "check", 2, 6 ) | |
-- Check if we have removed a full divisor | |
push( output, "check", 3, 3 ) | |
push( output, "sub", 2 ) | |
push( output, "sub", 3 ) | |
-- Return to the checks | |
push( output, "jmp", -5 ) | |
-- Increment the result | |
push( output, "add", register1 ) | |
-- Jump backward to reset the working divisor copy and go again | |
push( output, "jmp", -7 - copy_size ) | |
-- Check if the result was exact | |
push( output, "check", 3, 1 ) | |
push( output, "jmp", 1 ) | |
-- It was, increment the result | |
push( output, "add", register1 ) | |
-- Done! | |
return 2 | |
end; | |
}; | |
mod = { | |
digest = function( tokens, i, token, output ) | |
local register1 = tokens[ i + 1 ] | |
local register2 = tokens[ i + 2 ] | |
if not register1 or not register2 then | |
error( "Expected mod <register1> <register2>", 0 ) | |
end | |
register1 = tonumber( register1.str ) or error( "register1 is not a number", 0 ) | |
register2 = tonumber( register2.str ) or error( "register2 is not a number", 0 ) | |
local copy_size = token_types.copy.size | |
-- Preparation | |
-- Ensure our buffers are free | |
token_types.free.digest( | |
{ | |
{ str = "2" }; | |
}, | |
0, | |
{}, | |
output | |
) | |
token_types.free.digest( | |
{ | |
{ str = "3" }; | |
}, | |
0, | |
{}, | |
output | |
) | |
token_types.add.digest( | |
{ | |
{ str = "2"; }; | |
{ str = tostring( register1 ); }; | |
}, | |
0, | |
{}, | |
output | |
) | |
token_types.copy.digest( | |
{ | |
{ str = "3"; }; | |
{ str = tostring( register2 ); }; | |
}, | |
0, | |
{}, | |
output | |
) | |
-- Check if divident is 0 | |
push( output, "check", 2, 8 + copy_size ) | |
-- Check if we have removed a full divisor | |
push( output, "check", 3, 3 ) | |
push( output, "sub", 2 ) | |
push( output, "sub", 3 ) | |
push( output, "jmp", -5 ) | |
-- Backup | |
--- Free register1 | |
push( output, "check", register1, 2 ) | |
push( output, "sub", register1 ) | |
push( output, "jmp", -3 ) | |
--- Copy contents of 2 to register1 | |
token_types.copy.digest( | |
{ | |
{ str = tostring( register1 ); }; | |
{ str = "2"; }; | |
}, | |
0, | |
{}, | |
output | |
) | |
-- Reset 3 | |
push( output, "jmp", -9 - copy_size * 2 ) | |
-- In case it was a match (like 8%4), return 0 (not 4) | |
push( output, "check", 3, 1 ) | |
push( output, "jmp", 3 ) | |
push( output, "check", register1, 2 ) | |
push( output, "sub", register1 ) | |
push( output, "jmp", -3 ) | |
return 2 | |
end; | |
}; | |
mul = { | |
digest = function( tokens, i, token, output ) | |
local register1 = tokens[ i + 1 ] | |
local register2 = tokens[ i + 2 ] | |
if not register1 or not register2 then | |
error( "Expected mul <register1> <register2>", 0 ) | |
end | |
register1 = tonumber( register1.str ) or error( "register1 is not a number", 0 ) | |
register2 = tonumber( register2.str ) or error( "register2 is not a number", 0 ) | |
-- Preparation | |
local copy_size = token_types.copy.size | |
-- Ensure our buffers are free | |
token_types.free.digest( | |
{ | |
{ str = "2" }; | |
}, | |
0, | |
{}, | |
output | |
) | |
token_types.free.digest( | |
{ | |
{ str = "3" }; | |
}, | |
0, | |
{}, | |
output | |
) | |
token_types.add.digest( | |
{ | |
{ str = "2"; }; | |
{ str = tostring( register1 ); }; | |
}, | |
0, | |
{}, | |
output | |
) | |
token_types.copy.digest( | |
{ | |
{ str = "3"; }; | |
{ str = tostring( register2 ); }; | |
}, | |
0, | |
{}, | |
output | |
) | |
push( output, "check", 2, copy_size + 6 ) | |
push( output, "check", 3, 3 ) | |
push( output, "sub", 3 ) | |
push( output, "add", register1 ) | |
push( output, "jmp", -4 ) | |
--- Copy contents of register2 to 3 again | |
token_types.copy.digest( | |
{ | |
{ str = "3"; }; | |
{ str = tostring( register2 ); }; | |
}, | |
0, | |
{}, | |
output | |
) | |
push( output, "sub", 2 ) | |
push( output, "jmp", -copy_size - 7 ) | |
return 2 | |
end; | |
}; | |
} | |
operations = { | |
add = { | |
exec = function( registers, arg ) | |
registers[ arg ] = registers[ arg ] + 1 | |
return 0 | |
end; | |
emit = function( arg ) | |
return "K" .. arg .. "++" | |
end; | |
}; | |
sub = { | |
exec = function( registers, arg ) | |
registers[ arg ] = registers[ arg ] - 1 | |
return 0 | |
end; | |
emit = function( arg ) | |
return "K" .. arg .. "--" | |
end; | |
}; | |
check = { | |
exec = function( registers, i, jump ) | |
if registers[ i ] == 0 then | |
return jump | |
end | |
return 0 | |
end; | |
emit = function( i, jump ) | |
return "?K" .. i .. " " .. jump | |
end; | |
}; | |
jmp = { | |
exec = function( registers, arg ) | |
return arg | |
end; | |
emit = function( arg ) | |
return "JMP " .. arg | |
end; | |
}; | |
lbl = { | |
exec = function() | |
return 0 | |
end; | |
emit = function( ... ) | |
return ";; " .. table.concat( { ... }, ", " ) | |
end | |
} | |
} | |
-- Load source code | |
local f = io.open( args[ 1 ], "r" ) | |
if not f then | |
error( "Failed to open file for reading!", 0 ) | |
end | |
local source = " " .. f:read( "*a" ) .. " " | |
f:close() | |
-- Parse into tokens | |
print( "Parsing..." ) | |
local tokens = {} | |
local i = 1 | |
local line = 1 | |
local char_pos = 1 | |
local start_pos = false | |
local current_string = "" | |
while i < #source do | |
local char = string.sub( source, i, i ) | |
if char == "/" and string.sub( source, i + 1, i + 1 ) == "/" then | |
local _, end_pos = string.find( source, "[^\n]*", i + 2 ) | |
if end_pos then | |
i = end_pos | |
end | |
elseif string.match( char, "%s" ) then | |
-- Whitespace | |
if string.match( current_string, "%S" ) then | |
-- current_string isn't just whitespace | |
-- => We've got a token ready, end it | |
tokens[ #tokens + 1 ] = { | |
str = current_string; | |
line = line; | |
pos = start_pos; | |
} | |
start_pos = false | |
current_string = "" | |
end | |
if char == "\n" then | |
-- New line! | |
char_pos = 0 | |
line = line + 1 | |
end | |
else | |
if not start_pos then | |
start_pos = char_pos | |
end | |
current_string = current_string .. char | |
end | |
char_pos = char_pos + 1 | |
i = i + 1 | |
end | |
print( "Processed " .. i .. " characters in " .. line .. " lines." ) | |
serialise( tokens ) | |
local output = emit( tokens ) | |
print( "Generating graph..." ) | |
local graph = generate_graph( output ) | |
local file = io.open( args[ 1 ] .. ".graph", "w" ) | |
file:write( graph ) | |
file:close() | |
-- Execute it! | |
local pc = 1 | |
local registers = { | |
-- First 9 are working registers (would be 10 but Lua arrays count from 1) | |
0, 0, 0, 0, 0, 0, 0, 0, 0, | |
-- The rest (10 currently) are user-space registers | |
7, 3, 0, 0, 0, 0, 0, 0, 0, 0, | |
} | |
print( "Registers before execution:" ) | |
serialise( registers ) | |
local start_time = os.clock() | |
while pc <= #output do | |
-- The current instruction | |
local inst = output[ pc ] | |
print( inst.op, table.concat( inst.args, " " ) ) | |
print_registers( registers ) | |
pc = pc + operations[ inst.op ].exec( registers, unpack( inst.args ) ) + 1 | |
end | |
print( "Time taken: " .. os.clock() - start_time ) | |
print( "Registers after execution:" ) | |
serialise( registers ) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment