Last active
February 16, 2016 16:56
-
-
Save wqweto/8fdeac414391cdbde356 to your computer and use it in GitHub Desktop.
Brainfuck optimizing compiler in Lua
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- This transpiler generates Lua source code from input brainf**k program, then compiles and | |
-- executes this Lua source code. Can be used with LuaJIT for jitted brainf**k experience. | |
-- | |
-- Based on https://github.com/prapin/LuaBrainFuck/blob/master/brainfuck.lua | |
-- Inspired by https://github.com/luapower/bf and its dynasm implementation (~5-10x faster) | |
-- Optimizations from https://www.nayuki.io/page/optimizing-brainfuck-compiler | |
-- | |
-- Optimizations due to lpeg grammar: | |
-- instead of generating repeating `i = i + 1` just output `i = i + N` | |
-- instead of generating repeating `t[i] = t[i] + 1` just output `t[i] = t[i] + N` | |
-- | |
-- Optimizations implemented with visitors on AST: | |
-- Postponing movements | |
-- Assign followed by add | |
-- Simple loops | |
-- | |
-- Latter AST optimization can be switched off and reorders for implementation of the -O parameter | |
-- of a hypothetical compiler | |
-- | |
-- Using an integer array (via LuaJIT ffi) for `t` is not faster than regular table (on Asus T100) | |
local lpeg = require"lpeg" | |
local decoda_output = decoda_output | |
-- local strict = require"strict" | |
local function writefile(name, content) | |
local f = io.open(name, "wb") | |
f:write(content) | |
f:close() | |
end | |
local function readfile(name) | |
local f = io.open(name, "rb") | |
local ret = f:read("*all") | |
f:close() | |
return ret | |
end | |
-- AST nodes factories as will be used by the lpeg parsing grammar below | |
local Defs = { } do | |
function Defs.createAssign(val) | |
return { tag = "assign", val = val, | |
accept = function(n, v) return v:visitAssign(n) end } | |
end | |
function Defs.assign() return Defs.createAssign(0) end | |
function Defs.createModify(inc, multidx) | |
return { tag = "modify", inc = inc, multidx = multidx, | |
accept = function(n, v) return v:visitModify(n) end } | |
end | |
function Defs.modify(a) | |
return Defs.createModify((a:sub(1, 1) == "+" and 1 or -1) * a:len()) | |
end | |
function Defs.createMove(dist) | |
return { tag = "move", dist = dist, | |
accept = function(n, v) return v:visitMove(n) end } | |
end | |
function Defs.move(a) | |
return Defs.createMove((a:sub(1, 1) == ">" and 1 or -1) * a:len()) | |
end | |
function Defs.io(a) | |
return { tag = (a == "." and "out" or "in"), | |
accept = function(n, v) return v:visitIo(n) end } | |
end | |
function Defs.loop(body) | |
return { tag = "loop", body = body, | |
accept = function(n, v) return v:visitLoop(n) end } | |
end | |
function Defs.root(body) | |
return { tag = "root", body = body, | |
accept = function(n, v) return v:visitRoot(n) end } | |
end | |
end | |
-- This brainf**k grammar collapses consecutive modification/repositions and handles | |
-- primitive assigment (of 0) | |
local grammar = lpeg.P{ "body", | |
body = lpeg.Ct(lpeg.V"expr" ^ 0), | |
expr = lpeg.P"[-]" ^ 1 / Defs.assign | |
+ lpeg.S"+-" ^ 1 / Defs.modify | |
+ lpeg.S"<>" ^ 1 / Defs.move | |
+ lpeg.S".," / Defs.io | |
+ (lpeg.S"[" * lpeg.Ct(lpeg.V"expr" ^ 0) * lpeg.S"]") / Defs.loop | |
+ (1 - lpeg.S"+-<>.,[]") -- ignore invalid input | |
} | |
-- == Visitor pattern == (from https://en.wikipedia.org/wiki/Visitor_pattern) | |
-- In object-oriented programming and software engineering, the visitor design pattern is a | |
-- way of separating an algorithm from an object structure on which it operates. A practical | |
-- result of this separation is the ability to add new operations to existing object structures | |
-- without modifying those structures. It is one way to follow the open/closed principle. | |
-- | |
-- In essence, the visitor allows one to add new virtual functions to a family of classes | |
-- without modifying the classes themselves; instead, one creates a visitor class that | |
-- implements all of the appropriate specializations of the virtual function. The visitor | |
-- takes the instance reference as input, and implements the goal through double dispatch. | |
local Visitor = { } do | |
Visitor.__index = Visitor | |
function Visitor:visitNode(n) | |
for _, v in ipairs(n.body or { }) do v:accept(self) end | |
return n | |
end | |
function Visitor:visitAssign(n) return self:visitNode(n) end | |
function Visitor:visitModify(n) return self:visitNode(n) end | |
function Visitor:visitMove(n) return self:visitNode(n) end | |
function Visitor:visitIo(n) return self:visitNode(n) end | |
function Visitor:visitLoop(n) return self:visitNode(n) end | |
-- note: by default `root` node is chained through `loop` node impl (calls `visitLoop`) | |
function Visitor:visitRoot(n) return self:visitLoop(n) end | |
end | |
-- Helper class for AST printing as nested tables | |
local Dumper = { } do | |
Dumper.__index = setmetatable(Dumper, Visitor) | |
function Dumper.new(defs) | |
return rawset(setmetatable(defs or { }, Dumper), "tag", "Dumper") | |
end | |
function Dumper:append(str) self.output((" "):rep(self.dep)..str) end | |
function Dumper:visitNode(n) self:append(n.tag) end | |
function Dumper:visitAssign(n) self:append(n.tag..", val="..n.val) end | |
function Dumper:visitModify(n) self:append(n.tag..", inc="..n.inc) end | |
function Dumper:visitMove(n) self:append(n.tag..", dist="..n.dist) end | |
function Dumper:visitLoop(n) | |
self:append(n.tag.." {") | |
self.dep = self.dep + 1 | |
Visitor.visitNode(self, n) | |
self.dep = self.dep - 1 | |
self:append("}") | |
end | |
function Dumper:visitRoot(n) | |
self.dep = 0 | |
self.output = self.output or print | |
Visitor.visitNode(self, n) | |
end | |
end | |
-- == Postponing movements == | |
-- In a block of brainf**k commands, if the block consists of only increments/decrements, | |
-- left/right, and input/output, then we can keep track of the virtual movement and then | |
-- perform it in one shot at the end of the block. For example, we can translate >+>-> into | |
-- t[i+1] = t[i+1]+1; t[i+2] = t[i+2]-1; i+=3;. Note that we have to actually perform the | |
-- movements before entering a loop/if and before exiting a loop/if. | |
local OffsetOptimizer = { } do | |
OffsetOptimizer.__index = setmetatable(OffsetOptimizer, Visitor) | |
function OffsetOptimizer.new(defs) | |
return rawset(setmetatable(defs or { }, OffsetOptimizer), "tag", "OffsetOptimizer") | |
end | |
function OffsetOptimizer:visitLoop(n) | |
local t = { } | |
local offset = 0 | |
for _, v in ipairs(n.body) do | |
if v.tag == "move" then | |
offset = offset + v.dist | |
v = nil | |
elseif v.tag == "loop" then | |
if offset ~= 0 then | |
t[#t + 1] = Defs.createMove(offset) | |
offset = 0 | |
end | |
else | |
v.offset = offset | |
end | |
if v then | |
t[#t + 1] = v:accept(self) | |
end | |
end | |
if offset ~= 0 then | |
t[#t + 1] = Defs.createMove(offset) | |
end | |
n.body = t | |
return n | |
end | |
end | |
-- == Assign followed by add == | |
-- Obviously, t[i] = 0; followed by t[i] = t[i]+v; can be replaced by t[i] = v;. As well, | |
-- t[i] = 0; followed by t[i] = t[i] + t[j]*v; can be replaced by t[i] = t[j]*v;. | |
local AssignOptimizer = { } do | |
AssignOptimizer.__index = setmetatable(AssignOptimizer, Visitor) | |
function AssignOptimizer.new(defs) | |
return rawset(setmetatable(defs or { }, AssignOptimizer), "tag", "AssignOptimizer") | |
end | |
function AssignOptimizer:visitLoop(n) | |
local t = { } | |
for i, v in ipairs(n.body) do | |
if v.tag == "modify" then | |
local prev = t[i-1] or { } | |
if prev.tag == "assign" and prev.offset == v.offset | |
and (prev.srcidx or 0) == (v.srcidx or 0) then | |
prev.val = prev.val + v.inc | |
v = nil | |
end | |
end | |
if v then | |
t[#t + 1] = v:accept(self) | |
end | |
end | |
n.body = t | |
return n | |
end | |
end | |
-- == Simple loops == | |
-- A moment’s thought should make it clear that [-] translates to t[i] = 0;. But we can | |
-- generalize even further than that. If the loop body has no subloops and no input/output, | |
-- all the movements add up to 0, and all the increments/decrements at t[i] add up to −1, | |
-- then we are basically running the loop body t[i] times. So for all the increments/decrements | |
-- outside of t[i], some multiple of t[i] is added to that location. For example, we can | |
-- translate [->+>---<<] into t[i+1] = t[i+1]+t[i]; t[i+2] = t[i+2]-3*t[i]; t[i] = 0;. | |
local MultiplyOptimizer = { } do | |
MultiplyOptimizer.__index = setmetatable(MultiplyOptimizer, Visitor) | |
function MultiplyOptimizer.new(defs) | |
return rawset(setmetatable(defs or { }, MultiplyOptimizer), "tag", "MultiplyOptimizer") | |
end | |
local function tryTransform(body, d) | |
local offset = 0 | |
for _, v in ipairs(body) do | |
if v.tag == "assign" then | |
-- do nothing | |
elseif v.tag == "modify" then | |
local idx = offset + v.offset | |
d[idx] = (d[idx] or 0) + v.inc | |
elseif v.tag == "move" then | |
offset = offset + v.dist | |
else | |
return -- loop body too specialized | |
end | |
end | |
return offset == 0 and d[0] == -1 | |
end | |
function MultiplyOptimizer:visitLoop(n) | |
local t = { } | |
for _, v in ipairs(n.body) do | |
local d = { } | |
if v.body and tryTransform(v.body, d) then | |
local srcidx = v.offset or 0 | |
d[0] = nil | |
for i, inc in pairs(d) do | |
t[#t + 1] = Defs.createModify(inc, srcidx) | |
t[#t].offset = srcidx + i | |
end | |
t[#t + 1] = Defs.createAssign(0) | |
t[#t].offset = srcidx | |
else | |
t[#t + 1] = v | |
end | |
end | |
n.body = t | |
return n | |
end | |
end | |
local LuaEmitter = { } do | |
LuaEmitter.__index = setmetatable(LuaEmitter, Visitor) | |
function LuaEmitter.new(defs) | |
return rawset(setmetatable(defs or { }, LuaEmitter), "tag", "LuaEmitter") | |
end | |
function LuaEmitter:append(str) | |
self.result[#self.result + 1] = (" "):rep(self.dep)..str | |
end | |
local function tidx(offset) | |
return "t[i"..((offset or 0) ~= 0 and (offset > 0 and "+" or "")..offset or "").."]" | |
end | |
function LuaEmitter:visitAssign(n) | |
self:append(tidx(n.offset).." = "..n.val) | |
end | |
function LuaEmitter:visitModify(n) | |
self:append(tidx(n.offset).." = "..tidx(n.offset)..(n.inc > 0 and "+" or "") | |
..n.inc..(n.multidx and "*"..tidx(n.multidx) or "")) | |
end | |
function LuaEmitter:visitMove(n) | |
self:append("i = i"..(n.dist > 0 and "+" or "")..n.dist) | |
end | |
function LuaEmitter:visitIo(n) | |
self:append(n.node == "in" and tidx(n.offset).." = get()" or "put("..tidx(n.offset)..")") | |
end | |
function LuaEmitter:visitLoop(n) | |
self:append("while "..tidx(n.offset).." ~= 0 do") | |
self.dep = self.dep + 1 | |
if self.logdebug then | |
self:append(self.logdebug.."("..(#self.result + 1)..", i)") | |
end | |
Visitor.visitNode(self, n) | |
self.dep = self.dep - 1 | |
self:append("end") | |
end | |
function LuaEmitter:visitRoot(n) | |
self.dep = 0 | |
self.result = { } | |
Visitor.visitNode(self, n) | |
return table.concat(self.result, "\n") | |
end | |
end | |
local function bf2(code) | |
local ast = Defs.root(grammar:match(code)) | |
ast = ast:accept(OffsetOptimizer.new()) | |
ast = ast:accept(AssignOptimizer.new()) | |
ast = ast:accept(MultiplyOptimizer.new()) | |
--ast:accept(Dumper.new()) | |
local src = ast:accept(LuaEmitter.new()) -- { logdebug = "logdebug" })) | |
if decoda_output then | |
writefile("bf2.src", src) | |
end | |
local env = setmetatable({ | |
i = 0, | |
t = setmetatable({}, { __index = function() return 0 end }), | |
get = function() return io.read(1):byte() end, | |
put = function(c) io.write(string.char(tonumber(c))) end, | |
logdebug = function(...) decoda_output(table.concat({ ... }, "\t")) end, | |
}, { __index = _G }) | |
if setfenv and loadstring then | |
return setfenv(assert(loadstring(src)), env)() | |
end | |
return load(src, nil, "t", env)() | |
end | |
if ... ~= "bf2" then | |
local file = #arg > 0 and arg[1] | |
if not file then | |
-- sample Hello World! program | |
bf2('++++++++++[>+++++++>++++++++++>+++>+<<<<-]>++.>+.+++++++'.. | |
'..+++.>++.<<+++++++++++++++.>.+++.------.--------.>+.>.') | |
else | |
bf2(readfile(file)) | |
end | |
if decoda_output then | |
io.read() | |
end | |
end | |
return bf2 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment