Playing with tail call optimisation emulation in Golo, I tried this code (inspired by https://github.com/kachayev/fn.py):
# Trampoline-style emulation of tail call optimisation (inspired by fn.py).
# A decorated function never recurses on the JVM stack: each "step" returns a
# TcoResult telling the driver loop in `tco` what to do next.
module TCO
# Outcome of one trampolined step:
#   Stop     - computation finished; carries the final value
#   Continue - re-invoke the same function with fresh arguments
#   Switch   - invoke a different function with fresh arguments
union TcoResult = {
Stop = { value }
Continue = { arguments }
Switch = { func, arguments }
}
# Decorator: wraps `func` in a loop that repeatedly invokes the current target
# and interprets the returned TcoResult. Stack depth stays constant no matter
# how many logical recursive steps are taken.
function tco = |func| -> |args...| {
var nextAction = func
var result = null
var nextCallArguments = args
while true {
# NOTE(review): `invoke` on a function reference with an argument array —
# assumes Golo spreads the array over the target's parameters; confirm
# against the Golo runtime docs for the version in use.
result = nextAction: invoke(nextCallArguments)
case {
# Union cases are matched via their generated JVM classes
# (TCO.types.TcoResult$<Case>.class).
when result oftype TCO.types.TcoResult$Stop.class {
# Final answer reached: unwrap and return it.
return result: value()
}
when result oftype TCO.types.TcoResult$Continue.class {
# Same function, new arguments: loop again instead of recursing.
nextCallArguments = result: arguments()
}
when result oftype TCO.types.TcoResult$Switch.class {
# Mutual recursion: swap in a different target function.
nextCallArguments = result: arguments()
nextAction = result: func()
}
# Decorated functions must always return a TcoResult value.
otherwise { raise("wrong result type") }
}
}
}
# Convenience constructors so decorated code reads naturally:
# `stop(x)` ends the trampoline with x; `recur(...)` requests another step.
function stop = |x| -> Stop(x)
function recur = |args...| -> Continue(args)
and this companion test module:
# Benchmark module comparing three Fibonacci implementations:
# naive recursion, plain tail recursion, and trampolined tail recursion.
module TestTCO
import TCO
# Naive doubly-recursive Fibonacci: exponential number of calls, used as the
# slow baseline. Uses long literals (1_L) throughout.
function fib = |n| -> match {
when n <= 1_L then 1_L
otherwise fib(n - 1_L) + fib(n - 2_L)
}
# Accumulator-style tail-recursive helper: a and b carry the last two
# Fibonacci values. Linear number of calls, but each self-call still consumes
# a JVM stack frame (Golo does not optimise tail calls natively).
local function _tcfib = |a, b, n| -> match {
when n == 0_L then a
when n == 1_L then b
otherwise _tcfib(b, a + b, n - 1_L)
}
function tcfib = |n| -> _tcfib(1_L, 1_L, n)
# Same accumulator algorithm, but driven by the TCO trampoline: @!tco wraps
# the function, and stop/recur replace direct returns and self-calls, so the
# stack no longer grows with n.
@!tco
local function _tcofib = |a, b, n| -> match {
when n == 0_L then stop(a)
when n == 1_L then stop(b)
otherwise recur(b, a + b, n - 1_L)
}
function tcofib = |n| -> _tcofib(1_L, 1_L, n)
# Invokes fref(val), printing the result and wall-clock elapsed milliseconds.
# A StackOverflowError is reported as "Blam..." rather than crashing the run;
# any other throwable is rethrown.
function timeit = |fref, val| {
let start = System.currentTimeMillis()
try {
let r = fref(val)
let end = System.currentTimeMillis()
println(r + " in " + (end - start) + "ms")
} catch (e) {
case {
when e oftype java.lang.StackOverflowError.class {
println("Blam...")
}
otherwise {
throw e
}
}
}
}
# Entry point: first CLI argument is n (parsed as a long); times all three
# variants on the same input via function references (^name).
function main = |args| {
let v = args: get(0): toLong()
println("n = " + v)
timeit(^tcfib, v)
timeit(^tcofib, v)
timeit(^fib, v)
}
$ golo compile *.golo && golo run --module TestTCO 40
n = 40
165580141 in 23ms
165580141 in 64ms
165580141 in 4636ms
Some overhead is added by the decorator (predictable), but... why is the non-optimised tail-recursive version (`tcfib`) so much faster than the naive recursive one (`fib`)? (Note: `fib` makes an exponential number of calls, while the tail-recursive accumulator versions make only a linear number — the difference is algorithmic, not a tail-call effect.)