Skip to content

Instantly share code, notes, and snippets.

@bdw

bdw/fib.asm Secret

Created March 31, 2015 23:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bdw/fef76ca07b7203e49fc2 to your computer and use it in GitHub Desktop.
Save bdw/fef76ca07b7203e49fc2 to your computer and use it in GitHub Desktop.
Estimated Maximum
.section .rodata
.fmt:
.string "%lld\n"
.text
.globl main
/* The driving loop. Nothing to see here */
main:
pushq %rbp
mov %rsp, %rbp
subq $0x20, %rsp
movq $1, 0x8(%rsp)
.loop:
movq 0x8(%rsp), %rdi
call fib1 /* or call fib0 */
/* print the string, disabled as it dominates the time spent */
/*
movq $.fmt, %rdi
movq %rax, %rsi
movq $0, %rax
call printf
*/
incq 0x8(%rsp)
/* loop 60000 times. the answer of fib doesn't make sense then, but that's not the point of this exercise */
movq $60000, %rdx
cmpq 0x8(%rsp), %rdx
jg .loop
movq %rbp, %rsp
popq %rbp
ret
/* fibonacci algorithm as it is currently compiled from the 'hot loop'
* within the nqp frame above. The actual nqp frame is considerably
* larger due to lexotic handling, why it does that I don't really know. */
fib0:
pushq %rbp
movq %rsp, %rbp
subq $0x80, %rsp
/* the actual code uses %rbx for moarvm registers, but these aren't available here, so
* we make do with the regular stack pointer */
movq %rdi, (%rsp)
movq $0, 0x8(%rsp)
movq $1, 0x10(%rsp)
movq $0, 0x20(%rsp)
_loop:
/* cmp_i */
movq (%rsp), %rax
cmpq 0x20(%rsp), %rax
setg %al
movzx %al, %rax
movq %rax, 0x20(%rsp)
/* if_i */
movq 0x20(%rsp), %rax
testq %rax, %rax
je _epilogue
/* add_i */
movq 0x10(%rsp), %rax
addq 0x8(%rsp), %rax
movq %rax, 0x10(%rsp)
/* sub_i */
movq 0x10(%rsp), %rax
subq 0x8(%rsp), %rax
movq %rax, 0x8(%rsp)
/* const_i64_16 */
movq $1, 0x28(%rsp)
/* sub_i */
movq (%rsp), %rax
subq 0x28(%rsp), %rax
movq %rax, 0x28(%rsp)
/* set */
movq 0x28(%rsp), %rcx
movq %rcx, (%rsp)
/* const_i64_16. this is useless but not removed */
movq $0, 0x30(%rsp)
/* goto. note that the actual code emits a gc checkpoint at this and the earlier jump,
* but we're not running inside MoarVM so I've eliminated them in both frames */
jmp _loop
_epilogue:
mov 0x10(%rsp), %rax
/* function epilogue */
movq %rbp, %rsp /* restore stack top */
popq %rbp /* restore stack bottom */
ret
/* this is an example of what the tree compiler should reasonably be able to do,
* without truly fancy optimizations. The algorithm is the same, I've just added
* register selection and very minor instruction reductions based on constants. */
fib1:
pushq %rbp
movq %rsp, %rbp
movq $1, %rax
movq $0, %rcx
movq %rdi, %rdx
_loop2:
/* if_i(cmp_i(r0, const_i(0)), _epilogue2) == unless_i(r0, epilogue2) */
test %rdx, %rdx
jz _epilogue2
/* add_i */
addq %rcx, %rax
/* sub_i. I'm sure I can find a way to optimze this further, but that would not be realistic in general */
movq %rax, %r8
subq %rcx, %r8
movq %r8, %rcx
/* sub_i(r0, const_i(1)) == dec_i(r0). note that decq isn't particularly fast */
decq %rdx
/* goto */
jmp _loop2
_epilogue2:
movq %rcx, %rax
movq %rbp, %rsp
popq %rbp
ret
#!/usr/bin/env nqp-m
# what can i say, i like the fibonacci series
sub fib(int $n) {
my int $a := 0;
my int $b := 1;
while $n > 0 {
$b := $b + $a;
$a := $b - $a;
$n := $n - 1;
}
return $a;
}
my int $i := 0;
while $i < 60 {
nqp::say(fib($i));
$i := $i + 1;
}
# With fib0 as the algorithm:
$ make bar
$ time ./bar
real 0m7.508s
user 0m7.513s
sys 0m0.001s
# now edit the file and use fib1
$ make bar
$ time ./bar
real 0m1.414s
user 0m1.414s
sys 0m0.001s
# 0.7508 / 0.1414 = 5,3x speedup. Your Mileage Will Vary.
# This is admittedly a very tiny and somewhat artificial example!
# Not nearly all code will benefit in the same way.
# On the other hand, this example only demonstrates one of all feasible optimizations.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment