-
-
Save bdw/fef76ca07b7203e49fc2 to your computer and use it in GitHub Desktop.
Estimated Maximum
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
.section .rodata | |
.fmt: | |
.string "%lld\n" | |
.text | |
.globl main | |
/* The driving loop. Nothing to see here */ | |
main: | |
pushq %rbp | |
mov %rsp, %rbp | |
subq $0x20, %rsp | |
movq $1, 0x8(%rsp) | |
.loop: | |
movq 0x8(%rsp), %rdi | |
call fib1 /* or call fib0 */ | |
/* print the string, disabled as it dominates the time spent */ | |
/* | |
movq $.fmt, %rdi | |
movq %rax, %rsi | |
movq $0, %rax | |
call printf | |
*/ | |
incq 0x8(%rsp) | |
/* loop 60000 times. the answer of fib doesn't make sense then, but that's not the point of this exercise */ | |
movq $60000, %rdx | |
cmpq 0x8(%rsp), %rdx | |
jg .loop | |
movq %rbp, %rsp | |
popq %rbp | |
ret | |
/* fibonacci algorithm as it is currently compiled from the 'hot loop' | |
* within the nqp frame above. The actual nqp frame is considerably | |
* larger due to lexotic handling, why it does that I don't really know. */ | |
fib0: | |
pushq %rbp | |
movq %rsp, %rbp | |
subq $0x80, %rsp | |
/* the actual code uses %rbx for moarvm registers, but these aren't available here, so | |
* we make do with the regular stack pointer */ | |
movq %rdi, (%rsp) | |
movq $0, 0x8(%rsp) | |
movq $1, 0x10(%rsp) | |
movq $0, 0x20(%rsp) | |
_loop: | |
/* cmp_i */ | |
movq (%rsp), %rax | |
cmpq 0x20(%rsp), %rax | |
setg %al | |
movzx %al, %rax | |
movq %rax, 0x20(%rsp) | |
/* if_i */ | |
movq 0x20(%rsp), %rax | |
testq %rax, %rax | |
je _epilogue | |
/* add_i */ | |
movq 0x10(%rsp), %rax | |
addq 0x8(%rsp), %rax | |
movq %rax, 0x10(%rsp) | |
/* sub_i */ | |
movq 0x10(%rsp), %rax | |
subq 0x8(%rsp), %rax | |
movq %rax, 0x8(%rsp) | |
/* const_i64_16 */ | |
movq $1, 0x28(%rsp) | |
/* sub_i */ | |
movq (%rsp), %rax | |
subq 0x28(%rsp), %rax | |
movq %rax, 0x28(%rsp) | |
/* set */ | |
movq 0x28(%rsp), %rcx | |
movq %rcx, (%rsp) | |
/* const_i64_16. this is useless but not removed */ | |
movq $0, 0x30(%rsp) | |
/* goto. note that the actual code emits a gc checkpoint at this and the earlier jump, | |
* but we're not running inside MoarVM so I've eliminated them in both frames */ | |
jmp _loop | |
_epilogue: | |
mov 0x10(%rsp), %rax | |
/* function epilogue */ | |
movq %rbp, %rsp /* restore stack top */ | |
popq %rbp /* restore stack bottom */ | |
ret | |
/* this is an example of what the tree compiler should reasonably be able to do, | |
* without truly fancy optimizations. The algorithm is the same, I've just added | |
* register selection and very minor instruction reductions based on constants. */ | |
fib1: | |
pushq %rbp | |
movq %rsp, %rbp | |
movq $1, %rax | |
movq $0, %rcx | |
movq %rdi, %rdx | |
_loop2: | |
/* if_i(cmp_i(r0, const_i(0)), _epilogue2) == unless_i(r0, epilogue2) */ | |
test %rdx, %rdx | |
jz _epilogue2 | |
/* add_i */ | |
addq %rcx, %rax | |
/* sub_i. I'm sure I can find a way to optimze this further, but that would not be realistic in general */ | |
movq %rax, %r8 | |
subq %rcx, %r8 | |
movq %r8, %rcx | |
/* sub_i(r0, const_i(1)) == dec_i(r0). note that decq isn't particularly fast */ | |
decq %rdx | |
/* goto */ | |
jmp _loop2 | |
_epilogue2: | |
movq %rcx, %rax | |
movq %rbp, %rsp | |
popq %rbp | |
ret |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env nqp-m | |
# what can i say, i like the fibonacci series | |
sub fib(int $n) { | |
my int $a := 0; | |
my int $b := 1; | |
while $n > 0 { | |
$b := $b + $a; | |
$a := $b - $a; | |
$n := $n - 1; | |
} | |
return $a; | |
} | |
my int $i := 0; | |
while $i < 60 { | |
nqp::say(fib($i)); | |
$i := $i + 1; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# With fib0 as the algorithm: | |
$ make bar | |
$ time ./bar | |
real 0m7.508s | |
user 0m7.513s | |
sys 0m0.001s | |
# now edit the file and use fib1 | |
$ make bar | |
$ time ./bar | |
real 0m1.414s | |
user 0m1.414s | |
sys 0m0.001s | |
# 0.7508 / 0.1414 = 5,3x speedup. Your Mileage Will Vary. | |
# This is admittedly a very tiny and somewhat artificial example! | |
# Not nearly all code will benefit in the same way. | |
# On the other hand, this example only demonstrates one of all feasible optimizations. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment