Created
August 26, 2012 17:16
-
-
Save orlp/3481749 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
inline int isqrt(uint32_t x) { | |
const uint32_t *p = squares; | |
int i = 32768; | |
while (i) { | |
if (p[i] <= x) p += i; | |
i >>= 1; | |
} | |
return p - squares; | |
} | |
# 32-bit x86 listing (AT&T syntax); appears to be the compiler's output for
# the loop-based isqrt, with the loop fully unrolled into one
# compare-and-advance group per step (byte offsets 65536, 32768, ..., 4;
# table entries are 4 bytes wide).
# Registers: %edx = x, %eax = current table pointer (and the return value),
# %ecx = candidate pointer for the current step.
_isqrt: | |
pushl %ebp | |
movl %esp, %ebp | |
# Load the argument x from the stack.
movl 8(%ebp), %edx | |
# 1073741824 = 32768 * 32768. The first step compares x against this
# immediate instead of reading the table.
# NOTE(review): presumably squares[32768] was constant-folded -- the table
# definition is not visible here; confirm.
cmpl $1073741824, %edx | |
jae L2 | |
# x < 1073741824: start the search at the beginning of the table.
movl $_squares, %eax | |
L3: | |
# Pattern repeated for every group below: %ecx = %eax + offset; if the table
# entry at that offset is above x (unsigned) the move is skipped, otherwise
# %eax advances to %ecx.
leal 65536(%eax), %ecx | |
cmpl %edx, 65536(%eax) | |
ja L5 | |
movl %ecx, %eax | |
L5: | |
leal 32768(%eax), %ecx | |
cmpl %edx, 32768(%eax) | |
ja L7 | |
movl %ecx, %eax | |
L7: | |
leal 16384(%eax), %ecx | |
cmpl %edx, 16384(%eax) | |
ja L9 | |
movl %ecx, %eax | |
L9: | |
leal 8192(%eax), %ecx | |
cmpl %edx, 8192(%eax) | |
ja L11 | |
movl %ecx, %eax | |
L11: | |
leal 4096(%eax), %ecx | |
cmpl %edx, 4096(%eax) | |
ja L13 | |
movl %ecx, %eax | |
L13: | |
leal 2048(%eax), %ecx | |
cmpl %edx, 2048(%eax) | |
ja L15 | |
movl %ecx, %eax | |
L15: | |
leal 1024(%eax), %ecx | |
cmpl %edx, 1024(%eax) | |
ja L17 | |
movl %ecx, %eax | |
L17: | |
leal 512(%eax), %ecx | |
cmpl %edx, 512(%eax) | |
ja L19 | |
movl %ecx, %eax | |
L19: | |
leal 256(%eax), %ecx | |
cmpl %edx, 256(%eax) | |
ja L21 | |
movl %ecx, %eax | |
L21: | |
leal 128(%eax), %ecx | |
cmpl %edx, 128(%eax) | |
ja L23 | |
movl %ecx, %eax | |
L23: | |
leal 64(%eax), %ecx | |
cmpl %edx, 64(%eax) | |
ja L25 | |
movl %ecx, %eax | |
L25: | |
leal 32(%eax), %ecx | |
cmpl %edx, 32(%eax) | |
ja L27 | |
movl %ecx, %eax | |
L27: | |
leal 16(%eax), %ecx | |
cmpl %edx, 16(%eax) | |
ja L29 | |
movl %ecx, %eax | |
L29: | |
leal 8(%eax), %ecx | |
cmpl %edx, 8(%eax) | |
ja L31 | |
movl %ecx, %eax | |
L31: | |
leal 4(%eax), %ecx | |
cmpl %edx, 4(%eax) | |
ja L33 | |
movl %ecx, %eax | |
L33: | |
# Turn the pointer into an element index: subtract the table base, then
# divide the byte offset by 4.
subl $_squares, %eax | |
sarl $2, %eax | |
leave | |
ret | |
.p2align 2,,3 | |
L2: | |
# x >= 1073741824: start the search at byte offset 131072 (index 32768).
movl $_squares+131072, %eax | |
jmp L3 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
inline int isqrt(uint32_t x) { | |
const uint32_t *p = squares; | |
if (p[32768] <= x) p += 32768; | |
if (p[16384] <= x) p += 16384; | |
if (p[8192] <= x) p += 8192; | |
if (p[4096] <= x) p += 4096; | |
if (p[2048] <= x) p += 2048; | |
if (p[1024] <= x) p += 1024; | |
if (p[512] <= x) p += 512; | |
if (p[256] <= x) p += 256; | |
if (p[128] <= x) p += 128; | |
if (p[ 64] <= x) p += 64; | |
if (p[ 32] <= x) p += 32; | |
if (p[ 16] <= x) p += 16; | |
if (p[ 8] <= x) p += 8; | |
if (p[ 4] <= x) p += 4; | |
if (p[ 2] <= x) p += 2; | |
if (p[ 1] <= x) p += 1; | |
return p - squares; | |
} | |
# 32-bit x86 listing (AT&T syntax); appears to be the compiler's output for
# the manually unrolled isqrt. Table entries are 4 bytes wide, so the byte
# offsets 65536, 32768, ..., 4 correspond to steps of 16384, 8192, ..., 1.
# Registers: %ecx = x, %edx = current table pointer, %eax = candidate pointer
# (and the return value), %ebx = candidate pointer for the 65536-byte step.
_isqrt: | |
pushl %ebp | |
movl %esp, %ebp | |
# %ebx is used below; it is saved here and restored before returning.
pushl %ebx | |
# Load the argument x from the stack.
movl 8(%ebp), %ecx | |
# x > 1073741823 is the same as x >= 1073741824 (= 32768 * 32768). The first
# step compares against an immediate instead of reading the table.
# NOTE(review): presumably squares[32768] was constant-folded -- the table
# definition is not visible here; confirm.
cmpl $1073741823, %ecx | |
ja L18 | |
# x < 1073741824: current pointer is the table start; the candidates for the
# next two steps (+65536 and +32768 bytes) are preloaded as constants.
movl $_squares+32768, %eax | |
movl $_squares+65536, %ebx | |
movl $_squares, %edx | |
L2: | |
# 65536-byte step: if x >= the entry at %ebx, advance to %ebx and recompute
# the 32768-byte candidate relative to the new position.
cmpl (%ebx), %ecx | |
jb L3 | |
leal 32768(%ebx), %eax | |
movl %ebx, %edx | |
L3: | |
# 32768-byte step, using the candidate already held in %eax.
cmpl (%eax), %ecx | |
jb L4 | |
movl %eax, %edx | |
L4: | |
# Pattern repeated for the groups below: %eax = %edx + offset; if x is below
# the table entry at that offset (unsigned) the move is skipped, otherwise
# %edx advances to %eax.
leal 16384(%edx), %eax | |
cmpl 16384(%edx), %ecx | |
jb L5 | |
movl %eax, %edx | |
L5: | |
leal 8192(%edx), %eax | |
cmpl 8192(%edx), %ecx | |
jb L6 | |
movl %eax, %edx | |
L6: | |
leal 4096(%edx), %eax | |
cmpl 4096(%edx), %ecx | |
jb L7 | |
movl %eax, %edx | |
L7: | |
leal 2048(%edx), %eax | |
cmpl 2048(%edx), %ecx | |
jb L8 | |
movl %eax, %edx | |
L8: | |
leal 1024(%edx), %eax | |
cmpl 1024(%edx), %ecx | |
jb L9 | |
movl %eax, %edx | |
L9: | |
leal 512(%edx), %eax | |
cmpl 512(%edx), %ecx | |
jb L10 | |
movl %eax, %edx | |
L10: | |
leal 256(%edx), %eax | |
cmpl 256(%edx), %ecx | |
jb L11 | |
movl %eax, %edx | |
L11: | |
leal 128(%edx), %eax | |
cmpl 128(%edx), %ecx | |
jb L12 | |
movl %eax, %edx | |
L12: | |
leal 64(%edx), %eax | |
cmpl 64(%edx), %ecx | |
jb L13 | |
movl %eax, %edx | |
L13: | |
leal 32(%edx), %eax | |
cmpl 32(%edx), %ecx | |
jb L14 | |
movl %eax, %edx | |
L14: | |
leal 16(%edx), %eax | |
cmpl 16(%edx), %ecx | |
jb L15 | |
movl %eax, %edx | |
L15: | |
leal 8(%edx), %eax | |
cmpl 8(%edx), %ecx | |
jb L16 | |
movl %eax, %edx | |
L16: | |
# Last step has the opposite sense: %eax already holds %edx + 4 and is kept
# when x >= the entry there; otherwise it falls back to %edx.
leal 4(%edx), %eax | |
cmpl 4(%edx), %ecx | |
jae L17 | |
movl %edx, %eax | |
L17: | |
# Turn the pointer into an element index: subtract the table base, then
# divide the byte offset by 4.
subl $_squares, %eax | |
sarl $2, %eax | |
popl %ebx | |
leave | |
ret | |
.p2align 2,,3 | |
L18: | |
# x >= 1073741824: same three initial pointers as the fall-through path,
# each shifted by 131072 bytes (index 32768).
movl $_squares+163840, %eax | |
movl $_squares+196608, %ebx | |
movl $_squares+131072, %edx | |
jmp L2 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment