Skip to content

Instantly share code, notes, and snippets.

@rygorous
Created September 24, 2015 21:18
Embed
What would you like to do?
#include <stdint.h>
// Ring size. Must be a power of two: every reduction below is done with the
// mask (N - 1) instead of a real modulo.
static uint32_t const N = 16384; // just an example
// x in [a,b]? (closed interval on the ring of integers mod N; the interval
// runs forward from a to b and may wrap around)
// none of x, a, b need to be reduced beforehand.
static bool point_in_interval1(uint32_t x, uint32_t a, uint32_t b)
{
    uint32_t const mask = N - 1;
    // Forward distances from a, measured around the ring. The unsigned
    // subtraction wraps mod 2^32, and since N divides 2^32 the masked result
    // is the correct distance mod N.
    uint32_t const dist_x = (x - a) & mask;
    // can skip second AND if known ahead of time that (b-a) < N.
    uint32_t const dist_b = (b - a) & mask;
    // <= (not <) so that x == b is inside, matching the "[a,b]" contract and
    // the results of point_in_interval2 / point_in_interval3. With a strict
    // compare the upper endpoint was excluded and a == b matched nothing.
    return dist_x <= dist_b;
}
// Same membership question as a three-way comparison: x, a and b are each
// masked down to [0, N) first, because the ordering comparisons below are
// only meaningful on reduced values.
static bool point_in_interval2(uint32_t x, uint32_t a, uint32_t b)
{
    uint32_t const mask = N - 1;
    uint32_t const px = x & mask;
    uint32_t const pa = a & mask;
    uint32_t const pb = b & mask;

    bool const before_start = px < pa;  // x lies below the start point
    bool const not_wrapped  = pa <= pb; // interval does not cross N-1 -> 0
    bool const after_end    = pb < px;  // x lies above the end point

    // Parity (XOR) of the three conditions; for bools, != is XOR.
    return (before_start != not_wrapped) != after_end;
}
// Variant for callers that already hold x, a and b reduced to [0, N):
// no masking is done here, only the three ordering comparisons.
static bool point_in_interval3(uint32_t x, uint32_t a, uint32_t b)
{
    bool const before_start = x < a;  // x lies below the start point
    bool const not_wrapped  = a <= b; // interval does not wrap around
    bool const after_end    = b < x;  // x lies above the end point

    // Parity (XOR) of the three conditions; for bools, != is XOR.
    return (before_start != not_wrapped) != after_end;
}
// Defined elsewhere; stands in for "the work done when the test passes".
extern void f();
// Disassembly for these functions is the interesting bit since it's how actual usage looks
// (a branch on the predicate, rather than materializing its bool result).
void test1(uint32_t x, uint32_t a, uint32_t b)
{
    bool const inside = point_in_interval1(x, a, b);
    if (!inside)
        return;
    f();
}
// Branch-on-predicate wrapper around point_in_interval2: f() runs only when
// the predicate holds for (x, a, b).
void test2(uint32_t x, uint32_t a, uint32_t b)
{
    bool const inside = point_in_interval2(x, a, b);
    if (!inside)
        return;
    f();
}
// Branch-on-predicate wrapper around point_in_interval3: f() runs only when
// the predicate holds for (x, a, b).
void test3(uint32_t x, uint32_t a, uint32_t b)
{
    bool const inside = point_in_interval3(x, a, b);
    if (!inside)
        return;
    f();
}
@ ---------------------------------------------------------------------------
@ 32-bit ARM listing of ival.cpp (GNU assembler syntax, compiler-generated).
@ Contains test1/test2/test3 under their C++-mangled names.
@ ---------------------------------------------------------------------------
.syntax unified
@ Build attributes recorded in the object file as (tag, value) pairs. The tag
@ numbers are defined by the ARM EABI build-attributes addendum; they are not
@ decoded here.
.eabi_attribute 6, 2
.eabi_attribute 20, 1
.eabi_attribute 21, 1
.eabi_attribute 23, 3
.eabi_attribute 24, 1
.eabi_attribute 25, 1
.file "ival.cpp"
.text
@ ---------------------------------------------------------------------------
@ void test1(uint32_t x, uint32_t a, uint32_t b)          [_Z5test1jjj]
@ Calls f() (_Z1fv) when ((x - a) & 0x3FFF) is unsigned-below
@ ((b - a) & 0x3FFF); otherwise returns without calling.
@ In:      r0 = x, r1 = a, r2 = b (C++ parameter order)
@ Scratch: r2, r3, flags. r11 and lr are saved on entry and restored on exit.
@ ---------------------------------------------------------------------------
.globl _Z5test1jjj
.align 2
.type _Z5test1jjj,%function
_Z5test1jjj:
push {r11, lr} @ save r11 and the return address (bl below overwrites lr)
sub r3, r2, r1 @ r3 = b - a
mov r2, #255 @ begin building the mask: r2 = 0x00FF
sub r0, r0, r1 @ r0 = x - a
mov r11, sp @ r11 = sp; r11 is not read again in this function
orr r2, r2, #16128 @ r2 = 0x00FF | 0x3F00 = 0x3FFF (16383 = N - 1)
and r3, r3, r2 @ r3 = (b - a) & 0x3FFF
and r0, r0, r2 @ r0 = (x - a) & 0x3FFF
cmp r0, r3
bhs .LBB0_2 @ unsigned r0 >= r3 -> predicate false, skip the call
bl _Z1fv @ f()
.LBB0_2:
pop {r11, lr} @ restore r11 and the original return address
bx lr @ return to caller
.Ltmp0:
.size _Z5test1jjj, .Ltmp0-_Z5test1jjj
@ ---------------------------------------------------------------------------
@ void test2(uint32_t x, uint32_t a, uint32_t b)          [_Z5test2jjj]
@ Masks x, a, b with 0x3FFF, then calls f() (_Z1fv) when
@ (xr < ar) ^ (ar <= br) ^ (br < xr) is 1 (all comparisons unsigned).
@ In:      r0 = x, r1 = a, r2 = b (C++ parameter order)
@ Scratch: r1, r2, r3, r12, lr, flags. r11 and lr are saved on entry and
@          restored on exit, which is what frees lr for use as a temporary.
@ ---------------------------------------------------------------------------
.globl _Z5test2jjj
.align 2
.type _Z5test2jjj,%function
_Z5test2jjj:
push {r11, lr} @ save r11 and the return address
mov r12, #255 @ begin building the mask: r12 = 0x00FF
mov r11, sp @ r11 = sp; r11 is not read again in this function
orr r12, r12, #16128 @ r12 = 0x00FF | 0x3F00 = 0x3FFF (16383 = N - 1)
and lr, r2, r12 @ lr = br = b & 0x3FFF
and r3, r1, r12 @ r3 = ar = a & 0x3FFF
mov r2, #0 @ r2 = 0, becomes (ar <= br)
and r0, r0, r12 @ r0 = xr = x & 0x3FFF
mov r1, #0 @ r1 = 0, becomes (br < xr)
cmp r3, lr @ flags <- ar vs br
movls r2, #1 @ r2 = (ar <= br), unsigned
cmp r0, r3 @ flags <- xr vs ar (r3 still holds ar here)
mov r3, #0 @ plain mov leaves the flags from the cmp intact
movlo r3, #1 @ r3 = (xr < ar), unsigned
cmp lr, r0 @ flags <- br vs xr
eor r2, r3, r2 @ r2 = (xr < ar) ^ (ar <= br); eor without S keeps the flags
movlo r1, #1 @ r1 = (br < xr), unsigned
teq r2, r1 @ Z set when r2 == r1, i.e. when the three-way XOR is 0
beq .LBB1_2 @ XOR is 0 -> predicate false, skip the call
bl _Z1fv @ f()
.LBB1_2:
pop {r11, lr} @ restore r11 and the original return address
bx lr @ return to caller
.Ltmp1:
.size _Z5test2jjj, .Ltmp1-_Z5test2jjj
@ ---------------------------------------------------------------------------
@ void test3(uint32_t x, uint32_t a, uint32_t b)          [_Z5test3jjj]
@ Calls f() (_Z1fv) when (x < a) ^ (a <= b) ^ (b < x) is 1. The inputs are
@ compared as given (unsigned); no masking is performed here.
@ In:      r0 = x, r1 = a, r2 = b (C++ parameter order)
@ Scratch: r1, r3, r12, flags. r11 and lr are saved on entry and restored.
@ ---------------------------------------------------------------------------
.globl _Z5test3jjj
.align 2
.type _Z5test3jjj,%function
_Z5test3jjj:
push {r11, lr} @ save r11 and the return address (bl below overwrites lr)
cmp r1, r2 @ flags <- a vs b
mov r3, #0 @ r3 = 0, becomes (a <= b); plain mov keeps the flags
mov r12, #0 @ r12 = 0, becomes (b < x)
mov r11, sp @ r11 = sp; r11 is not read again in this function
movls r3, #1 @ r3 = (a <= b), unsigned
cmp r0, r1 @ flags <- x vs a
mov r1, #0 @ a is no longer needed; r1 becomes (x < a)
movlo r1, #1 @ r1 = (x < a), unsigned
cmp r2, r0 @ flags <- b vs x
eor r1, r1, r3 @ r1 = (x < a) ^ (a <= b); eor without S keeps the flags
movlo r12, #1 @ r12 = (b < x), unsigned
teq r1, r12 @ Z set when r1 == r12, i.e. when the three-way XOR is 0
beq .LBB2_2 @ XOR is 0 -> predicate false, skip the call
bl _Z1fv @ f()
.LBB2_2:
pop {r11, lr} @ restore r11 and the original return address
bx lr @ return to caller
.Ltmp2:
.size _Z5test3jjj, .Ltmp2-_Z5test3jjj
# ---------------------------------------------------------------------------
# MIPS listing of ival.cpp (GNU assembler syntax, compiler-generated).
# Contains test1/test2/test3 under their C++-mangled names.
# ---------------------------------------------------------------------------
# Switch to a section named .mdebug.abi32 and straight back without emitting
# anything into it. NOTE(review): the name suggests an o32-ABI marker section;
# confirm against the toolchain documentation.
.section .mdebug.abi32
.previous
.file "ival.cpp"
.text
# ---------------------------------------------------------------------------
# void test1(uint32_t x, uint32_t a, uint32_t b)          [_Z5test1jjj]
# Calls f() (_Z1fv) when ((x - a) & 16383) is unsigned-below
# ((b - a) & 16383); otherwise returns without calling.
# In:      $4 = x, $5 = a, $6 = b (C++ parameter order)
# Scratch: $1, $2, $25, $gp
# Stack:   24-byte frame; $ra saved at 20($sp), $fp at 16($sp).
# ---------------------------------------------------------------------------
.globl _Z5test1jjj
.align 2
.type _Z5test1jjj,@function
.set nomips16
.ent _Z5test1jjj
_Z5test1jjj:
.cfi_startproc
.frame $fp,24,$ra
.mask 0xc0000000,-4
.fmask 0x00000000,0
# noreorder: the assembler will not fill or rearrange branch delay slots, so
# the instruction written after every branch/jump below is its delay slot.
.set noreorder
.set nomacro
.set noat
# $gp = _gp_disp + $25 (finished by the addu further down).
# NOTE(review): relies on $25 holding this function's address on entry, per
# the usual MIPS PIC calling convention -- confirm for the target ABI.
lui $2, %hi(_gp_disp)
addiu $2, $2, %lo(_gp_disp)
addiu $sp, $sp, -24 # allocate the 24-byte frame
$tmp3:
.cfi_def_cfa_offset 24
sw $ra, 20($sp) # save return address
sw $fp, 16($sp) # save caller's frame pointer
$tmp4:
.cfi_offset 31, -4
$tmp5:
.cfi_offset 30, -8
move $fp, $sp # $fp = $sp for the rest of the function
$tmp6:
.cfi_def_cfa_register 30
addu $gp, $2, $25 # $gp = _gp_disp + $25
subu $1, $6, $5 # $1 = b - a
subu $2, $4, $5 # $2 = x - a
andi $1, $1, 16383 # $1 = (b - a) & 0x3FFF
andi $2, $2, 16383 # $2 = (x - a) & 0x3FFF
sltu $1, $2, $1 # $1 = ($2 < $1), unsigned
beq $1, $zero, $BB0_2 # predicate false -> skip the call
nop # branch delay slot
lw $25, %call16(_Z1fv)($gp) # $25 = address of f, loaded via $gp
jalr $25 # f()
nop # call delay slot
$BB0_2:
move $sp, $fp
lw $fp, 16($sp) # restore caller's frame pointer
lw $ra, 20($sp) # restore return address
jr $ra # return ...
addiu $sp, $sp, 24 # ... releasing the frame in the delay slot
.set at
.set macro
.set reorder
.end _Z5test1jjj
$tmp7:
.size _Z5test1jjj, ($tmp7)-_Z5test1jjj
.cfi_endproc
# ---------------------------------------------------------------------------
# void test2(uint32_t x, uint32_t a, uint32_t b)          [_Z5test2jjj]
# Masks x, a, b with 16383, then calls f() (_Z1fv) when
# (xr < ar) ^ (ar <= br) ^ (br < xr) equals 1 (all comparisons unsigned).
# In:      $4 = x, $5 = a, $6 = b (C++ parameter order)
# Scratch: $1, $2, $3, $4, $25, $gp
# Stack:   24-byte frame; $ra saved at 20($sp), $fp at 16($sp).
# ---------------------------------------------------------------------------
.globl _Z5test2jjj
.align 2
.type _Z5test2jjj,@function
.set nomips16
.ent _Z5test2jjj
_Z5test2jjj:
.cfi_startproc
.frame $fp,24,$ra
.mask 0xc0000000,-4
.fmask 0x00000000,0
# noreorder: the assembler will not fill or rearrange branch delay slots, so
# the instruction written after every branch/jump below is its delay slot.
.set noreorder
.set nomacro
.set noat
# $gp = _gp_disp + $25 (finished by the addu further down).
# NOTE(review): relies on $25 holding this function's address on entry, per
# the usual MIPS PIC calling convention -- confirm for the target ABI.
lui $2, %hi(_gp_disp)
addiu $2, $2, %lo(_gp_disp)
addiu $sp, $sp, -24 # allocate the 24-byte frame
$tmp11:
.cfi_def_cfa_offset 24
sw $ra, 20($sp) # save return address
sw $fp, 16($sp) # save caller's frame pointer
$tmp12:
.cfi_offset 31, -4
$tmp13:
.cfi_offset 30, -8
move $fp, $sp # $fp = $sp for the rest of the function
$tmp14:
.cfi_def_cfa_register 30
addu $gp, $2, $25 # $gp = _gp_disp + $25
andi $1, $4, 16383 # $1 = xr = x & 0x3FFF
andi $2, $5, 16383 # $2 = ar = a & 0x3FFF
andi $4, $6, 16383 # $4 = br = b & 0x3FFF (x itself is no longer needed)
sltu $3, $1, $2 # $3 = (xr < ar)
sltu $2, $4, $2 # $2 = (br < ar)
sltu $1, $4, $1 # $1 = (br < xr)
xori $2, $2, 1 # $2 = !(br < ar) = (ar <= br)
xor $2, $3, $2 # $2 = (xr < ar) ^ (ar <= br)
xor $1, $2, $1 # $1 = three-way XOR, 0 or 1
addiu $2, $zero, 1 # $2 = 1, the value to compare against
bne $1, $2, $BB1_2 # result != 1 -> predicate false, skip the call
nop # branch delay slot
lw $25, %call16(_Z1fv)($gp) # $25 = address of f, loaded via $gp
jalr $25 # f()
nop # call delay slot
$BB1_2:
move $sp, $fp
lw $fp, 16($sp) # restore caller's frame pointer
lw $ra, 20($sp) # restore return address
jr $ra # return ...
addiu $sp, $sp, 24 # ... releasing the frame in the delay slot
.set at
.set macro
.set reorder
.end _Z5test2jjj
$tmp15:
.size _Z5test2jjj, ($tmp15)-_Z5test2jjj
.cfi_endproc
# ---------------------------------------------------------------------------
# void test3(uint32_t x, uint32_t a, uint32_t b)          [_Z5test3jjj]
# Calls f() (_Z1fv) when (x < a) ^ (a <= b) ^ (b < x) equals 1. The inputs
# are compared as given (unsigned); no masking is performed here.
# In:      $4 = x, $5 = a, $6 = b (C++ parameter order)
# Scratch: $1, $2, $25, $gp
# Stack:   24-byte frame; $ra saved at 20($sp), $fp at 16($sp).
# ---------------------------------------------------------------------------
.globl _Z5test3jjj
.align 2
.type _Z5test3jjj,@function
.set nomips16
.ent _Z5test3jjj
_Z5test3jjj:
.cfi_startproc
.frame $fp,24,$ra
.mask 0xc0000000,-4
.fmask 0x00000000,0
# noreorder: the assembler will not fill or rearrange branch delay slots, so
# the instruction written after every branch/jump below is its delay slot.
.set noreorder
.set nomacro
.set noat
# $gp = _gp_disp + $25 (finished by the addu further down).
# NOTE(review): relies on $25 holding this function's address on entry, per
# the usual MIPS PIC calling convention -- confirm for the target ABI.
lui $2, %hi(_gp_disp)
addiu $2, $2, %lo(_gp_disp)
addiu $sp, $sp, -24 # allocate the 24-byte frame
$tmp19:
.cfi_def_cfa_offset 24
sw $ra, 20($sp) # save return address
sw $fp, 16($sp) # save caller's frame pointer
$tmp20:
.cfi_offset 31, -4
$tmp21:
.cfi_offset 30, -8
move $fp, $sp # $fp = $sp for the rest of the function
$tmp22:
.cfi_def_cfa_register 30
addu $gp, $2, $25 # $gp = _gp_disp + $25
sltu $2, $6, $5 # $2 = (b < a)
sltu $1, $4, $5 # $1 = (x < a)
xori $2, $2, 1 # $2 = !(b < a) = (a <= b)
xor $1, $1, $2 # $1 = (x < a) ^ (a <= b)
sltu $2, $6, $4 # $2 = (b < x)
xor $1, $1, $2 # $1 = three-way XOR, 0 or 1
addiu $2, $zero, 1 # $2 = 1, the value to compare against
bne $1, $2, $BB2_2 # result != 1 -> predicate false, skip the call
nop # branch delay slot
lw $25, %call16(_Z1fv)($gp) # $25 = address of f, loaded via $gp
jalr $25 # f()
nop # call delay slot
$BB2_2:
move $sp, $fp
lw $fp, 16($sp) # restore caller's frame pointer
lw $ra, 20($sp) # restore return address
jr $ra # return ...
addiu $sp, $sp, 24 # ... releasing the frame in the delay slot
.set at
.set macro
.set reorder
.end _Z5test3jjj
$tmp23:
.size _Z5test3jjj, ($tmp23)-_Z5test3jjj
.cfi_endproc
; Listing generated by Microsoft (R) Optimizing Compiler Version 17.00.50727.1
; 64-bit listing of ival.cpp (MASM / Intel syntax: destination operand first).
; The code below uses ecx, edx, r8d for the three parameters x, a, b.
include listing.inc
INCLUDELIB LIBCMT
INCLUDELIB OLDNAMES
; Exported entry points; the point_in_interval* helpers further down are
; emitted as COMDATs but are not listed as PUBLIC.
PUBLIC ?test1@@YAXIII@Z ; test1
PUBLIC ?test2@@YAXIII@Z ; test2
PUBLIC ?test3@@YAXIII@Z ; test3
; f() is defined in another translation unit.
EXTRN ?f@@YAXXZ:PROC ; f
; Function compile flags: /Ogtpy
; File c:\temp\ival\ival.cpp
; COMDAT ?test3@@YAXIII@Z
;-----------------------------------------------------------------------
; void test3(uint32_t x, uint32_t a, uint32_t b)
; Transfers to f() when (x < a) ^ (b < x) ^ (a <= b) is non-zero (unsigned
; compares, no masking); otherwise returns.
; In:    ecx = x, edx = a, r8d = b
; Clobb: al, r9b, flags
; Note:  f() is reached with a conditional jump, not a call, so f returns
;        directly to test3's caller.
;-----------------------------------------------------------------------
_TEXT SEGMENT
; Symbolic stack offsets for the parameters; not referenced by the code below.
x$ = 8
a$ = 16
b$ = 24
?test3@@YAXIII@Z PROC ; test3, COMDAT
; 44 : if (point_in_interval3(x, a, b))
cmp ecx, edx ; x vs a
setb r9b ; r9b = (x < a)
cmp r8d, ecx ; b vs x
setb al ; al = (b < x)
xor r9b, al ; r9b = (x < a) ^ (b < x)
cmp edx, r8d ; a vs b
setbe al ; al = (a <= b)
xor r9b, al ; r9b = three-way XOR; ZF is clear when the result is 1
jne ?f@@YAXXZ ; f
$LN1@test3:
; 45 : f();
; 46 : }
; NOTE(review): fatret is not defined in this listing; presumably a return
; macro supplied by listing.inc -- confirm.
fatret 0
?test3@@YAXIII@Z ENDP ; test3
_TEXT ENDS
; Function compile flags: /Ogtpy
; File c:\temp\ival\ival.cpp
; COMDAT ?test2@@YAXIII@Z
;-----------------------------------------------------------------------
; void test2(uint32_t x, uint32_t a, uint32_t b)
; Masks x, a, b with 3fffH, then transfers to f() when
; (ar <= br) ^ (xr < ar) ^ (br < xr) is non-zero; otherwise returns.
; In:    ecx = x, edx = a, r8d = b
; Clobb: al, ecx, edx, r8b, r9d, flags
; Note:  f() is reached with a conditional jump, not a call, so f returns
;        directly to test2's caller.
;-----------------------------------------------------------------------
_TEXT SEGMENT
; Symbolic stack offsets for the parameters; not referenced by the code below.
x$ = 8
a$ = 16
b$ = 24
?test2@@YAXIII@Z PROC ; test2, COMDAT
; 37 : {
mov r9d, r8d ; copy b to r9d, freeing r8b to hold a flag byte
; 38 : if (point_in_interval2(x, a, b))
and edx, 16383 ; 00003fffH -- edx = ar
and ecx, 16383 ; 00003fffH -- ecx = xr
and r9d, 16383 ; 00003fffH -- r9d = br
cmp edx, r9d ; ar vs br
setbe r8b ; r8b = (ar <= br)
cmp ecx, edx ; xr vs ar
setb al ; al = (xr < ar)
xor r8b, al ; r8b = (ar <= br) ^ (xr < ar)
cmp r9d, ecx ; br vs xr
setb al ; al = (br < xr)
xor r8b, al ; r8b = three-way XOR; ZF is clear when the result is 1
jne ?f@@YAXXZ ; f
$LN1@test2:
; 39 : f();
; 40 : }
; NOTE(review): fatret is not defined in this listing; presumably a return
; macro supplied by listing.inc -- confirm.
fatret 0
?test2@@YAXIII@Z ENDP ; test2
_TEXT ENDS
; Function compile flags: /Ogtpy
; File c:\temp\ival\ival.cpp
; COMDAT ?test1@@YAXIII@Z
;-----------------------------------------------------------------------
; void test1(uint32_t x, uint32_t a, uint32_t b)
; Transfers to f() when ((x - a) & 3fffH) is unsigned-below
; ((b - a) & 3fffH); otherwise returns.
; In:    ecx = x, edx = a, r8d = b
; Clobb: ecx, r8d, flags
; Note:  f() is reached with a conditional jump, not a call, so f returns
;        directly to test1's caller.
;-----------------------------------------------------------------------
_TEXT SEGMENT
; Symbolic stack offsets for the parameters; not referenced by the code below.
x$ = 8
a$ = 16
b$ = 24
?test1@@YAXIII@Z PROC ; test1, COMDAT
; 32 : if (point_in_interval1(x, a, b))
sub ecx, edx ; ecx = x - a
sub r8d, edx ; r8d = b - a
and ecx, 16383 ; 00003fffH -- ecx = (x - a) & (N - 1)
and r8d, 16383 ; 00003fffH -- r8d = (b - a) & (N - 1)
cmp ecx, r8d
jb ?f@@YAXXZ ; f
$LN1@test1:
; 33 : f();
; 34 : }
; NOTE(review): fatret is not defined in this listing; presumably a return
; macro supplied by listing.inc -- confirm.
fatret 0
?test1@@YAXIII@Z ENDP ; test1
_TEXT ENDS
; Function compile flags: /Ogtpy
; File c:\temp\ival\ival.cpp
; COMDAT ?point_in_interval3@@YA_NIII@Z
;-----------------------------------------------------------------------
; bool point_in_interval3(uint32_t x, uint32_t a, uint32_t b)
; Stand-alone copy of the predicate: eax = (x < a) ^ (b < x) ^ (a <= b),
; unsigned compares, no masking.
; In:    ecx = x, edx = a, r8d = b
; Out:   eax = 0 or 1
; Clobb: r9d, r10d, flags
;-----------------------------------------------------------------------
_TEXT SEGMENT
; Symbolic stack offsets for the parameters; not referenced by the code below.
x$ = 8
a$ = 16
b$ = 24
?point_in_interval3@@YA_NIII@Z PROC ; point_in_interval3, COMDAT
; 25 : return (x < a) ^ (a <= b) ^ (b < x);
xor r10d, r10d ; r10d = 0, reused below as a source of zeroes
cmp ecx, edx ; x vs a
mov eax, r10d ; eax = 0 (mov leaves the flags from the cmp intact)
setb al ; eax = (x < a)
mov r9d, r10d ; r9d = 0
cmp r8d, ecx ; b vs x
setb r9b ; r9d = (b < x)
xor eax, r9d ; eax = (x < a) ^ (b < x)
cmp edx, r8d ; a vs b
setbe r10b ; r10d = (a <= b); upper bits are still zero
xor eax, r10d ; eax = three-way XOR
; 26 : }
ret 0
?point_in_interval3@@YA_NIII@Z ENDP ; point_in_interval3
_TEXT ENDS
; Function compile flags: /Ogtpy
; File c:\temp\ival\ival.cpp
; COMDAT ?point_in_interval2@@YA_NIII@Z
;-----------------------------------------------------------------------
; bool point_in_interval2(uint32_t x, uint32_t a, uint32_t b)
; Stand-alone copy of the predicate: masks x, a, b with 3fffH and returns
; eax = (ar <= br) ^ (xr < ar) ^ (br < xr), unsigned compares.
; In:    ecx = x, edx = a, r8d = b
; Out:   eax = 0 or 1
; Clobb: ecx, edx, r8d, r9d, r10d, flags
;-----------------------------------------------------------------------
_TEXT SEGMENT
; Symbolic stack offsets for the parameters; not referenced by the code below.
x$ = 8
a$ = 16
b$ = 24
?point_in_interval2@@YA_NIII@Z PROC ; point_in_interval2, COMDAT
; 15 : // reduce mod N first, otherwise the test doesn't work
; 16 : uint32_t xr = x & (N - 1);
; 17 : uint32_t ar = a & (N - 1);
; 18 : uint32_t br = b & (N - 1);
; 19 : return (xr < ar) ^ (ar <= br) ^ (br < xr);
xor r9d, r9d ; r9d = 0, reused below as a source of zeroes
mov r10d, ecx ; move x to r10d so ecx can hold a flag value
and edx, 16383 ; 00003fffH -- edx = ar
and r8d, 16383 ; 00003fffH -- r8d = br
and r10d, 16383 ; 00003fffH -- r10d = xr
mov eax, r9d ; eax = 0
cmp edx, r8d ; ar vs br
mov ecx, r9d ; ecx = 0 (mov leaves the flags from the cmp intact)
setbe al ; eax = (ar <= br)
cmp r10d, edx ; xr vs ar
setb cl ; ecx = (xr < ar)
xor eax, ecx ; eax = (ar <= br) ^ (xr < ar)
cmp r8d, r10d ; br vs xr
setb r9b ; r9d = (br < xr); upper bits are still zero
xor eax, r9d ; eax = three-way XOR
; 20 : }
ret 0
?point_in_interval2@@YA_NIII@Z ENDP ; point_in_interval2
_TEXT ENDS
; Function compile flags: /Ogtpy
; File c:\temp\ival\ival.cpp
; COMDAT ?point_in_interval1@@YA_NIII@Z
;-----------------------------------------------------------------------
; bool point_in_interval1(uint32_t x, uint32_t a, uint32_t b)
; Stand-alone copy of the predicate:
; al = ((x - a) & 3fffH) unsigned-below ((b - a) & 3fffH).
; In:    ecx = x, edx = a, r8d = b
; Out:   al = 0 or 1 (only the low byte of eax is written)
; Clobb: ecx, r8d, flags
;-----------------------------------------------------------------------
_TEXT SEGMENT
; Symbolic stack offsets for the parameters; not referenced by the code below.
x$ = 8
a$ = 16
b$ = 24
?point_in_interval1@@YA_NIII@Z PROC ; point_in_interval1, COMDAT
; 9 : // can skip second AND if known ahead of time that (b-a) < N.
; 10 : return ((x - a) & (N - 1)) < ((b - a) & (N - 1));
sub ecx, edx ; ecx = x - a
sub r8d, edx ; r8d = b - a
and ecx, 16383 ; 00003fffH -- ecx = (x - a) & (N - 1)
and r8d, 16383 ; 00003fffH -- r8d = (b - a) & (N - 1)
cmp ecx, r8d
setb al ; al = (ecx < r8d), unsigned
; 11 : }
ret 0
?point_in_interval1@@YA_NIII@Z ENDP ; point_in_interval1
_TEXT ENDS
; End of the 64-bit listing.
END
; Listing generated by Microsoft (R) Optimizing Compiler Version 17.00.50727.1
; 32-bit listing of ival.cpp (MASM / Intel syntax: destination operand first).
; The code below reads all three parameters x, a, b from the stack via esp.
TITLE C:\temp\ival\ival.cpp
.686P
.XMM
include listing.inc
.model flat
INCLUDELIB LIBCMT
INCLUDELIB OLDNAMES
; Exported entry points; the point_in_interval* helpers further down are
; emitted as COMDATs but are not listed as PUBLIC.
PUBLIC ?test1@@YAXIII@Z ; test1
PUBLIC ?test2@@YAXIII@Z ; test2
PUBLIC ?test3@@YAXIII@Z ; test3
; f() is defined in another translation unit.
EXTRN ?f@@YAXXZ:PROC ; f
; Function compile flags: /Ogtpy
; File c:\temp\ival\ival.cpp
; COMDAT ?test3@@YAXIII@Z
;-----------------------------------------------------------------------
; void test3(uint32_t x, uint32_t a, uint32_t b)
; Transfers to f() when (x < a) ^ (b < x) ^ (a <= b) is non-zero (unsigned
; compares, no masking); otherwise returns.
; In:    x, a, b on the stack at [esp+4], [esp+8], [esp+12] on entry
; Clobb: eax, edx, flags (ebx is pushed and popped around its use)
; Note:  f() is reached with a conditional jump, not a call, so f returns
;        directly to test3's caller.
;-----------------------------------------------------------------------
_TEXT SEGMENT
; Parameter offsets. The "-4" in "_x$[esp-4]" is applied while nothing has
; been pushed; after "push ebx" the same slots are written as "_b$[esp]".
_x$ = 8 ; size = 4
_a$ = 12 ; size = 4
_b$ = 16 ; size = 4
?test3@@YAXIII@Z PROC ; test3, COMDAT
; 44 : if (point_in_interval3(x, a, b))
mov eax, DWORD PTR _x$[esp-4] ; eax = x
mov edx, DWORD PTR _a$[esp-4] ; edx = a
cmp eax, edx ; x vs a
push ebx ; free up bl; push does not modify the flags
setb bl ; bl = (x < a)
cmp DWORD PTR _b$[esp], eax ; b vs x
setb al ; al = (b < x); x is no longer needed
xor bl, al ; bl = (x < a) ^ (b < x)
cmp edx, DWORD PTR _b$[esp] ; a vs b
setbe al ; al = (a <= b)
xor bl, al ; bl = three-way XOR; ZF is clear when the result is 1
pop ebx ; restore ebx; pop does not modify the flags the jne reads
jne ?f@@YAXXZ ; f
; 45 : f();
; 46 : }
ret 0
?test3@@YAXIII@Z ENDP ; test3
_TEXT ENDS
; Function compile flags: /Ogtpy
; File c:\temp\ival\ival.cpp
; COMDAT ?test2@@YAXIII@Z
;-----------------------------------------------------------------------
; void test2(uint32_t x, uint32_t a, uint32_t b)
; Masks x, a, b with 3fffH, then transfers to f() when
; (ar <= br) ^ (xr < ar) ^ (br < xr) is non-zero; otherwise returns.
; In:    x, a, b on the stack at [esp+4], [esp+8], [esp+12] on entry
; Clobb: eax, cl, edx, flags (esi is pushed and popped around its use)
; Note:  f() is reached with a conditional jump, not a call, so f returns
;        directly to test2's caller.
;-----------------------------------------------------------------------
_TEXT SEGMENT
; Parameter offsets. The "-4" in "_a$[esp-4]" is applied while nothing has
; been pushed; after "push esi" the same slots are written as "_x$[esp]".
_x$ = 8 ; size = 4
_a$ = 12 ; size = 4
_b$ = 16 ; size = 4
?test2@@YAXIII@Z PROC ; test2, COMDAT
; 38 : if (point_in_interval2(x, a, b))
mov eax, DWORD PTR _a$[esp-4] ; eax = a
mov edx, DWORD PTR _b$[esp-4] ; edx = b
and eax, 16383 ; 00003fffH -- eax = ar
push esi ; free up esi to hold xr
mov esi, DWORD PTR _x$[esp] ; esi = x
and esi, 16383 ; 00003fffH -- esi = xr
and edx, 16383 ; 00003fffH -- edx = br
cmp eax, edx ; ar vs br
setbe cl ; cl = (ar <= br)
cmp esi, eax ; xr vs ar
setb al ; al = (xr < ar); ar is no longer needed
xor cl, al ; cl = (ar <= br) ^ (xr < ar)
cmp edx, esi ; br vs xr
setb al ; al = (br < xr)
xor cl, al ; cl = three-way XOR; ZF is clear when the result is 1
pop esi ; restore esi; pop does not modify the flags the jne reads
jne ?f@@YAXXZ ; f
; 39 : f();
; 40 : }
ret 0
?test2@@YAXIII@Z ENDP ; test2
_TEXT ENDS
; Function compile flags: /Ogtpy
; File c:\temp\ival\ival.cpp
; COMDAT ?test1@@YAXIII@Z
;-----------------------------------------------------------------------
; void test1(uint32_t x, uint32_t a, uint32_t b)
; Transfers to f() when ((x - a) & 3fffH) is unsigned-below
; ((b - a) & 3fffH); otherwise returns.
; In:    x, a, b on the stack at [esp+4], [esp+8], [esp+12]
; Clobb: eax, edx, flags
; Note:  f() is reached with a conditional jump, not a call, so f returns
;        directly to test1's caller.
;-----------------------------------------------------------------------
_TEXT SEGMENT
; Parameter offsets; nothing is pushed in this function, so every access
; uses the "[esp-4]" form.
_x$ = 8 ; size = 4
_a$ = 12 ; size = 4
_b$ = 16 ; size = 4
?test1@@YAXIII@Z PROC ; test1, COMDAT
; 32 : if (point_in_interval1(x, a, b))
mov edx, DWORD PTR _x$[esp-4] ; edx = x
mov eax, DWORD PTR _b$[esp-4] ; eax = b
sub edx, DWORD PTR _a$[esp-4] ; edx = x - a
sub eax, DWORD PTR _a$[esp-4] ; eax = b - a
and edx, 16383 ; 00003fffH -- edx = (x - a) & (N - 1)
and eax, 16383 ; 00003fffH -- eax = (b - a) & (N - 1)
cmp edx, eax
jb ?f@@YAXXZ ; f
; 33 : f();
; 34 : }
ret 0
?test1@@YAXIII@Z ENDP ; test1
_TEXT ENDS
; Function compile flags: /Ogtpy
; File c:\temp\ival\ival.cpp
; COMDAT ?point_in_interval3@@YA_NIII@Z
;-----------------------------------------------------------------------
; bool point_in_interval3(uint32_t x, uint32_t a, uint32_t b)
; Stand-alone copy of the predicate: eax = (x < a) ^ (b < x) ^ (a <= b),
; unsigned compares, no masking. Each comparison result is taken from the
; carry flag with sbb instead of setcc.
; In:    x, a, b on the stack at [esp+4], [esp+8], [esp+12]
; Out:   eax = 0 or 1
; Clobb: ecx, edx, flags
;-----------------------------------------------------------------------
_TEXT SEGMENT
; Parameter offsets; nothing is pushed in this function, so every access
; uses the "[esp-4]" form.
_x$ = 8 ; size = 4
_a$ = 12 ; size = 4
_b$ = 16 ; size = 4
?point_in_interval3@@YA_NIII@Z PROC ; point_in_interval3, COMDAT
; 25 : return (x < a) ^ (a <= b) ^ (b < x);
mov ecx, DWORD PTR _x$[esp-4] ; ecx = x
cmp ecx, DWORD PTR _a$[esp-4] ; x vs a; CF = (x < a)
mov edx, DWORD PTR _b$[esp-4] ; edx = b (mov leaves CF intact)
sbb eax, eax ; eax = -CF: 0 or -1
neg eax ; eax = (x < a) as 0 or 1
cmp edx, ecx ; b vs x; CF = (b < x)
sbb ecx, ecx ; ecx = -CF
neg ecx ; ecx = (b < x) as 0 or 1
xor eax, ecx ; eax = (x < a) ^ (b < x)
cmp edx, DWORD PTR _a$[esp-4] ; b vs a; CF = (b < a)
sbb ecx, ecx ; ecx = -(b < a): 0 or -1
inc ecx ; ecx = 1 - (b < a) = (a <= b)
xor eax, ecx ; eax = three-way XOR
; 26 : }
ret 0
?point_in_interval3@@YA_NIII@Z ENDP ; point_in_interval3
_TEXT ENDS
; Function compile flags: /Ogtpy
; File c:\temp\ival\ival.cpp
; COMDAT ?point_in_interval2@@YA_NIII@Z
;-----------------------------------------------------------------------
; bool point_in_interval2(uint32_t x, uint32_t a, uint32_t b)
; Stand-alone copy of the predicate: masks x, a, b with 3fffH and returns
; eax = (ar <= br) ^ (xr < ar) ^ (br < xr), unsigned compares. Each
; comparison result is taken from the carry flag with sbb instead of setcc.
; In:    x, a, b on the stack at [esp+4], [esp+8], [esp+12] on entry
; Out:   eax = 0 or 1
; Clobb: ecx, edx, flags (esi is pushed and popped around its use)
;-----------------------------------------------------------------------
_TEXT SEGMENT
; Parameter offsets. The "-4" in "_a$[esp-4]" is applied while nothing has
; been pushed; after "push esi" the same slots are written as "_x$[esp]".
_x$ = 8 ; size = 4
_a$ = 12 ; size = 4
_b$ = 16 ; size = 4
?point_in_interval2@@YA_NIII@Z PROC ; point_in_interval2, COMDAT
; 15 : // reduce mod N first, otherwise the test doesn't work
; 16 : uint32_t xr = x & (N - 1);
; 17 : uint32_t ar = a & (N - 1);
mov ecx, DWORD PTR _a$[esp-4] ; ecx = a
; 18 : uint32_t br = b & (N - 1);
mov edx, DWORD PTR _b$[esp-4] ; edx = b
and ecx, 16383 ; 00003fffH -- ecx = ar
push esi ; free up esi to hold xr
mov esi, DWORD PTR _x$[esp] ; esi = x
and esi, 16383 ; 00003fffH -- esi = xr
and edx, 16383 ; 00003fffH -- edx = br
; 19 : return (xr < ar) ^ (ar <= br) ^ (br < xr);
cmp edx, ecx ; br vs ar; CF = (br < ar)
sbb eax, eax ; eax = -(br < ar): 0 or -1
inc eax ; eax = 1 - (br < ar) = (ar <= br)
cmp esi, ecx ; xr vs ar; CF = (xr < ar)
sbb ecx, ecx ; ecx = -CF; ar is no longer needed
neg ecx ; ecx = (xr < ar) as 0 or 1
xor eax, ecx ; eax = (ar <= br) ^ (xr < ar)
cmp edx, esi ; br vs xr; CF = (br < xr)
sbb ecx, ecx ; ecx = -CF
neg ecx ; ecx = (br < xr) as 0 or 1
xor eax, ecx ; eax = three-way XOR
pop esi ; restore esi
; 20 : }
ret 0
?point_in_interval2@@YA_NIII@Z ENDP ; point_in_interval2
_TEXT ENDS
; Function compile flags: /Ogtpy
; File c:\temp\ival\ival.cpp
; COMDAT ?point_in_interval1@@YA_NIII@Z
;-----------------------------------------------------------------------
; bool point_in_interval1(uint32_t x, uint32_t a, uint32_t b)
; Stand-alone copy of the predicate:
; eax = ((x - a) & 3fffH) unsigned-below ((b - a) & 3fffH).
; In:    x, a, b on the stack at [esp+4], [esp+8], [esp+12]
; Out:   eax = 0 or 1
; Clobb: edx, flags
;-----------------------------------------------------------------------
_TEXT SEGMENT
; Parameter offsets; nothing is pushed in this function, so every access
; uses the "[esp-4]" form.
_x$ = 8 ; size = 4
_a$ = 12 ; size = 4
_b$ = 16 ; size = 4
?point_in_interval1@@YA_NIII@Z PROC ; point_in_interval1, COMDAT
; 9 : // can skip second AND if known ahead of time that (b-a) < N.
; 10 : return ((x - a) & (N - 1)) < ((b - a) & (N - 1));
mov edx, DWORD PTR _x$[esp-4] ; edx = x
mov eax, DWORD PTR _b$[esp-4] ; eax = b
sub eax, DWORD PTR _a$[esp-4] ; eax = b - a
sub edx, DWORD PTR _a$[esp-4] ; edx = x - a
and eax, 16383 ; 00003fffH -- eax = (b - a) & (N - 1)
and edx, 16383 ; 00003fffH -- edx = (x - a) & (N - 1)
cmp edx, eax ; CF = (edx < eax), unsigned
sbb eax, eax ; eax = -CF: 0 or -1
neg eax ; eax = comparison result as 0 or 1
; 11 : }
ret 0
?point_in_interval1@@YA_NIII@Z ENDP ; point_in_interval1
_TEXT ENDS
; End of the 32-bit listing.
END
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment