Created
January 6, 2021 21:06
-
-
Save lemire/1c9e8827b45d057d7546e2743ad34496 to your computer and use it in GitHub Desktop.
AArch64 assembly output for the three functions (compute_two, compute_two_plus, compute_three) discussed at https://lemire.me/blog/2021/01/06/memory-access-on-the-apple-m1-processor/#comments
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; -----------------------------------------------------------------------
; compute_two
;
; Syntax:   AArch64, Mach-O assembler dialect (';' starts a comment).
; In:       x0 = base address used by every byte load
;           x2 = base address of an array of signed 32-bit offsets
;           _M = 64-bit global, read once on entry; 2*M offsets are consumed
; Out:      w0 = 32-bit wrapping sum (see pseudo-code)
; Clobbers: x1, x3, x8-x17, condition flags
;
; Pseudo-code of what the instructions below compute:
;     sum = 0
;     for (i = 0; i < 2*M; i += 2)
;         sum += sign_extend_8(x0[x2[i]] ^ x0[x2[i + 1]])
;     return sum
;
; The main loop (LBB0_3) handles two offset pairs per iteration using two
; independent accumulators (w8, w13); LBB0_7 handles whatever is left.
; -----------------------------------------------------------------------
        .globl  _compute_two                    ; -- Begin function compute_two
        .p2align 2
_compute_two:                                   ; @compute_two
        .cfi_startproc
; %bb.0:
Lloh0:
        adrp    x8, _M@PAGE
Lloh1:
        ldr     x8, [x8, _M@PAGEOFF]            ; x8 = M
        tst     x8, #0x7fffffffffffffff         ; low 63 bits all zero <=> 2*M wraps to 0
        b.eq    LBB0_5                          ; no offsets to process: return 0
; %bb.1:
        lsl     x9, x8, #1                      ; x9 = 2*M = loop bound (offset count)
        cmp     x9, #2                          ; =2
        mov     w8, #2
        csel    x8, x9, x8, hi                  ; x8 = max(2*M, 2)
        sub     x8, x8, #1                      ; =1
        lsr     x10, x8, #1                     ; x10 = (max(2*M, 2) - 1) >> 1 = pairs - 1
        cbz     x10, LBB0_6                     ; a single pair: skip the unrolled loop
; %bb.2:
        mov     w8, #0                          ; accumulator for the first pair of each iteration
        mov     w13, #0                         ; accumulator for the second pair
        add     x11, x10, #1                    ; =1 ; x11 = number of pairs
        and     x12, x11, #0xfffffffffffffffe   ; x12 = number of pairs rounded down to even
        lsl     x10, x12, #1                    ; x10 = offset index where the tail loop starts
        add     x14, x2, #8                     ; =8 ; cursor, biased so [x14, #-8] is the first pair
        mov     x15, x12                        ; x15 = pairs still owed to the unrolled loop
LBB0_3:                                         ; =>This Inner Loop Header: Depth=1
        ldpsw   x16, x17, [x14, #-8]            ; two sign-extended offsets (first pair)
        ldrb    w16, [x0, x16]
        ldpsw   x1, x3, [x14], #16              ; second pair, then advance cursor by 4 offsets
        ldrb    w1, [x0, x1]
        ldrb    w17, [x0, x17]
        ldrb    w3, [x0, x3]
        eor     w16, w17, w16
        eor     w17, w3, w1
        add     w8, w8, w16, sxtb               ; add the XOR as a sign-extended byte
        add     w13, w13, w17, sxtb
        subs    x15, x15, #2                    ; =2
        b.ne    LBB0_3
; %bb.4:
        add     w8, w13, w8                     ; merge the two accumulators
        cmp     x11, x12
        b.ne    LBB0_7                          ; odd number of pairs: one pair remains
        b       LBB0_8
LBB0_5:
        mov     w0, #0
        ret
LBB0_6:
        mov     w8, #0                          ; x10 is 0 here (cbz taken), so the tail starts at index 0
LBB0_7:                                         ; =>This Inner Loop Header: Depth=1
        add     x11, x2, x10, lsl #2            ; x11 = address of offset number x10
        ldpsw   x12, x11, [x11]
        ldrb    w12, [x0, x12]
        ldrb    w11, [x0, x11]
        eor     w11, w11, w12
        add     w8, w8, w11, sxtb
        add     x10, x10, #2                    ; =2
        cmp     x10, x9
        b.lo    LBB0_7                          ; unsigned compare against 2*M
LBB0_8:
        mov     x0, x8
        ret
        .loh AdrpLdr    Lloh0, Lloh1
        .cfi_endproc
                                        ; -- End function
; -----------------------------------------------------------------------
; compute_two_plus
;
; Syntax:   AArch64, Mach-O assembler dialect (';' starts a comment).
; In:       x0 = base address used by every byte load
;           x2 = base address of an array of signed 32-bit offsets
;           _M = 64-bit global, read once on entry; 2*M offsets are consumed
; Out:      w0 = 32-bit wrapping sum (see pseudo-code)
; Clobbers: x8-x13, condition flags
;
; Pseudo-code of what the instructions below compute:
;     sum = 0
;     for (i = 0; i < 2*M; i += 2) {
;         p = x0 + x2[i];  q = x0 + x2[i + 1];
;         sum += sign_extend_8(p[0] ^ p[1] ^ q[0] ^ q[1])
;     }
;     return sum
; -----------------------------------------------------------------------
        .globl  _compute_two_plus               ; -- Begin function compute_two_plus
        .p2align 2
_compute_two_plus:                              ; @compute_two_plus
        .cfi_startproc
; %bb.0:
Lloh2:
        adrp    x8, _M@PAGE
Lloh3:
        ldr     x10, [x8, _M@PAGEOFF]           ; x10 = M
        tst     x10, #0x7fffffffffffffff        ; low 63 bits all zero <=> 2*M wraps to 0
        b.eq    LBB1_4                          ; no offsets to process: return 0
; %bb.1:
        mov     w8, #0                          ; running sum
        mov     x9, #0                          ; offset index
        lsl     x10, x10, #1                    ; x10 = 2*M = loop bound
LBB1_2:                                         ; =>This Inner Loop Header: Depth=1
        add     x11, x2, x9, lsl #2             ; x11 = address of offset number x9
        ldpsw   x12, x11, [x11]                 ; two sign-extended offsets
        add     x12, x0, x12                    ; address for the first offset
        ldrb    w13, [x12]
        ldrb    w12, [x12, #1]                  ; the byte right after it
        eor     w12, w12, w13
        add     x11, x0, x11                    ; address for the second offset
        ldrb    w13, [x11]
        eor     w12, w12, w13
        ldrb    w11, [x11, #1]                  ; the byte right after it
        eor     w11, w12, w11
        add     w8, w8, w11, sxtb               ; add the XOR as a sign-extended byte
        add     x9, x9, #2                      ; =2
        cmp     x9, x10
        b.lo    LBB1_2                          ; unsigned compare against 2*M
; %bb.3:
        mov     x0, x8
        ret
LBB1_4:
        mov     w0, #0                          ; zero result written directly to the return register
        ret
        .loh AdrpLdr    Lloh2, Lloh3
        .cfi_endproc
                                        ; -- End function
; -----------------------------------------------------------------------
; compute_three
;
; Syntax:   AArch64, Mach-O assembler dialect (';' starts a comment).
; In:       x0 = base address used by every byte load
;           x2 = base address of an array of signed 32-bit offsets
;           _M = 64-bit global, read once on entry; 3*M offsets are consumed
; Out:      w0 = 32-bit wrapping sum (see pseudo-code)
; Clobbers: x8-x13, condition flags
;
; Pseudo-code of what the instructions below compute:
;     sum = 0
;     for (i = 0; i < 3*M; i += 3)
;         sum += sign_extend_8(x0[x2[i]] ^ x0[x2[i + 1]] ^ x0[x2[i + 2]])
;     return sum
; -----------------------------------------------------------------------
        .globl  _compute_three                  ; -- Begin function compute_three
        .p2align 2
_compute_three:                                 ; @compute_three
        .cfi_startproc
; %bb.0:
Lloh4:
        adrp    x8, _M@PAGE
Lloh5:
        ldr     x8, [x8, _M@PAGEOFF]            ; x8 = M
        adds    x9, x8, x8, lsl #1              ; x9 = M + 2*M = 3*M, and set flags on the result
        b.eq    LBB2_4                          ; 3*M == 0: return 0
; %bb.1:
        mov     x10, #0                         ; offset index
        mov     w8, #0                          ; running sum
LBB2_2:                                         ; =>This Inner Loop Header: Depth=1
        add     x11, x2, x10, lsl #2            ; x11 = address of offset number x10
        ldpsw   x12, x13, [x11]                 ; first two sign-extended offsets
        ldrb    w12, [x0, x12]
        ldrb    w13, [x0, x13]
        eor     w12, w13, w12
        ldrsw   x11, [x11, #8]                  ; third sign-extended offset
        ldrb    w11, [x0, x11]
        eor     w11, w12, w11
        add     w8, w8, w11, sxtb               ; add the XOR as a sign-extended byte
        add     x10, x10, #3                    ; =3
        cmp     x10, x9
        b.lo    LBB2_2                          ; unsigned compare against 3*M
; %bb.3:
        mov     x0, x8
        ret
LBB2_4:
        mov     w0, #0                          ; zero result written directly to the return register
        ret
        .loh AdrpLdr    Lloh4, Lloh5
        .cfi_endproc
                                        ; -- End function
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment