Skip to content

Instantly share code, notes, and snippets.

@JuxhinDB
Last active April 17, 2021 15:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save JuxhinDB/09190fdc7040d76dbe67c0b8cd683416 to your computer and use it in GitHub Desktop.
Save JuxhinDB/09190fdc7040d76dbe67c0b8cd683416 to your computer and use it in GitHub Desktop.
ASM generated by a slow parity algorithm
// fast_parity: XOR-fold the 64-bit input down to a single parity bit.
// In:    x0 = 64-bit input word
// Out:   x0 = 1 if an odd number of input bits are set, otherwise 0
// Clobb: x8, x9
// Leaf routine: no stack use, no calls.
hamming_code_simd::hamming::fast_parity (src/hamming.rs:66):
eor x8, x0, x0, lsr #1     // each bit now holds the XOR of 2 adjacent input bits
eor x8, x8, x8, lsr #2     // ... of 4 adjacent input bits
eor x8, x8, x8, lsr #4     // ... of 8
eor x8, x8, x8, lsr #8     // ... of 16
eor x8, x8, x8, lsr #16    // bit 0 = XOR of bits 0..31, bit 32 = XOR of bits 32..63
lsr x9, x8, #32            // bring the upper-half fold down to bit 0
eor w8, w9, w8             // bit 0 = XOR of all 64 input bits
and x0, x8, #0x1           // keep only the parity bit
ret
// parity (counter/ignore variant): annotated disassembly with the Rust source
// lines interleaved.
// In:    x0 = pointer to the 64-bit code word, w1 = i
// Out:   w0 = low bit of the accumulator w9, which starts at 1 and is XORed
//             with every selected bit of the code word
// Clobb: w8-w14, flags
// Register roles inside the loop:
//   w8  = 1 << i (run length)      w9  = parity accumulator
//   w10 = counter                  w11 = loaded code word (x11)
//   w12 = ignore flag (bit 0)      w13 = current bit index j
pub fn parity(code: &u64, i: u32) -> bool {
mov w9, #1
let bi = (0b1 << i) - 1;
lsl w8, w9, w1              // w8 = 1 << i
sub w10, w8, #1             // w10 = bi = (1 << i) - 1
cmp w10, #63
b.gt LBB2_3                 // bi > 63 (signed): skip the loop, w9 is still 1
mov w12, #0                 // ignore = false
mov w10, #0                 // counter = 0
ldr x11, [x0]               // x11 = *code
sub w13, w8, #2             // j = bi - 1; incremented to bi at the top of the loop
mov w9, #1                  // accumulator starts at 1
LBB2_2:
add w13, w13, #1            // advance to the next bit index
if !ignore && (code & 0b1 << j) != 0b0 {
lsr x14, x11, x13
and w14, w14, #0x1          // w14 = bit j of the code word
eor w14, w9, w14            // candidate accumulator = acc ^ bit
tst w12, #0x1
csel w9, w9, w14, ne        // keep the old accumulator while ignore is set
counter += 1;
add w14, w10, #1            // w14 = counter + 1
if counter >= 0b1 << i {
cmp w14, w8
cset w14, lt                // w14 = 1 while the current run is not yet complete
if counter >= 0b1 << i {
csinc w10, wzr, w10, ge     // counter = 0 when the run is complete, else counter + 1
eor w12, w14, w12
eor w12, w12, #0x1          // together: toggle ignore exactly when the run completed
cmp w13, #63
b.lt LBB2_2                 // continue while j < 63, so the last index processed is 63
LBB2_3:
}
and w0, w9, #0x1            // return the low bit of the accumulator
ret
// slow_parity: vectorised (NEON) listing that tests the input against a table
// of 128-bit constants, XOR-reduces the per-test results, then folds in bits
// 48..62 with scalar code.
// In:    x0 = 64-bit input word
// Out:   w0 = low bit of the combined XOR
// Clobb: x8, x9, v0-v7, v16-v18
// The lCPI3_* literals live outside this listing; presumably each holds two
// single-bit masks (one per 64-bit lane) -- TODO confirm against the constant pool.
// NOTE(review): the Lloh* labels tag each adrp/ldr pair; they look like LLVM
// linker-optimization-hint markers -- confirm.
hamming_code_simd::hamming::slow_parity (src/hamming.rs:78):
dup.2d v0, x0                   // broadcast the input into both 64-bit lanes
// ---- Batch 1: load lCPI3_0 .. lCPI3_7 into v1-v7, v16 ----
Lloh78:
adrp x8, lCPI3_0@PAGE
Lloh79:
ldr q1, [x8, lCPI3_0@PAGEOFF]
Lloh80:
adrp x8, lCPI3_1@PAGE
Lloh81:
ldr q2, [x8, lCPI3_1@PAGEOFF]
Lloh82:
adrp x8, lCPI3_2@PAGE
Lloh83:
ldr q3, [x8, lCPI3_2@PAGEOFF]
Lloh84:
adrp x8, lCPI3_3@PAGE
Lloh85:
ldr q4, [x8, lCPI3_3@PAGEOFF]
Lloh86:
adrp x8, lCPI3_4@PAGE
Lloh87:
ldr q5, [x8, lCPI3_4@PAGEOFF]
Lloh88:
adrp x8, lCPI3_5@PAGE
Lloh89:
ldr q6, [x8, lCPI3_5@PAGEOFF]
Lloh90:
adrp x8, lCPI3_6@PAGE
Lloh91:
ldr q7, [x8, lCPI3_6@PAGEOFF]
Lloh92:
adrp x8, lCPI3_7@PAGE
Lloh93:
ldr q16, [x8, lCPI3_7@PAGEOFF]
// cmtst sets a lane to all-ones when (input & constant) != 0, else to zero.
// The xtn/xtn2 chain narrows 64 -> 32 -> 16 -> 8 bits so that the 16 lane
// results of this batch end up as the 16 bytes of v1.
cmtst.2d v16, v0, v16
cmtst.2d v7, v0, v7
xtn.2s v7, v7
xtn2.4s v7, v16
cmtst.2d v6, v0, v6
cmtst.2d v5, v0, v5
xtn.2s v5, v5
xtn2.4s v5, v6
xtn.4h v5, v5
xtn2.8h v5, v7
cmtst.2d v4, v0, v4
cmtst.2d v3, v0, v3
xtn.2s v3, v3
xtn2.4s v3, v4
cmtst.2d v2, v0, v2
cmtst.2d v1, v0, v1
xtn.2s v1, v1
xtn2.4s v1, v2
xtn.4h v1, v1
xtn2.8h v1, v3
xtn.8b v1, v1
xtn2.16b v1, v5                 // v1 = 16 byte-wide results for lCPI3_0 .. lCPI3_7
// ---- Batch 2: load lCPI3_8 .. lCPI3_15 into v2-v7, v16, v17 ----
Lloh94:
adrp x8, lCPI3_8@PAGE
Lloh95:
ldr q2, [x8, lCPI3_8@PAGEOFF]
Lloh96:
adrp x8, lCPI3_9@PAGE
Lloh97:
ldr q3, [x8, lCPI3_9@PAGEOFF]
Lloh98:
adrp x8, lCPI3_10@PAGE
Lloh99:
ldr q4, [x8, lCPI3_10@PAGEOFF]
Lloh100:
adrp x8, lCPI3_11@PAGE
Lloh101:
ldr q5, [x8, lCPI3_11@PAGEOFF]
Lloh102:
adrp x8, lCPI3_12@PAGE
Lloh103:
ldr q6, [x8, lCPI3_12@PAGEOFF]
Lloh104:
adrp x8, lCPI3_13@PAGE
Lloh105:
ldr q7, [x8, lCPI3_13@PAGEOFF]
Lloh106:
adrp x8, lCPI3_14@PAGE
Lloh107:
ldr q16, [x8, lCPI3_14@PAGEOFF]
Lloh108:
adrp x8, lCPI3_15@PAGE
Lloh109:
ldr q17, [x8, lCPI3_15@PAGEOFF]
// Same test-and-narrow pattern as batch 1; the packed bytes land in v2.
cmtst.2d v17, v0, v17
cmtst.2d v16, v0, v16
xtn.2s v16, v16
xtn2.4s v16, v17
cmtst.2d v7, v0, v7
cmtst.2d v6, v0, v6
xtn.2s v6, v6
xtn2.4s v6, v7
xtn.4h v6, v6
xtn2.8h v6, v16
cmtst.2d v5, v0, v5
cmtst.2d v4, v0, v4
xtn.2s v4, v4
xtn2.4s v4, v5
cmtst.2d v3, v0, v3
cmtst.2d v2, v0, v2
xtn.2s v2, v2
xtn2.4s v2, v3
xtn.4h v2, v2
xtn2.8h v2, v4
xtn.8b v2, v2
xtn2.16b v2, v6                 // v2 = 16 byte-wide results for lCPI3_8 .. lCPI3_15
eor.16b v1, v1, v2              // v1 = batch 1 ^ batch 2
// ---- Batch 3: lCPI3_16 goes to v2 untouched; lCPI3_17 .. lCPI3_24 are tested ----
Lloh110:
adrp x8, lCPI3_16@PAGE
Lloh111:
ldr q2, [x8, lCPI3_16@PAGEOFF]
Lloh112:
adrp x8, lCPI3_17@PAGE
Lloh113:
ldr q3, [x8, lCPI3_17@PAGEOFF]
Lloh114:
adrp x8, lCPI3_18@PAGE
Lloh115:
ldr q4, [x8, lCPI3_18@PAGEOFF]
Lloh116:
adrp x8, lCPI3_19@PAGE
Lloh117:
ldr q5, [x8, lCPI3_19@PAGEOFF]
Lloh118:
adrp x8, lCPI3_20@PAGE
Lloh119:
ldr q6, [x8, lCPI3_20@PAGEOFF]
Lloh120:
adrp x8, lCPI3_21@PAGE
Lloh121:
ldr q7, [x8, lCPI3_21@PAGEOFF]
Lloh122:
adrp x8, lCPI3_22@PAGE
Lloh123:
ldr q16, [x8, lCPI3_22@PAGEOFF]
Lloh124:
adrp x8, lCPI3_23@PAGE
Lloh125:
ldr q17, [x8, lCPI3_23@PAGEOFF]
Lloh126:
adrp x8, lCPI3_24@PAGE
Lloh127:
ldr q18, [x8, lCPI3_24@PAGEOFF]
// Same test-and-narrow pattern; the last cmtst overwrites the broadcast copy in
// v0 (the scalar x0 still holds the input for the tail below).
cmtst.2d v18, v0, v18
cmtst.2d v17, v0, v17
xtn.2s v17, v17
xtn2.4s v17, v18
cmtst.2d v16, v0, v16
cmtst.2d v7, v0, v7
xtn.2s v7, v7
xtn2.4s v7, v16
xtn.4h v7, v7
xtn2.8h v7, v17
cmtst.2d v6, v0, v6
cmtst.2d v5, v0, v5
xtn.2s v5, v5
xtn2.4s v5, v6
cmtst.2d v4, v0, v4
cmtst.2d v0, v0, v3
xtn.2s v0, v0
xtn2.4s v0, v4
xtn.4h v0, v0
xtn2.8h v0, v5
xtn.8b v0, v0
xtn2.16b v0, v7                 // v0 = 16 byte-wide results for lCPI3_17 .. lCPI3_24
eor.16b v0, v1, v0              // combine all three batches
eor.16b v0, v0, v2              // XOR in the raw lCPI3_16 constant (never passed through cmtst)
// ---- Horizontal reduction: XOR the 16 bytes of v0 into byte 0 ----
ext.16b v1, v0, v0, #8          // rotate by 8 bytes
eor.16b v0, v0, v1
ext.16b v1, v0, v0, #4          // rotate by 4 bytes
eor.16b v0, v0, v1
ext.16b v1, v0, v0, #2          // rotate by 2 bytes
eor.16b v0, v0, v1
dup.16b v1, v0[1]               // final step: fold byte 1 into byte 0
eor.16b v0, v0, v1
umov.b w8, v0[0]                // w8 = XOR of all 16 bytes
// ---- Scalar tail: XOR in input bits 48..62 one at a time ----
// Bit 63 is not extracted anywhere in this tail.
ubfx x9, x0, #48, #1
eor w8, w8, w9
ubfx x9, x0, #49, #1
eor w8, w8, w9
ubfx x9, x0, #50, #1
eor w8, w8, w9
ubfx x9, x0, #51, #1
eor w8, w8, w9
ubfx x9, x0, #52, #1
eor w8, w8, w9
ubfx x9, x0, #53, #1
eor w8, w8, w9
ubfx x9, x0, #54, #1
eor w8, w8, w9
ubfx x9, x0, #55, #1
eor w8, w8, w9
ubfx x9, x0, #56, #1
eor w8, w8, w9
ubfx x9, x0, #57, #1
eor w8, w8, w9
ubfx x9, x0, #58, #1
eor w8, w8, w9
ubfx x9, x0, #59, #1
eor w8, w8, w9
ubfx x9, x0, #60, #1
eor w8, w8, w9
lsr x9, x0, #61                 // plain shift: higher bits ride along but are masked off below
eor w8, w8, w9
lsr x9, x0, #62
eor w8, w8, w9
and w0, w8, #0x1                // only bit 0 of the accumulated XOR is returned
ret
// parity (stride variant): annotated disassembly with the Rust source lines
// interleaved.
// In:    x0 = pointer to the 64-bit code word, w1 = i
// Out:   w0 = low bit of the accumulator, which starts at 1 and is XORed with
//             each visited bit of the code word
// Clobb: w8-w11, flags
pub fn parity(code: &u64, i: u32) -> bool {
mov w8, #-1
let mut j = (0b1 << i) - 1;
lsl w8, w8, w1              // w8 = -1 << i, i.e. the bitwise NOT of j = (1 << i) - 1
while j < 64 {
cmn w8, #64                 // compares ~j against -64 (unsigned), equivalent to j < 64
b.lo LBB2_4                 // j >= 64: the loop never runs
let mut j = (0b1 << i) - 1;
mvn w10, w8                 // w10 = j
ldr x9, [x0]                // x9 = *code
mov w8, #1                  // accumulator starts at 1
LBB2_2:
if (code & 0b1 << j) != 0b0 {
lsr x11, x9, x10
and w11, w11, #0x1          // w11 = bit j of the code word
eor w8, w8, w11             // fold it into the accumulator
add w10, w10, w1            // the next three instructions compute
lsl w10, w10, #1            //   j = 2 * (j + i) + 2
j += j + 2 * (i + 1);
add w10, w10, #2            //     = j + j + 2 * (i + 1)
while j < 64 {
cmp w10, #64
b.lo LBB2_2                 // unsigned compare: keep looping while j < 64
}
and w0, w8, #0x1            // return the low bit of the accumulator
ret
LBB2_4:
// Early exit when no bit index is in range: returns 1.
mov w8, #1
and w0, w8, #0x1
ret
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment