Skip to content

Instantly share code, notes, and snippets.

@hnakamur
Created July 17, 2024 13:41
Show Gist options
  • Save hnakamur/12f88264c8dcdc611231e0b755269ba5 to your computer and use it in GitHub Desktop.
Save hnakamur/12f88264c8dcdc611231e0b755269ba5 to your computer and use it in GitHub Desktop.
SIMD experiment with Odin
package main
import "core:fmt"
import "core:simd"
// Entry point: runs index_any over a fixed 16-byte sample and prints the
// index it returns.
main :: proc() {
	// Sample input: thirteen 'a' bytes, then '\r', '&', and a final 'a'.
	haystack := []u8{'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', '\r', '&', 'a'}
	found := index_any(haystack, 16)
	fmt.println(found)
}
// The two byte values that index_any searches for.
ch1: u8 = '&'
ch2: u8 = '\r'
// Number of u8 lanes handled per SIMD step; index_any loads chunks as simd.u8x16.
lane :: 16
// needle1 / needle2 hold ch1 / ch2 repeated in every one of the 16 lanes, so a
// lane-wise equality test against a chunk flags each position holding that byte.
needle1 := simd.from_array([lane]u8{ch1, ch1, ch1, ch1, ch1, ch1, ch1, ch1, ch1, ch1, ch1, ch1, ch1, ch1, ch1, ch1,})
needle2 := simd.from_array([lane]u8{ch2, ch2, ch2, ch2, ch2, ch2, ch2, ch2, ch2, ch2, ch2, ch2, ch2, ch2, ch2, ch2,})
// index_any returns the index of the first byte among the first `len` bytes of
// `haystack` that equals ch1 or ch2, or -1 when there is none.
//
// Whole 16-byte chunks are first screened with SIMD compares against needle1
// and needle2; scanning then continues byte by byte from the first chunk that
// contains a match (or from the first position where no whole chunk fits).
index_any :: proc(haystack: []u8, len: int) -> int {
	pos := 0

	// Vector phase: skip every full chunk that contains neither needle.
	// `break` leaves the loop without running the post statement, so `pos`
	// still points at the start of the chunk that matched.
	for ; pos + lane <= len; pos += lane {
		chunk := simd.from_slice(simd.u8x16, haystack[pos:pos+lane])
		// lanes_eq produces an all-ones (0xff) lane for every equal byte, so
		// the OR of all lanes is 0xff exactly when some lane matched.
		if simd.reduce_or(simd.lanes_eq(chunk, needle1)) == 0xff ||
		   simd.reduce_or(simd.lanes_eq(chunk, needle2)) == 0xff {
			break
		}
	}

	// Scalar phase: pinpoint the match inside the flagged chunk, or scan the tail.
	for i := pos; i < len; i += 1 {
		c := haystack[i]
		if c == ch1 || c == ch2 {
			return i
		}
	}

	return -1
}
# ...(snip)...
# Compiler output for index_any (x86-64, AT&T syntax). Every intermediate value
# is spilled to and reloaded from the stack; this looks like an unoptimized
# build -- TODO confirm the optimization level used.
#
# Stack slots as used below:
#   64(%rsp) / 200(%rsp)  copy of %rdi (used as the haystack base pointer)
#   56(%rsp) / 208(%rsp)  copy of %rsi (passed to the bounds checks as the limit)
#   48(%rsp) / 192(%rsp)  copy of %rdx (compared against the offsets: `len`)
#   40(%rsp)              copy of %rcx (presumably Odin's implicit context
#                         pointer -- TODO confirm)
#   184(%rsp)             chunk offset of the vector loop
#   128(%rsp)             the loaded 16-byte chunk
#   112(%rsp), 96(%rsp)   pcmpeqb results against needle1 and needle2
#   88(%rsp)              index of the scalar loop
.p2align 4, 0x90
.type main.index_any,@function
main.index_any:
.cfi_startproc
subq $216, %rsp
.cfi_def_cfa_offset 224
# Spill the four incoming register arguments.
movq %rcx, 40(%rsp)
movq %rdx, 48(%rsp)
movq %rsi, 56(%rsp)
movq %rdi, 64(%rsp)
# Make second copies of pointer, slice length and len, then set offset = 0.
movq 48(%rsp), %rax
movq 56(%rsp), %rcx
movq 64(%rsp), %rdx
movq %rdx, 200(%rsp)
movq %rcx, 208(%rsp)
movq %rax, 192(%rsp)
movq $0, 184(%rsp)
# Vector loop head: continue only while offset + 16 <= len.
.LBB28_2:
movq 48(%rsp), %rcx
movq 184(%rsp), %rax
addq $16, %rax
cmpq %rcx, %rax
setle %al
andb $1, %al
cmpb $0, %al
je .LBB28_7
# Slice check for haystack[offset:offset+16]: %r8 = offset, %r9 = offset + 16,
# and the slice length goes on the stack. The call is unconditional in this
# listing, so the range test presumably happens inside the callee. The
# immediates look like a source location (string pointer, length 42, line 20,
# column 50) -- TODO confirm.
movq 184(%rsp), %r8
movq %r8, 32(%rsp)
movq %r8, %r9
addq $16, %r9
movq 208(%rsp), %rcx
movq %rsp, %rax
movq %rcx, (%rax)
movl $.Lcsbs$da, %edi
movl $42, %esi
movl $20, %edx
movl $50, %ecx
callq runtime.slice_expr_error_lo_hi
# Build the sub-slice {base + offset, 16} and hand it to simd.from_slice.
movq 32(%rsp), %rcx
movq 40(%rsp), %rdx
movq 200(%rsp), %rax
addq %rcx, %rax
movq %rax, 168(%rsp)
movq $16, 176(%rsp)
movq 168(%rsp), %rdi
movq 176(%rsp), %rsi
callq "simd.from_slice-19570"
# The result comes back as two 8-byte halves in %xmm0/%xmm1; reassemble it into
# one 16-byte value and keep it at 128(%rsp).
movsd %xmm1, 152(%rsp)
movq %xmm0, 144(%rsp)
movaps 144(%rsp), %xmm0
movaps %xmm0, 128(%rsp)
# Byte-wise compare of the chunk against needle1, result stored at 112(%rsp).
movaps 128(%rsp), %xmm0
movaps main.needle1(%rip), %xmm1
pcmpeqb %xmm1, %xmm0
movaps %xmm0, 112(%rsp)
# Byte-wise compare of the chunk against needle2, result stored at 96(%rsp).
movaps 128(%rsp), %xmm0
movaps main.needle2(%rip), %xmm1
pcmpeqb %xmm1, %xmm0
movaps %xmm0, 96(%rsp)
# Horizontal OR of the 16 bytes of the needle1 mask: fold the upper 8 bytes
# onto the lower (pshufd $238), then 4 onto 4 (pshufd $85), 2 onto 2 (psrld 16)
# and 1 onto 1 (psrlw 8). The low byte of %xmm0 then holds the OR of all lanes.
movaps 112(%rsp), %xmm0
pshufd $238, %xmm0, %xmm1
por %xmm1, %xmm0
pshufd $85, %xmm0, %xmm1
por %xmm1, %xmm0
movaps %xmm0, %xmm1
psrld $16, %xmm1
por %xmm1, %xmm0
movaps %xmm0, %xmm1
psrlw $8, %xmm1
por %xmm1, %xmm0
movd %xmm0, %eax
# Reduced byte == 0xff means some lane matched needle1: leave the vector loop.
cmpb $-1, %al
sete %al
andb $1, %al
cmpb $0, %al
jne .LBB28_5
# Same horizontal-OR reduction, now over the needle2 mask.
movaps 96(%rsp), %xmm0
pshufd $238, %xmm0, %xmm1
por %xmm1, %xmm0
pshufd $85, %xmm0, %xmm1
por %xmm1, %xmm0
movaps %xmm0, %xmm1
psrld $16, %xmm1
por %xmm1, %xmm0
movaps %xmm0, %xmm1
psrlw $8, %xmm1
por %xmm1, %xmm0
movd %xmm0, %eax
# No needle2 match either: advance to the next chunk.
cmpb $-1, %al
sete %al
andb $1, %al
cmpb $0, %al
je .LBB28_6
# A match was found in this chunk: exit the vector loop (the `break`).
.LBB28_5:
jmp .LBB28_7
# offset += 16, then re-test the loop condition.
.LBB28_6:
movq 184(%rsp), %rax
addq $16, %rax
movq %rax, 184(%rsp)
jmp .LBB28_2
# Scalar loop setup: the index at 88(%rsp) starts at the current offset.
# 80(%rsp) is a second counter that is only incremented alongside it; nothing
# in this listing reads it for any other purpose.
.LBB28_7:
movq 184(%rsp), %rax
movq %rax, 88(%rsp)
movq $0, 80(%rsp)
# Scalar loop head: exit to the "not found" return once index >= len.
.LBB28_8:
movq 48(%rsp), %rax
cmpq %rax, 88(%rsp)
jge .LBB28_14
# Bounds check for the first haystack[i] read: %r8 = i, %r9 = slice length.
# The immediates look like line 32, column 21 -- TODO confirm.
movq 56(%rsp), %r9
movq 88(%rsp), %rax
movq %rax, 72(%rsp)
movq 72(%rsp), %r8
movq %r8, 24(%rsp)
movl $.Lcsbs$da, %edi
movl $42, %esi
movl $32, %edx
movl $21, %ecx
callq runtime.bounds_check_error
# Load haystack[i] and compare it with ch1; on equality go return i.
movq 24(%rsp), %rcx
movq 64(%rsp), %rax
movb (%rax,%rcx), %al
cmpb main.ch1, %al
sete %al
andb $1, %al
cmpb $0, %al
jne .LBB28_11
# Second bounds check and load of the same element for the ch2 comparison
# (immediates look like line 32, column 43 -- TODO confirm).
movq 56(%rsp), %r9
movq 72(%rsp), %r8
movq %r8, 16(%rsp)
movl $.Lcsbs$da, %edi
movl $42, %esi
movl $32, %edx
movl $43, %ecx
callq runtime.bounds_check_error
movq 16(%rsp), %rcx
movq 64(%rsp), %rax
movb (%rax,%rcx), %al
cmpb main.ch2, %al
sete %al
andb $1, %al
cmpb $0, %al
je .LBB28_12
# Match: return the current index in %rax.
.LBB28_11:
movq 72(%rsp), %rax
addq $216, %rsp
.cfi_def_cfa_offset 8
retq
# No match at this index: fall through to the increment.
.LBB28_12:
.cfi_def_cfa_offset 224
jmp .LBB28_13
# Increment the index and the companion counter, then loop.
.LBB28_13:
movq 88(%rsp), %rax
addq $1, %rax
movq %rax, 88(%rsp)
movq 80(%rsp), %rax
addq $1, %rax
movq %rax, 80(%rsp)
jmp .LBB28_8
# Nothing found: return -1.
.LBB28_14:
movq $-1, %rax
addq $216, %rsp
.cfi_def_cfa_offset 8
retq
.Lfunc_end28:
.size main.index_any, .Lfunc_end28-main.index_any
.cfi_endproc
# ...(snip)...
@hnakamur
Copy link
Author

My attempt to port the example at https://x.com/orisano/status/1813187886910697632 to Odin.

odin-simd-experiment.S was built with the following command:

$ odin build . -build-mode:asm
$ odin version
odin version dev-2024-07-nightly:b4ca044

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment