Created
May 30, 2023 16:34
-
-
Save Unam3dd/c61ef9824049f9671d1757b25ce68c8b to your computer and use it in GitHub Desktop.
strlen implemented with the SSE2 extension in x86-64 assembly
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * unsigned long __vs_strlen_sse2(const char *s)
 *
 * strlen built on SSE2 byte compares (GNU as, Intel syntax, x86-64).
 * The argument is taken in rdi and the result returned in rax, i.e. the
 * System V AMD64 register convention.
 *
 * In:    rdi = s; a NULL pointer is accepted and yields 0
 * Out:   rax = number of bytes preceding the first NUL byte
 * Clobb: rcx, rdx, rsi, r8, xmm0-xmm4, flags
 * Stack: unused (leaf function)
 *
 * Outline
 *   1. If (s & 0xFFF) <= 4047, an unaligned 16-byte load from s stays
 *      inside the same 4 KiB-aligned region; test those 16 bytes, then
 *      the rest of the region up to the next 64-byte boundary using
 *      aligned loads.
 *   2. Otherwise round s down to 64 bytes, test the whole aligned block
 *      and shift away the mask bits belonging to bytes in front of s.
 *   3. Main loop: 64 aligned bytes per step (two steps per iteration).
 *      pminub folds four vectors into one, so a single compare tells
 *      whether the block holds a NUL.
 *   4. Exit: rebuild the exact 64-bit NUL mask of the hit block and
 *      locate the first set bit.
 *
 * Every load except the first one in step 1 is 16-byte aligned.
 *
 * Mask layout used throughout: bit i of rdx is set when the byte at
 * [rax + i] is NUL (rax = base of the block being examined).
 */
        .intel_syntax noprefix
        .global __vs_strlen_sse2
        .type   __vs_strlen_sse2, @function
        .section .text

__vs_strlen_sse2:
        endbr64
        test    rdi, rdi
        jz      .L_null

        /* xmm0-xmm3 = 0: pcmpeqb against them marks NUL bytes. */
        pxor    xmm0, xmm0
        pxor    xmm1, xmm1
        pxor    xmm2, xmm2
        pxor    xmm3, xmm3

        mov     rax, rdi
        mov     rcx, rdi
        and     rcx, 0xFFF                      /* offset inside 4 KiB region */
        cmp     rcx, 0xFCF                      /* 4047: last offset handled by the fast path */
        ja      .L_cross_page

        /* Fast path: first 16 bytes, unaligned. */
        movdqu  xmm4, xmmword ptr [rax]
        pcmpeqb xmm4, xmm0
        pmovmskb edx, xmm4
        test    edx, edx
        jz      .L_next48
        bsf     eax, edx                        /* index of first NUL = length */
        ret

.L_cross_page:
        /* s is near the end of the region: examine its whole 64-byte block. */
        and     rax, -64
        pcmpeqb xmm0, xmmword ptr [rax]
        pcmpeqb xmm1, xmmword ptr [rax+0x10]
        pcmpeqb xmm2, xmmword ptr [rax+0x20]
        pcmpeqb xmm3, xmmword ptr [rax+0x30]
        pmovmskb esi, xmm0                      /* bits  0..15 */
        pmovmskb edx, xmm1                      /* bits 16..31 (after shift) */
        pmovmskb r8d, xmm2                      /* bits 32..47 (after shifts) */
        pmovmskb ecx, xmm3                      /* bits 48..63 (after shifts) */
        shl     rdx, 16
        shl     rcx, 16
        or      rdx, rsi
        or      rcx, r8
        shl     rcx, 32
        or      rdx, rcx                        /* rdx = 64-bit NUL mask of block */
        mov     rcx, rdi
        xor     rcx, rax                        /* rcx = s - block base (0..63) */
        /*
         * Drop bits for bytes located before s. The arithmetic shift may
         * replicate bit 63 upward, but only above the real lowest set
         * bit, so bsf is unaffected.
         */
        sar     rdx, cl
        test    rdx, rdx
        jz      .L_loop_init
        bsf     rax, rdx
        ret

.L_next48:
        /*
         * No NUL in [s, s+16). Check the following three aligned vectors;
         * bits 0..15 of the mask stay clear because that range is either
         * before s or already covered by the unaligned load.
         */
        and     rax, -16
        pcmpeqb xmm1, xmmword ptr [rax+0x10]
        pcmpeqb xmm2, xmmword ptr [rax+0x20]
        pcmpeqb xmm3, xmmword ptr [rax+0x30]
        pmovmskb edx, xmm1
        pmovmskb r8d, xmm2
        pmovmskb ecx, xmm3
        shl     rdx, 16
        shl     rcx, 16
        or      rcx, r8
        shl     rcx, 32
        or      rdx, rcx
        mov     rcx, rdi
        xor     rcx, rax                        /* rcx = s & 15 */
        and     rax, -64                        /* loop works on 64-byte blocks */
        sar     rdx, cl
        test    rdx, rdx
        /* Mask empty => xmm1-xmm3 are all-zero again, no reset needed. */
        jz      .L_loop
        bsf     rax, rdx
        ret

.L_loop_init:
        /*
         * The cross-page compare may have matched NUL bytes in front of
         * s, leaving non-zero bytes in xmm1-xmm3; clear them. xmm0 is
         * reloaded at the top of the loop and needs no reset.
         */
        pxor    xmm1, xmm1
        pxor    xmm2, xmm2
        pxor    xmm3, xmm3

.L_loop:
        /* Invariant: rax is 64-byte aligned, xmm1-xmm3 are zero. */
        movdqa  xmm0, xmmword ptr [rax+0x40]
        pminub  xmm0, xmmword ptr [rax+0x50]
        pminub  xmm0, xmmword ptr [rax+0x60]
        pminub  xmm0, xmmword ptr [rax+0x70]
        pcmpeqb xmm0, xmm3                      /* any byte-wise minimum == 0 ? */
        pmovmskb edx, xmm0
        test    edx, edx
        jnz     .L_exit64

        sub     rax, -128                       /* rax += 128; -128 fits an 8-bit immediate */
        movdqa  xmm0, xmmword ptr [rax]
        pminub  xmm0, xmmword ptr [rax+0x10]
        pminub  xmm0, xmmword ptr [rax+0x20]
        pminub  xmm0, xmmword ptr [rax+0x30]
        pcmpeqb xmm0, xmm3
        pmovmskb edx, xmm0
        test    edx, edx
        jnz     .L_exit0
        jmp     .L_loop

.L_exit64:
        add     rax, 0x40                       /* hit was in the block at rax+64 */
.L_exit0:
        /* rax = base of the block that contains the first NUL. */
        pxor    xmm0, xmm0
        pcmpeqb xmm0, xmmword ptr [rax]
        pcmpeqb xmm1, xmmword ptr [rax+0x10]
        pcmpeqb xmm2, xmmword ptr [rax+0x20]
        pcmpeqb xmm3, xmmword ptr [rax+0x30]
        pmovmskb esi, xmm0
        pmovmskb edx, xmm1
        pmovmskb r8d, xmm2
        pmovmskb ecx, xmm3
        shl     rdx, 16
        shl     rcx, 16
        or      rdx, rsi
        or      rcx, r8
        shl     rcx, 32
        or      rdx, rcx
        bsf     rdx, rdx                        /* offset of NUL inside block */
        add     rax, rdx                        /* rax = address of the NUL */
        sub     rax, rdi                        /* length = &NUL - s */
        ret

.L_null:
        xor     eax, eax                        /* NULL input: report length 0 */
        ret

        .size   __vs_strlen_sse2, . - __vs_strlen_sse2

        /* Declare that this object does not need an executable stack. */
        .section .note.GNU-stack, "", @progbits
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment