-
-
Save CryZe/5895306177cc351b43e50ac7481bec90 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;-----------------------------------------------------------------------
; day_02::part2_simd
;
; Compiler-generated listing (Intel operand order, Windows x64 calling
; convention: first argument in rcx, second in rdx).
;
; In:    rcx = pointer to the caller-provided result slot (saved at
;              [rsp+152]); on success three qwords are written to it,
;              on the "nothing found" path only the first qword (0).
;        rdx = pointer to a 72-byte state block that is copied onto the
;              stack at [rsp+48 .. rsp+119].
;              NOTE(review): the way the copy is used below (memchr on
;              a one-byte needle, CR stripping, trim_end) looks like an
;              inlined Rust line-splitting iterator -- confirm against
;              the Rust source.
; Out:   result slot filled in; rax holds the result-slot pointer at
;        `ret` on both exit paths.
;
; Outline:
;   1. Zero a table of 8000 bytes (250 slots x 32 bytes) at [rsp+288].
;   2. Fill the slots, one trimmed input piece (at most 32 bytes) each.
;   3. Compare every slot with every later slot, 32 bytes at a time,
;      and stop at the first pair that agrees in exactly 31 positions.
;   4. Build a heap buffer containing the bytes on which that pair
;      agrees and hand (pointer, capacity, length) to the caller.
;
; Stack frame (offsets from the 32-byte aligned rsp):
;   [rsp+32]   next table slot to fill (reused as a 32-byte scratch
;              copy of the second matching entry in phase 4)
;   [rsp+40]   one-past-the-end of the table
;   [rsp+48]   copied state block, see the field notes at .LBB3_1
;   [rsp+152]  saved result-slot pointer
;   [rsp+160]  spill of the second matching entry (ymm4)
;   [rsp+192]  spill of the first matching entry (ymm3)
;   [rsp+224]  32-byte scratch used to stage one piece
;   [rsp+256]  byte-addressable copy of the first matching entry
;   [rsp+288]  the table; ends at rsp+8288
;-----------------------------------------------------------------------
day_02::part2_simd: | |
; Prologue: save the registers this function will clobber.
push rbp | |
push r15 | |
push r14 | |
push r13 | |
push r12 | |
push rsi | |
push rdi | |
push rbx | |
; Reserve 8312 bytes of locals; __chkstk is called with the size in eax
; before rsp is lowered.
mov eax, 8312 | |
call __chkstk | |
sub rsp, rax | |
; rbp anchors the frame for the epilogue (lea rsp, [rbp+8184] undoes
; both the allocation and the alignment below).
lea rbp, [rsp, +, 128] | |
; Align rsp to 32 bytes so the aligned ymm loads/stores on the frame
; are legal.
and rsp, -32 | |
; rsi = input state pointer, [rsp+152] = result-slot pointer.
mov rsi, rdx | |
mov qword, ptr, [rsp, +, 152], rcx | |
; rdi = end of table, r14 = start of table; zero all 8000 bytes.
lea rdi, [rsp, +, 8288] | |
lea r14, [rsp, +, 288] | |
xor edx, edx | |
mov r8d, 8000 | |
mov rcx, r14 | |
call memset | |
; Zero the 32-byte staging scratch at [rsp+224].
vxorps xmm0, xmm0, xmm0 | |
vmovaps ymmword, ptr, [rsp, +, 224], ymm0 | |
; Table cursor and table end.
mov qword, ptr, [rsp, +, 32], r14 | |
mov qword, ptr, [rsp, +, 40], rdi | |
; Copy the caller's 72-byte state block (64 bytes + 8 bytes) to
; [rsp+48], then zero the 16 bytes that follow it.
vmovups zmm0, zmmword, ptr, [rsi] | |
vmovups zmmword, ptr, [rsp, +, 48], zmm0 | |
mov rax, qword, ptr, [rsi, +, 64] | |
mov qword, ptr, [rsp, +, 112], rax | |
vxorps xmm0, xmm0, xmm0 | |
vmovups xmmword, ptr, [rsp, +, 120], xmm0 | |
; r12 = address of the staging scratch, kept across calls.
lea r12, [rsp, +, 224] | |
; ---------------------------------------------------------------------
; Phase 2: fill loop. One iteration per table slot; r14 = current slot.
;
; Fields of the copied state as used below (names are the reviewer's
; reading of the usage -- TODO confirm against the iterator's source):
;   [rsp+48]  start offset of the piece not yet returned
;   [rsp+56]  end offset of the remaining input
;   [rsp+64]  base pointer of the text
;   [rsp+72]  length of the text
;   [rsp+80]  current search position
;   [rsp+88]  search limit
;   [rsp+96]  needle length in bytes (1..4; >= 5 panics)
;   [rsp+108] needle bytes
;   [rsp+112] flag tested when the search is exhausted
;   [rsp+113] "finished" flag
; ---------------------------------------------------------------------
.LBB3_1: | |
; Advance the stored cursor to the slot after this one.
lea rax, [r14, +, 32] | |
mov qword, ptr, [rsp, +, 32], rax | |
; Already finished: take the "no piece" path.
cmp byte, ptr, [rsp, +, 113], 0 | |
jne .LBB3_2 | |
; r8 = limit - position; bail to the exhausted path if the position is
; past the limit or the limit is past the text length.
mov rdx, qword, ptr, [rsp, +, 80] | |
mov rax, qword, ptr, [rsp, +, 88] | |
mov r8, rax | |
sub r8, rdx | |
jb .LBB3_8 | |
cmp qword, ptr, [rsp, +, 72], rax | |
jb .LBB3_8 | |
; Search text[position..limit] for the LAST byte of the needle
; ([rsp + len + 107] == needle[len-1]).
mov r13, qword, ptr, [rsp, +, 64] | |
mov rax, qword, ptr, [rsp, +, 96] | |
add rdx, r13 | |
mov cl, byte, ptr, [rsp, +, rax, +, 107] | |
; Clear upper vector state before calling out (zmm0 was used above).
vzeroupper | |
call core::slice::memchr::memchr | |
; rax == 1 means a hit, with the index returned in rdx.
cmp rax, 1 | |
je .LBB3_11 | |
jmp .LBB3_7 | |
; Resume the byte search from the updated position (rsi) after a
; candidate was rejected; r8 = remaining length, rbx = needle length.
.LBB3_21: | |
add rsi, qword, ptr, [rsp, +, 64] | |
movzx ecx, byte, ptr, [rsp, +, rbx, +, 107] | |
mov rdx, rsi | |
call core::slice::memchr::memchr | |
cmp rax, 1 | |
jne .LBB3_7 | |
; Candidate hit: rdx = index of the hit relative to the old position.
.LBB3_11: | |
mov rax, qword, ptr, [rsp, +, 80] | |
mov rbx, qword, ptr, [rsp, +, 96] | |
; rsi = new position = old position + index + 1; store it back.
lea rsi, [rdx, +, rax] | |
add rsi, 1 | |
mov qword, ptr, [rsp, +, 80], rsi | |
; rdi = new position - needle length = where the needle would start.
mov rdi, rsi | |
sub rdi, rbx | |
jae .LBB3_13 | |
; Not enough bytes before the hit for a whole needle: keep searching
; if the remaining range is still valid, otherwise give up.
mov r15, qword, ptr, [rsp, +, 72] | |
mov rax, qword, ptr, [rsp, +, 88] | |
mov r8, rax | |
sub r8, rsi | |
jae .LBB3_20 | |
jmp .LBB3_8 | |
.LBB3_13: | |
; Reject the candidate if the new position lies beyond the text.
mov r15, qword, ptr, [rsp, +, 72] | |
cmp r15, rsi | |
jb .LBB3_19 | |
; The needle buffer holds at most 4 bytes; a length >= 5 panics.
cmp rbx, 5 | |
jae .LBB3_15 | |
; Compare text[rdi .. rdi+len] with the needle bytes at [rsp+108];
; identical pointers are accepted without calling memcmp.
mov rcx, qword, ptr, [rsp, +, 64] | |
add rcx, rdi | |
lea rax, [rsp, +, 108] | |
cmp rcx, rax | |
je .LBB3_24 | |
lea rdx, [rsp, +, 108] | |
mov r8, rbx | |
call memcmp | |
test eax, eax | |
je .LBB3_24 | |
; Candidate rejected: recompute the remaining length and loop back to
; the byte search while position <= limit <= text length.
.LBB3_19: | |
mov rax, qword, ptr, [rsp, +, 88] | |
mov r8, rax | |
sub r8, rsi | |
jb .LBB3_8 | |
.LBB3_20: | |
cmp r15, rax | |
jae .LBB3_21 | |
jmp .LBB3_8 | |
; No further hit: move the search position to the limit.
.LBB3_7: | |
mov rax, qword, ptr, [rsp, +, 88] | |
mov qword, ptr, [rsp, +, 80], rax | |
; Search exhausted: if not yet finished, emit the tail piece
; (.LBB3_9); otherwise fall through to the "no piece" path.
.LBB3_8: | |
cmp byte, ptr, [rsp, +, 113], 0 | |
je .LBB3_9 | |
; "No piece" path: r13 is zeroed, so the branch below is always taken
; and filling stops.
.LBB3_2: | |
xor r13d, r13d | |
test r13, r13 | |
je .LBB3_29 | |
; A piece is available: r13 = pointer, rdx = length. Trim it, check
; that it fits in a slot, and store it.
.LBB3_30: | |
mov rcx, r13 | |
vzeroupper | |
call core::str::<impl str>::trim_end | |
; trim_end returns pointer in rax and length in rdx; 33 bytes or more
; does not fit a 32-byte slot and panics.
mov rbx, rdx | |
cmp rdx, 33 | |
jae .LBB3_31 | |
; Stage the piece in the scratch at [rsp+224] (r12), then copy the
; whole 32-byte scratch into the current slot.
mov rcx, r12 | |
mov rdx, rax | |
mov r8, rbx | |
call memcpy | |
vmovaps ymm0, ymmword, ptr, [rsp, +, 224] | |
vmovaps ymmword, ptr, [r14], ymm0 | |
; Move to the next slot; loop until the cursor reaches the table end.
mov r14, qword, ptr, [rsp, +, 32] | |
cmp r14, qword, ptr, [rsp, +, 40] | |
jne .LBB3_1 | |
jmp .LBB3_29 | |
; Tail handling: the piece is [start, end). When the flag at [rsp+112]
; is clear, an empty tail (start == end) produces no piece.
.LBB3_9: | |
cmp byte, ptr, [rsp, +, 112], 0 | |
je .LBB3_22 | |
mov r13, qword, ptr, [rsp, +, 48] | |
mov rdi, qword, ptr, [rsp, +, 56] | |
jmp .LBB3_23 | |
.LBB3_22: | |
mov r13, qword, ptr, [rsp, +, 48] | |
mov rdi, qword, ptr, [rsp, +, 56] | |
cmp rdi, r13 | |
je .LBB3_2 | |
; Mark the state finished; rdi = tail length, r13 = tail pointer.
.LBB3_23: | |
mov byte, ptr, [rsp, +, 113], 1 | |
sub rdi, r13 | |
add r13, qword, ptr, [rsp, +, 64] | |
test rdi, rdi | |
je .LBB3_26 | |
; Non-empty piece: drop one trailing byte if it is 13 (carriage
; return). rdx = resulting length.
.LBB3_27: | |
lea rdx, [rdi, -, 1] | |
cmp byte, ptr, [r13, +, rdi, -, 1], 13 | |
cmovne rdx, rdi | |
; A null piece pointer ends the fill loop.
test r13, r13 | |
jne .LBB3_30 | |
jmp .LBB3_29 | |
; Needle confirmed at text[rdi..rsi]. The piece runs from the old start
; up to rdi: r13 (text base) += start, rdi -= start gives its pointer
; and length; the new start becomes rsi (just past the needle).
.LBB3_24: | |
mov rax, qword, ptr, [rsp, +, 48] | |
add r13, rax | |
sub rdi, rax | |
mov qword, ptr, [rsp, +, 48], rsi | |
test rdi, rdi | |
jne .LBB3_27 | |
; Empty piece: length 0.
.LBB3_26: | |
xor edx, edx | |
test r13, r13 | |
jne .LBB3_30 | |
; ---------------------------------------------------------------------
; Phase 3: pairwise comparison of table entries.
;
; rdi = current "left" entry. The outer loop is unrolled four times;
; rax / rcx / rdx / r9 are the byte spans of entries that follow the
; 1st / 2nd / 3rd / 4th left entry of the group (7968 = 249 * 32, each
; next one 32 less). ymm0 is loaded from a constant whose symbol name
; spells 32 bytes of 0x01.
; ---------------------------------------------------------------------
.LBB3_29: | |
lea rdi, [rsp, +, 288] | |
mov eax, 7968 | |
mov ecx, 7936 | |
mov edx, 7904 | |
mov r9d, 7872 | |
vmovdqa ymm0, ymmword, ptr, [rip, +, __ymm@0101010101010101010101010101010101010101010101010101010101010101] | |
jmp .LBB3_34 | |
; Next group of four left entries: every span shrinks by 4 * 32 bytes
; and rdi advances to r8 (= old rdi + 128).
.LBB3_33: | |
add rax, -128 | |
add rcx, -128 | |
add rdx, -128 | |
add r9, -128 | |
mov rdi, r8 | |
; Left entry #1 of the group: ymm3 = entry at rdi.
.LBB3_34: | |
vmovdqa ymm3, ymmword, ptr, [rdi] | |
xor esi, esi | |
; Inner loop: rsi = byte offset of the right entry past the left one.
.LBB3_35: | |
cmp rax, rsi | |
je .LBB3_63 | |
vmovdqa ymm4, ymmword, ptr, [rdi, +, rsi, +, 32] | |
; Count equal byte positions: the compare yields 0xFF (-1) per equal
; byte; the upper lane is reduced to 0/1 and extracted, and subtracting
; the mask adds 1 for each equal byte of the lower lane. The shuffles
; and shifts then sum the 16 per-byte counts into byte 0.
vpcmpeqb ymm1, ymm3, ymm4 | |
vpand ymm2, ymm1, ymm0 | |
vextracti128 xmm2, ymm2, 1 | |
vpsubb ymm1, ymm2, ymm1 | |
vpshufd xmm2, xmm1, 78 | |
vpaddb ymm1, ymm1, ymm2 | |
vpshufd xmm2, xmm1, 229 | |
vpaddb ymm1, ymm1, ymm2 | |
vpsrld xmm2, xmm1, 16 | |
vpaddb ymm1, ymm1, ymm2 | |
vpsrlw xmm2, xmm1, 8 | |
vpaddb ymm1, ymm1, ymm2 | |
vpextrb ebx, xmm1, 0 | |
add rsi, 32 | |
; Exactly 31 equal positions means the pair differs in one byte.
cmp bl, 31 | |
jne .LBB3_35 | |
jmp .LBB3_37 | |
; Left entry #2 of the group (rdi+32); r8 = rdi+64 for the end check.
.LBB3_63: | |
lea r8, [rdi, +, 64] | |
vmovdqa ymm3, ymmword, ptr, [rdi, +, 32] | |
xor esi, esi | |
.LBB3_64: | |
cmp rcx, rsi | |
je .LBB3_66 | |
vmovdqa ymm4, ymmword, ptr, [rdi, +, rsi, +, 64] | |
; Same equal-byte count as in .LBB3_35.
vpcmpeqb ymm1, ymm3, ymm4 | |
vpand ymm2, ymm1, ymm0 | |
vextracti128 xmm2, ymm2, 1 | |
vpsubb ymm1, ymm2, ymm1 | |
vpshufd xmm2, xmm1, 78 | |
vpaddb ymm1, ymm1, ymm2 | |
vpshufd xmm2, xmm1, 229 | |
vpaddb ymm1, ymm1, ymm2 | |
vpsrld xmm2, xmm1, 16 | |
vpaddb ymm1, ymm1, ymm2 | |
vpsrlw xmm2, xmm1, 8 | |
vpaddb ymm1, ymm1, ymm2 | |
vpextrb ebx, xmm1, 0 | |
add rsi, 32 | |
cmp bl, 31 | |
jne .LBB3_64 | |
jmp .LBB3_37 | |
; End-of-table check: if rdi+64 is the table end, every pair has been
; tried without success.
.LBB3_66: | |
lea rbx, [rsp, +, 8288] | |
cmp rbx, r8 | |
je .LBB3_61 | |
; Left entry #3 of the group (rdi+64).
vmovdqa ymm3, ymmword, ptr, [rdi, +, 64] | |
xor esi, esi | |
.LBB3_68: | |
cmp rdx, rsi | |
je .LBB3_70 | |
vmovdqa ymm4, ymmword, ptr, [rdi, +, rsi, +, 96] | |
; Same equal-byte count as in .LBB3_35.
vpcmpeqb ymm1, ymm3, ymm4 | |
vpand ymm2, ymm1, ymm0 | |
vextracti128 xmm2, ymm2, 1 | |
vpsubb ymm1, ymm2, ymm1 | |
vpshufd xmm2, xmm1, 78 | |
vpaddb ymm1, ymm1, ymm2 | |
vpshufd xmm2, xmm1, 229 | |
vpaddb ymm1, ymm1, ymm2 | |
vpsrld xmm2, xmm1, 16 | |
vpaddb ymm1, ymm1, ymm2 | |
vpsrlw xmm2, xmm1, 8 | |
vpaddb ymm1, ymm1, ymm2 | |
vpextrb ebx, xmm1, 0 | |
add rsi, 32 | |
cmp bl, 31 | |
jne .LBB3_68 | |
jmp .LBB3_37 | |
; Left entry #4 of the group (rdi+96); r8 = rdi+128 becomes the next
; group's rdi (sub of -128 is an add of 128).
.LBB3_70: | |
mov r8, rdi | |
sub r8, -128 | |
vmovdqa ymm3, ymmword, ptr, [rdi, +, 96] | |
xor esi, esi | |
.LBB3_71: | |
cmp r9, rsi | |
je .LBB3_33 | |
vmovdqa ymm4, ymmword, ptr, [rdi, +, rsi, +, 128] | |
; Same equal-byte count as in .LBB3_35.
vpcmpeqb ymm1, ymm3, ymm4 | |
vpand ymm2, ymm1, ymm0 | |
vextracti128 xmm2, ymm2, 1 | |
vpsubb ymm1, ymm2, ymm1 | |
vpshufd xmm2, xmm1, 78 | |
vpaddb ymm1, ymm1, ymm2 | |
vpshufd xmm2, xmm1, 229 | |
vpaddb ymm1, ymm1, ymm2 | |
vpsrld xmm2, xmm1, 16 | |
vpaddb ymm1, ymm1, ymm2 | |
vpsrlw xmm2, xmm1, 8 | |
vpaddb ymm1, ymm1, ymm2 | |
vpextrb ebx, xmm1, 0 | |
add rsi, 32 | |
cmp bl, 31 | |
jne .LBB3_71 | |
; ---------------------------------------------------------------------
; Phase 4: a pair was found (ymm3 = left entry, ymm4 = right entry).
; Build the output buffer from the bytes the two entries share.
;   r15 = buffer pointer, rsi = capacity, r14 = length,
;   rdi = index of the next byte to examine (0..31).
; ---------------------------------------------------------------------
.LBB3_37: | |
; Spill both entries: vector registers do not survive the calls below.
vmovdqa ymmword, ptr, [rsp, +, 160], ymm4 | |
vmovdqa ymmword, ptr, [rsp, +, 192], ymm3 | |
vzeroupper | |
; Allocate an initial 25-byte buffer from the process heap.
call GetProcessHeap | |
mov esi, 25 | |
xor edx, edx | |
mov r8d, 25 | |
mov rcx, rax | |
call HeapAlloc | |
test rax, rax | |
je .LBB3_73 | |
mov r15, rax | |
; Make byte-addressable copies: left entry at [rsp+256], right entry
; at [rsp+32] (the fill cursor stored there is no longer needed).
vmovdqa ymm0, ymmword, ptr, [rsp, +, 192] | |
vmovdqa ymmword, ptr, [rsp, +, 256], ymm0 | |
vmovdqa ymm1, ymmword, ptr, [rsp, +, 160] | |
vmovdqa ymmword, ptr, [rsp, +, 32], ymm1 | |
; Byte 0 of each entry is taken straight from the registers.
vpextrb eax, xmm0, 0 | |
vpextrb ebx, xmm1, 0 | |
mov edi, 1 | |
xor r14d, r14d | |
cmp al, bl | |
jne .LBB3_44 | |
jmp .LBB3_40 | |
; Load byte rdi of both entries and advance the index; differing bytes
; are skipped.
.LBB3_45: | |
mov al, byte, ptr, [rsp, +, rdi, +, 256] | |
mov bl, byte, ptr, [rsp, +, rdi, +, 32] | |
add rdi, 1 | |
cmp al, bl | |
jne .LBB3_44 | |
; Bytes are equal (value in bl). A byte with the top bit set takes the
; two-byte path at .LBB3_41.
.LBB3_40: | |
test bl, bl | |
js .LBB3_41 | |
; One-byte append: grow only when length == capacity.
cmp rsi, r14 | |
jne .LBB3_52 | |
; New capacity = max(length + 1, 2 * length); overflow of length + 1
; is a capacity-overflow panic.
mov rsi, r14 | |
add rsi, 1 | |
jb .LBB3_60 | |
lea rax, [r14, +, r14] | |
cmp rsi, rax | |
cmovb rsi, rax | |
; Length 0 allocates fresh (.LBB3_49); otherwise reallocate in place.
test r14, r14 | |
je .LBB3_49 | |
vzeroupper | |
call GetProcessHeap | |
xor edx, edx | |
mov rcx, rax | |
mov r8, r15 | |
mov r9, rsi | |
call HeapReAlloc | |
mov r15, rax | |
test rax, rax | |
jne .LBB3_52 | |
jmp .LBB3_75 | |
; Two-byte append: needs capacity - length >= 2.
.LBB3_41: | |
mov rax, rsi | |
sub rax, r14 | |
cmp rax, 2 | |
jae .LBB3_42 | |
; Grow: r13 = length + 2 (overflow panics),
; r12 = max(2 * capacity, length + 2).
mov r13, r14 | |
add r13, 2 | |
jb .LBB3_60 | |
lea r12, [rsi, +, rsi] | |
cmp r13, r12 | |
cmovae r12, r13 | |
; Capacity 0 allocates fresh (.LBB3_55); otherwise reallocate.
test rsi, rsi | |
je .LBB3_55 | |
vzeroupper | |
call GetProcessHeap | |
xor edx, edx | |
mov rcx, rax | |
mov r8, r15 | |
mov r9, r12 | |
call HeapReAlloc | |
mov r15, rax | |
test rax, rax | |
jne .LBB3_58 | |
jmp .LBB3_59 | |
; Enough room already: r13 = length after the two-byte append.
.LBB3_42: | |
lea r13, [r14, +, 2] | |
jmp .LBB3_43 | |
; Fresh allocation of rsi bytes for the one-byte path.
.LBB3_49: | |
vzeroupper | |
call GetProcessHeap | |
xor edx, edx | |
mov rcx, rax | |
mov r8, rsi | |
call HeapAlloc | |
mov r15, rax | |
test rax, rax | |
je .LBB3_75 | |
; Store the single byte and bump the length.
.LBB3_52: | |
mov byte, ptr, [r15, +, r14], bl | |
add r14, 1 | |
; Loop while the index is still within the 32-byte entry.
.LBB3_44: | |
cmp rdi, 31 | |
jbe .LBB3_45 | |
jmp .LBB3_74 | |
; Fresh allocation of r12 bytes for the two-byte path.
.LBB3_55: | |
vzeroupper | |
call GetProcessHeap | |
xor edx, edx | |
mov rcx, rax | |
mov r8, r12 | |
call HeapAlloc | |
mov r15, rax | |
test rax, rax | |
je .LBB3_59 | |
; Growth succeeded: adopt the new capacity.
.LBB3_58: | |
mov rsi, r12 | |
; Write the byte b as two bytes: (b >> 6) | 0xC0 then (b & 0x3F) | 0x80,
; which is the two-byte UTF-8 form of code point b.
.LBB3_43: | |
mov eax, ebx | |
shr al, 6 | |
or al, -64 | |
and bl, 63 | |
or bl, -128 | |
mov byte, ptr, [r15, +, r14], al | |
mov byte, ptr, [r15, +, r14, +, 1], bl | |
mov r14, r13 | |
cmp rdi, 31 | |
jbe .LBB3_45 | |
; All 32 positions examined: write pointer, capacity and length to the
; caller's result slot.
; NOTE(review): this three-word layout looks like a Rust String/Vec<u8>
; returned by hidden pointer -- confirm against the Rust signature.
.LBB3_74: | |
mov rax, qword, ptr, [rsp, +, 152] | |
mov qword, ptr, [rax], r15 | |
mov qword, ptr, [rax, +, 8], rsi | |
mov qword, ptr, [rax, +, 16], r14 | |
; Epilogue: rbp + 8184 is the stack pointer as it was right after the
; pushes (rbp = unaligned rsp + 128, frame size 8312); restore the
; saved registers in reverse order.
.LBB3_62: | |
lea rsp, [rbp, +, 8184] | |
pop rbx | |
pop rdi | |
pop rsi | |
pop r12 | |
pop r13 | |
pop r14 | |
pop r15 | |
pop rbp | |
vzeroupper | |
ret | |
; No pair found: store 0 in the first word of the result slot.
; NOTE(review): presumably the null-pointer encoding of "no result"
; (e.g. Option::None) -- confirm against the Rust signature.
.LBB3_61: | |
mov rax, qword, ptr, [rsp, +, 152] | |
mov qword, ptr, [rax], 0 | |
jmp .LBB3_62 | |
; ---- Cold paths: none of these return (each call is followed by ud2).
; Trimmed piece longer than the 32-byte slot (rcx = length, rdx = 32).
.LBB3_31: | |
mov edx, 32 | |
mov rcx, rbx | |
call core::slice::slice_index_len_fail | |
ud2 | |
; Requested buffer length overflowed.
.LBB3_60: | |
vzeroupper | |
call alloc::raw_vec::capacity_overflow | |
ud2 | |
; Allocation of rsi bytes failed on the one-byte growth path.
.LBB3_75: | |
mov edx, 1 | |
mov rcx, rsi | |
call alloc::alloc::handle_alloc_error | |
ud2 | |
; Needle length (rbx) exceeds the 4-byte needle buffer.
.LBB3_15: | |
mov edx, 4 | |
mov rcx, rbx | |
call core::slice::slice_index_len_fail | |
ud2 | |
; Allocation of r12 bytes failed on the two-byte growth path.
.LBB3_59: | |
mov edx, 1 | |
mov rcx, r12 | |
call alloc::alloc::handle_alloc_error | |
ud2 | |
; The initial 25-byte allocation failed.
.LBB3_73: | |
mov ecx, 25 | |
mov edx, 1 | |
call alloc::alloc::handle_alloc_error | |
ud2 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment