Skip to content

Instantly share code, notes, and snippets.

@CryZe
Last active December 2, 2018 20:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save CryZe/5895306177cc351b43e50ac7481bec90 to your computer and use it in GitHub Desktop.
Save CryZe/5895306177cc351b43e50ac7481bec90 to your computer and use it in GitHub Desktop.
;-----------------------------------------------------------------------
; day_02::part2_simd  (compiler-generated listing, Intel operand order)
;
; ABI:  Windows x64 (uses __chkstk, GetProcessHeap/HeapAlloc/HeapReAlloc,
;       arguments in rcx/rdx/r8/r9).
; In:   rcx = pointer to the caller-provided result slot (3 qwords)
;       rdx = pointer to a 72-byte iterator state that is copied to the
;             local frame (presumably a core::str::Lines value -- TODO
;             confirm against the Rust source)
; Out:  on success the slot receives {buffer ptr, capacity, length};
;       when no pair is found the first qword of the slot is set to 0
;       (presumably the None encoding of Option<String> -- TODO confirm).
;       rax holds the slot pointer on return.
; ISA:  AVX2 (ymm) plus one AVX-512 (zmm) copy.
;
; Phases:
;   1. Zero an 8000-byte table of 32-byte slots at [rsp+288, rsp+8288).
;   2. Split the input on a delimiter, strip a trailing CR, call
;      trim_end, and store each line (at most 32 bytes) into one slot.
;   3. For every pair of slots count the equal byte positions with
;      vpcmpeqb; stop at the first pair with exactly 31 of 32 equal.
;   4. Build a heap byte buffer from the positions where the two slots
;      agree and store it in the result slot.
;
; Frame offsets used below (relative to the 32-byte aligned rsp):
;   [rsp+32]        next-slot cursor (phase 2); copy of 2nd slot (phase 4)
;   [rsp+40]        end of the slot table (rsp+8288)
;   [rsp+48..120)   local copy of the 72-byte iterator state
;   [rsp+152]       saved result-slot pointer (incoming rcx)
;   [rsp+160],[192] spilled ymm4 / ymm3 across the allocator calls
;   [rsp+224]       32-byte line scratch buffer (r12 points here)
;   [rsp+256]       copy of the 1st matching slot (phase 4)
;   [rsp+288]       start of the slot table
;-----------------------------------------------------------------------
day_02::part2_simd:
; Prologue: save the callee-saved registers this function uses.
push rbp
push r15
push r14
push r13
push r12
push rsi
push rdi
push rbx
; Reserve 8312 bytes; __chkstk is called with the size in eax before rsp moves.
mov eax, 8312
call __chkstk
sub rsp, rax
; rbp remembers the unaligned frame so the epilogue can undo it
; (epilogue uses rbp + 8184 = original rsp + 8312).
lea rbp, [rsp, +, 128]
; Align rsp to 32 bytes: the frame is accessed with aligned ymm moves.
and rsp, -32
; rsi = incoming iterator-state pointer, [rsp+152] = result-slot pointer.
mov rsi, rdx
mov qword, ptr, [rsp, +, 152], rcx
; rdi = end of slot table, r14 = start of slot table.
lea rdi, [rsp, +, 8288]
lea r14, [rsp, +, 288]
; memset(table, 0, 8000)
xor edx, edx
mov r8d, 8000
mov rcx, r14
call memset
; Zero the 32-byte line scratch buffer at [rsp+224].
vxorps xmm0, xmm0, xmm0
vmovaps ymmword, ptr, [rsp, +, 224], ymm0
; [rsp+32] = slot cursor, [rsp+40] = table end.
mov qword, ptr, [rsp, +, 32], r14
mov qword, ptr, [rsp, +, 40], rdi
; Copy the 72-byte iterator state (64 bytes via zmm + 8 bytes) to [rsp+48].
vmovups zmm0, zmmword, ptr, [rsi]
vmovups zmmword, ptr, [rsp, +, 48], zmm0
mov rax, qword, ptr, [rsi, +, 64]
mov qword, ptr, [rsp, +, 112], rax
; Zero 16 bytes at [rsp+120].
vxorps xmm0, xmm0, xmm0
vmovups xmmword, ptr, [rsp, +, 120], xmm0
; r12 = address of the line scratch buffer (memcpy destination below).
lea r12, [rsp, +, 224]
; ---- Phase 2: fetch the next line and store it in slot r14 ----
.LBB3_1:
; Pre-compute and store the address of the following slot.
lea rax, [r14, +, 32]
mov qword, ptr, [rsp, +, 32], rax
; Byte [rsp+113] is set to 1 at .LBB3_23 when the final piece has been
; produced; once set, there are no more lines.
cmp byte, ptr, [rsp, +, 113], 0
jne .LBB3_2
; rdx = current search position, rax = search end; r8 = end - position.
mov rdx, qword, ptr, [rsp, +, 80]
mov rax, qword, ptr, [rsp, +, 88]
mov r8, rax
sub r8, rdx
; position > end: nothing left to search.
jb .LBB3_8
; [rsp+72] is compared against the end as an upper bound
; (presumably the haystack length -- TODO confirm).
cmp qword, ptr, [rsp, +, 72], rax
jb .LBB3_8
; r13 = haystack base pointer, rax = delimiter length in bytes.
mov r13, qword, ptr, [rsp, +, 64]
mov rax, qword, ptr, [rsp, +, 96]
add rdx, r13
; cl = last byte of the delimiter, which is stored at [rsp+108 ..]
; (the same address is the memcmp operand further down).
mov cl, byte, ptr, [rsp, +, rax, +, 107]
; Clear upper ymm/zmm state before calling out.
vzeroupper
; memchr(byte = cl, ptr = rdx, len = r8); rax == 1 means found, rdx = index.
call core::slice::memchr::memchr
cmp rax, 1
je .LBB3_11
jmp .LBB3_7
; Retry the byte search from the updated position rsi (offset -> pointer).
.LBB3_21:
add rsi, qword, ptr, [rsp, +, 64]
movzx ecx, byte, ptr, [rsp, +, rbx, +, 107]
mov rdx, rsi
call core::slice::memchr::memchr
cmp rax, 1
jne .LBB3_7
; Candidate hit: the delimiter's last byte was found at index rdx.
.LBB3_11:
mov rax, qword, ptr, [rsp, +, 80]
mov rbx, qword, ptr, [rsp, +, 96]
; rsi = old position + index + 1 = offset just past the matched byte.
lea rsi, [rdx, +, rax]
add rsi, 1
mov qword, ptr, [rsp, +, 80], rsi
; rdi = offset where a full delimiter would have to start.
mov rdi, rsi
sub rdi, rbx
jae .LBB3_13
; Fewer bytes consumed than the delimiter length: keep searching if
; the remaining range is still valid.
mov r15, qword, ptr, [rsp, +, 72]
mov rax, qword, ptr, [rsp, +, 88]
mov r8, rax
sub r8, rsi
jae .LBB3_20
jmp .LBB3_8
.LBB3_13:
mov r15, qword, ptr, [rsp, +, 72]
; New position beyond the upper bound: not a valid match.
cmp r15, rsi
jb .LBB3_19
; The delimiter buffer holds at most 4 bytes; a length >= 5 panics.
cmp rbx, 5
jae .LBB3_15
; rcx = haystack + candidate start; compare with the stored delimiter.
mov rcx, qword, ptr, [rsp, +, 64]
add rcx, rdi
lea rax, [rsp, +, 108]
; Identical pointers are trivially equal; skip the memcmp.
cmp rcx, rax
je .LBB3_24
lea rdx, [rsp, +, 108]
mov r8, rbx
call memcmp
test eax, eax
je .LBB3_24
; Not a full delimiter match: continue searching after this byte.
.LBB3_19:
mov rax, qword, ptr, [rsp, +, 88]
mov r8, rax
sub r8, rsi
jb .LBB3_8
.LBB3_20:
cmp r15, rax
jae .LBB3_21
jmp .LBB3_8
; Search exhausted without a match: position = end.
.LBB3_7:
mov rax, qword, ptr, [rsp, +, 88]
mov qword, ptr, [rsp, +, 80], rax
; No further delimiter: emit the remaining tail unless already finished.
.LBB3_8:
cmp byte, ptr, [rsp, +, 113], 0
je .LBB3_9
; No line available: r13 is zeroed immediately before the test, so the
; branch to the search phase is always taken from here.
.LBB3_2:
xor r13d, r13d
test r13, r13
je .LBB3_29
; A line is available: r13 = pointer, rdx = length.
.LBB3_30:
mov rcx, r13
vzeroupper
; trim_end(ptr = rcx, len = rdx) -> rax = pointer, rdx = trimmed length.
call core::str::<impl str>::trim_end
mov rbx, rdx
; Lines longer than 32 bytes do not fit a slot and panic.
cmp rdx, 33
jae .LBB3_31
; memcpy(scratch, line, len)
; NOTE(review): the scratch buffer is zeroed once before the loop and
; not cleared again here, so a line shorter than its predecessor keeps
; the predecessor's tail bytes -- confirm this is acceptable for the input.
mov rcx, r12
mov rdx, rax
mov r8, rbx
call memcpy
; Copy the 32-byte scratch buffer into the current slot.
vmovaps ymm0, ymmword, ptr, [rsp, +, 224]
vmovaps ymmword, ptr, [r14], ymm0
; Advance to the next slot; stop filling when the table end is reached.
mov r14, qword, ptr, [rsp, +, 32]
cmp r14, qword, ptr, [rsp, +, 40]
jne .LBB3_1
jmp .LBB3_29
; Produce the final piece [start, end) after the last delimiter.
.LBB3_9:
; Byte [rsp+112] selects whether an empty final piece is still emitted.
cmp byte, ptr, [rsp, +, 112], 0
je .LBB3_22
mov r13, qword, ptr, [rsp, +, 48]
mov rdi, qword, ptr, [rsp, +, 56]
jmp .LBB3_23
.LBB3_22:
mov r13, qword, ptr, [rsp, +, 48]
mov rdi, qword, ptr, [rsp, +, 56]
; Empty remainder: treat as end of input.
cmp rdi, r13
je .LBB3_2
.LBB3_23:
; Mark the iterator finished, then form (pointer, length) of the tail.
mov byte, ptr, [rsp, +, 113], 1
sub rdi, r13
add r13, qword, ptr, [rsp, +, 64]
test rdi, rdi
je .LBB3_26
; Non-empty line: drop one trailing byte if it is 13 (carriage return).
.LBB3_27:
lea rdx, [rdi, -, 1]
cmp byte, ptr, [r13, +, rdi, -, 1], 13
cmovne rdx, rdi
test r13, r13
jne .LBB3_30
jmp .LBB3_29
; Delimiter matched: the line spans [start, delimiter start).
.LBB3_24:
mov rax, qword, ptr, [rsp, +, 48]
; r13 = haystack base + start; rdi = delimiter start - start = length.
add r13, rax
sub rdi, rax
; The next line starts right after the delimiter.
mov qword, ptr, [rsp, +, 48], rsi
test rdi, rdi
jne .LBB3_27
; Empty line: length 0.
.LBB3_26:
xor edx, edx
test r13, r13
jne .LBB3_30
; ---- Phase 3: pairwise comparison of all slots ----
; The outer loop is unrolled four times. rax/rcx/rdx/r9 hold the byte
; span of candidate slots that follow each of the four base slots; all
; four shrink by 128 per outer iteration.
.LBB3_29:
lea rdi, [rsp, +, 288]
mov eax, 7968
mov ecx, 7936
mov edx, 7904
mov r9d, 7872
; ymm0 = 32 bytes of 0x01, used to turn compare masks into 0/1 counts.
vmovdqa ymm0, ymmword, ptr, [rip, +, __ymm@0101010101010101010101010101010101010101010101010101010101010101]
jmp .LBB3_34
; Advance the outer loop by four slots (r8 = rdi + 128 from .LBB3_70).
.LBB3_33:
add rax, -128
add rcx, -128
add rdx, -128
add r9, -128
mov rdi, r8
; Base slot 0 of the group: ymm3 = slot at rdi.
.LBB3_34:
vmovdqa ymm3, ymmword, ptr, [rdi]
xor esi, esi
.LBB3_35:
; All later slots tried for this base: move to the next base slot.
cmp rax, rsi
je .LBB3_63
; ymm4 = candidate slot; ymm1 = 0xFF in every equal byte position.
vmovdqa ymm4, ymmword, ptr, [rdi, +, rsi, +, 32]
vpcmpeqb ymm1, ymm3, ymm4
; Horizontal byte sum: (high-lane 0/1 flags) - (mask, i.e. -1 per match)
; folds both lanes into 16 per-byte counts, then the shuffles/shifts
; add them down into byte 0 = number of equal positions (0..32).
vpand ymm2, ymm1, ymm0
vextracti128 xmm2, ymm2, 1
vpsubb ymm1, ymm2, ymm1
vpshufd xmm2, xmm1, 78
vpaddb ymm1, ymm1, ymm2
vpshufd xmm2, xmm1, 229
vpaddb ymm1, ymm1, ymm2
vpsrld xmm2, xmm1, 16
vpaddb ymm1, ymm1, ymm2
vpsrlw xmm2, xmm1, 8
vpaddb ymm1, ymm1, ymm2
vpextrb ebx, xmm1, 0
add rsi, 32
; Exactly 31 equal bytes means the slots differ in one position.
cmp bl, 31
jne .LBB3_35
jmp .LBB3_37
; Base slot 1 of the group: ymm3 = slot at rdi + 32.
.LBB3_63:
lea r8, [rdi, +, 64]
vmovdqa ymm3, ymmword, ptr, [rdi, +, 32]
xor esi, esi
.LBB3_64:
cmp rcx, rsi
je .LBB3_66
; Same equal-byte count as in .LBB3_35, candidates start at rdi + 64.
vmovdqa ymm4, ymmword, ptr, [rdi, +, rsi, +, 64]
vpcmpeqb ymm1, ymm3, ymm4
vpand ymm2, ymm1, ymm0
vextracti128 xmm2, ymm2, 1
vpsubb ymm1, ymm2, ymm1
vpshufd xmm2, xmm1, 78
vpaddb ymm1, ymm1, ymm2
vpshufd xmm2, xmm1, 229
vpaddb ymm1, ymm1, ymm2
vpsrld xmm2, xmm1, 16
vpaddb ymm1, ymm1, ymm2
vpsrlw xmm2, xmm1, 8
vpaddb ymm1, ymm1, ymm2
vpextrb ebx, xmm1, 0
add rsi, 32
cmp bl, 31
jne .LBB3_64
jmp .LBB3_37
; Base slot 2 of the group; first check whether the table end
; (rsp+8288) has been reached, in which case no pair was found.
.LBB3_66:
lea rbx, [rsp, +, 8288]
cmp rbx, r8
je .LBB3_61
vmovdqa ymm3, ymmword, ptr, [rdi, +, 64]
xor esi, esi
.LBB3_68:
cmp rdx, rsi
je .LBB3_70
; Same equal-byte count as in .LBB3_35, candidates start at rdi + 96.
vmovdqa ymm4, ymmword, ptr, [rdi, +, rsi, +, 96]
vpcmpeqb ymm1, ymm3, ymm4
vpand ymm2, ymm1, ymm0
vextracti128 xmm2, ymm2, 1
vpsubb ymm1, ymm2, ymm1
vpshufd xmm2, xmm1, 78
vpaddb ymm1, ymm1, ymm2
vpshufd xmm2, xmm1, 229
vpaddb ymm1, ymm1, ymm2
vpsrld xmm2, xmm1, 16
vpaddb ymm1, ymm1, ymm2
vpsrlw xmm2, xmm1, 8
vpaddb ymm1, ymm1, ymm2
vpextrb ebx, xmm1, 0
add rsi, 32
cmp bl, 31
jne .LBB3_68
jmp .LBB3_37
; Base slot 3 of the group: ymm3 = slot at rdi + 96; r8 = rdi + 128
; becomes the next group's base at .LBB3_33.
.LBB3_70:
mov r8, rdi
sub r8, -128
vmovdqa ymm3, ymmword, ptr, [rdi, +, 96]
xor esi, esi
.LBB3_71:
cmp r9, rsi
je .LBB3_33
; Same equal-byte count as in .LBB3_35, candidates start at rdi + 128.
vmovdqa ymm4, ymmword, ptr, [rdi, +, rsi, +, 128]
vpcmpeqb ymm1, ymm3, ymm4
vpand ymm2, ymm1, ymm0
vextracti128 xmm2, ymm2, 1
vpsubb ymm1, ymm2, ymm1
vpshufd xmm2, xmm1, 78
vpaddb ymm1, ymm1, ymm2
vpshufd xmm2, xmm1, 229
vpaddb ymm1, ymm1, ymm2
vpsrld xmm2, xmm1, 16
vpaddb ymm1, ymm1, ymm2
vpsrlw xmm2, xmm1, 8
vpaddb ymm1, ymm1, ymm2
vpextrb ebx, xmm1, 0
add rsi, 32
cmp bl, 31
jne .LBB3_71
; ---- Phase 4: pair found (ymm3 = base slot, ymm4 = candidate) ----
.LBB3_37:
; Spill both slots: vector registers are not relied on across the calls.
vmovdqa ymmword, ptr, [rsp, +, 160], ymm4
vmovdqa ymmword, ptr, [rsp, +, 192], ymm3
vzeroupper
; HeapAlloc(GetProcessHeap(), 0, 25); esi tracks the capacity (25).
call GetProcessHeap
mov esi, 25
xor edx, edx
mov r8d, 25
mov rcx, rax
call HeapAlloc
test rax, rax
je .LBB3_73
; r15 = output buffer. Place the two slots where they can be indexed
; bytewise: base slot at [rsp+256], candidate at [rsp+32].
mov r15, rax
vmovdqa ymm0, ymmword, ptr, [rsp, +, 192]
vmovdqa ymmword, ptr, [rsp, +, 256], ymm0
vmovdqa ymm1, ymmword, ptr, [rsp, +, 160]
vmovdqa ymmword, ptr, [rsp, +, 32], ymm1
; Byte 0 of each slot; rdi = next index (1), r14 = output length (0).
vpextrb eax, xmm0, 0
vpextrb ebx, xmm1, 0
mov edi, 1
xor r14d, r14d
; Differing position: skip it. Equal position: append it.
cmp al, bl
jne .LBB3_44
jmp .LBB3_40
; Load the next byte pair and advance the index.
.LBB3_45:
mov al, byte, ptr, [rsp, +, rdi, +, 256]
mov bl, byte, ptr, [rsp, +, rdi, +, 32]
add rdi, 1
cmp al, bl
jne .LBB3_44
; Append the common byte bl.
.LBB3_40:
; Bytes with the top bit set are written as two bytes (see .LBB3_43).
test bl, bl
js .LBB3_41
; Single-byte append: room left if capacity (rsi) != length (r14).
cmp rsi, r14
jne .LBB3_52
; Grow: new capacity = max(length + 1, 2 * length); carry = overflow.
mov rsi, r14
add rsi, 1
jb .LBB3_60
lea rax, [r14, +, r14]
cmp rsi, rax
cmovb rsi, rax
; Length 0 means there is nothing to reallocate: allocate fresh.
test r14, r14
je .LBB3_49
vzeroupper
; HeapReAlloc(GetProcessHeap(), 0, r15, rsi)
call GetProcessHeap
xor edx, edx
mov rcx, rax
mov r8, r15
mov r9, rsi
call HeapReAlloc
mov r15, rax
test rax, rax
jne .LBB3_52
jmp .LBB3_75
; Two-byte append: needs capacity - length >= 2.
.LBB3_41:
mov rax, rsi
sub rax, r14
cmp rax, 2
jae .LBB3_42
; Grow: r13 = length + 2, r12 = max(2 * capacity, length + 2).
mov r13, r14
add r13, 2
jb .LBB3_60
lea r12, [rsi, +, rsi]
cmp r13, r12
cmovae r12, r13
test rsi, rsi
je .LBB3_55
vzeroupper
; HeapReAlloc(GetProcessHeap(), 0, r15, r12)
call GetProcessHeap
xor edx, edx
mov rcx, rax
mov r8, r15
mov r9, r12
call HeapReAlloc
mov r15, rax
test rax, rax
jne .LBB3_58
jmp .LBB3_59
; Enough room already: r13 = length after the two-byte write.
.LBB3_42:
lea r13, [r14, +, 2]
jmp .LBB3_43
; Fresh allocation of rsi bytes for the single-byte path.
.LBB3_49:
vzeroupper
call GetProcessHeap
xor edx, edx
mov rcx, rax
mov r8, rsi
call HeapAlloc
mov r15, rax
test rax, rax
je .LBB3_75
; Store the byte and bump the length.
.LBB3_52:
mov byte, ptr, [r15, +, r14], bl
add r14, 1
; Continue while the index is within the 32-byte slot.
.LBB3_44:
cmp rdi, 31
jbe .LBB3_45
jmp .LBB3_74
; Fresh allocation of r12 bytes for the two-byte path.
.LBB3_55:
vzeroupper
call GetProcessHeap
xor edx, edx
mov rcx, rax
mov r8, r12
call HeapAlloc
mov r15, rax
test rax, rax
je .LBB3_59
; The grown capacity becomes the tracked capacity.
.LBB3_58:
mov rsi, r12
; Write bl as a two-byte sequence: 0xC0 | (bl >> 6), 0x80 | (bl & 0x3F).
.LBB3_43:
mov eax, ebx
shr al, 6
or al, -64
and bl, 63
or bl, -128
mov byte, ptr, [r15, +, r14], al
mov byte, ptr, [r15, +, r14, +, 1], bl
mov r14, r13
cmp rdi, 31
jbe .LBB3_45
; Done: store {pointer, capacity, length} into the caller's result slot.
.LBB3_74:
mov rax, qword, ptr, [rsp, +, 152]
mov qword, ptr, [rax], r15
mov qword, ptr, [rax, +, 8], rsi
mov qword, ptr, [rax, +, 16], r14
; Epilogue: restore the pre-alignment rsp from rbp, pop saved registers.
.LBB3_62:
lea rsp, [rbp, +, 8184]
pop rbx
pop rdi
pop rsi
pop r12
pop r13
pop r14
pop r15
pop rbp
vzeroupper
ret
; No pair with exactly one differing position: store 0 in the first
; qword of the result slot.
.LBB3_61:
mov rax, qword, ptr, [rsp, +, 152]
mov qword, ptr, [rax], 0
jmp .LBB3_62
; Panic: trimmed line length (rbx) exceeds the 32-byte slot.
.LBB3_31:
mov edx, 32
mov rcx, rbx
call core::slice::slice_index_len_fail
ud2
; Panic: capacity computation overflowed.
.LBB3_60:
vzeroupper
call alloc::raw_vec::capacity_overflow
ud2
; Allocation failure on the single-byte growth path (size rsi, align 1).
.LBB3_75:
mov edx, 1
mov rcx, rsi
call alloc::alloc::handle_alloc_error
ud2
; Panic: delimiter length (rbx) exceeds the 4-byte delimiter buffer.
.LBB3_15:
mov edx, 4
mov rcx, rbx
call core::slice::slice_index_len_fail
ud2
; Allocation failure on the two-byte growth path (size r12, align 1).
.LBB3_59:
mov edx, 1
mov rcx, r12
call alloc::alloc::handle_alloc_error
ud2
; Allocation failure for the initial 25-byte buffer.
.LBB3_73:
mov ecx, 25
mov edx, 1
call alloc::alloc::handle_alloc_error
ud2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment