Skip to content

Instantly share code, notes, and snippets.

@jrmuizel
Created May 25, 2021 20:03
Show Gist options
  • Save jrmuizel/cc1ebd9ab02b5ce45fdbc38c6a1cbe1e to your computer and use it in GitHub Desktop.
blendTextureLinearR8 inner loop
# ---------------------------------------------------------------------------
# AT&T-syntax disassembly dump (offsets are relative to the function entry);
# this is debugger output, not assemblable source.
#
# Loop head (+0x210): per-iteration texture-coordinate setup for 4 pixels.
# The interpolated coordinates are 7-bit fixed-point values (coord * 128)
# held as floats on the stack; -288(%rbp) appears to hold s (x) and
# -304(%rbp) t (y), with clamp bounds in -448..-496(%rbp).
# NOTE(review): the s/t role assignment is inferred from later use
# (the -288 path is clamped to width-2, the -304 path is multiplied by the
# row stride) — confirm against the SWGL source.
# ---------------------------------------------------------------------------
+0x210 movaps -288(%rbp), %xmm0        # xmm0 = current s coords (4 lanes)
+0x217 maxps -464(%rbp), %xmm0         # clamp s to lower bound
+0x21e minps -448(%rbp), %xmm0         # clamp s to upper bound
+0x225 movaps -304(%rbp), %xmm2        # xmm2 = current t coords
+0x22c maxps -496(%rbp), %xmm2         # clamp t to lower bound
+0x233 minps -480(%rbp), %xmm2         # clamp t to upper bound
+0x23a cvttps2dq %xmm0, %xmm1          # xmm1 = trunc(s) as int, still fixed-point (<<7)
+0x23e cvttps2dq %xmm2, %xmm0          # xmm0 = trunc(t) as fixed-point int
# Keep the raw fixed-point s for the filter-weight computation, with
# negative lanes forced to 0 (left-edge clamp of the weight).
+0x242 movdqa %xmm1, %xmm2
+0x246 pcmpeqd %xmm12, %xmm12          # xmm12 = all-ones (-1 in every lane)
+0x24b pcmpgtd %xmm12, %xmm2           # xmm2 = mask of lanes where s_fixed > -1 (i.e. >= 0)
+0x250 pand %xmm1, %xmm2               # xmm2 = s_fixed where non-negative, else 0
+0x254 movdqa %xmm1, %xmm3
+0x258 psrad $7, %xmm3                 # xmm3 = integer texel x (drop 7 fraction bits)
+0x25d movdqa %xmm0, %xmm1
+0x261 psrad $7, %xmm1                 # xmm1 = integer texel y
# Clamp x to [0, width-2]: the linear filter reads texels x and x+1, so the
# last valid start column is width-2.  pmaxsw/pminsw are 16-bit ops applied
# to 32-bit lanes — safe only because the coords fit in 16 bits here.
+0x266 movdqa %xmm3, %xmm4
+0x26a pxor %xmm14, %xmm14             # xmm14 = 0 (reused as the zero reg below)
+0x26f pmaxsw %xmm14, %xmm4            # x = max(x, 0)
+0x274 addl $-2, %r11d                 # r11d = width - 2 (r11d reloaded from 16(%rdi) every iteration)
+0x278 movd %r11d, %xmm5
+0x27d pshufd $0, %xmm5, %xmm8         # xmm8 = broadcast(width-2)
+0x283 pminsw %xmm8, %xmm4             # x = min(x, width-2)
# Clamp y to [0, height-1].
+0x288 movdqa %xmm1, %xmm6
+0x28c pmaxsw %xmm14, %xmm6            # y = max(y, 0)
+0x291 addl $-1, %r12d                 # r12d = height - 1 (reloaded from 12(%rdi) every iteration)
+0x295 movd %r12d, %xmm7
+0x29a pshufd $0, %xmm7, %xmm7         # xmm7 = broadcast(height-1)
+0x29f pminsw %xmm7, %xmm6             # y = min(y, height-1)
# Row-0 gather offsets: offset0 = y*stride + x.
+0x2a3 movd 8(%rdi), %xmm5             # row stride from the sampler (8(%rdi))
+0x2a8 pshufd $0, %xmm5, %xmm5         # broadcast stride
+0x2ad pmulld %xmm5, %xmm6             # xmm6 = y * stride
+0x2b2 paddd %xmm4, %xmm6              # xmm6 = y*stride + x  (row-0 offsets)
# Row-1 offsets: add stride only where row y+1 is in bounds and t was
# non-negative, giving clamp-to-edge behaviour vertically.
+0x2b6 pcmpgtd %xmm1, %xmm7            # mask: y < height-1 (next row exists)
+0x2ba movdqa %xmm0, %xmm1
+0x2be pcmpgtd %xmm12, %xmm1           # mask: t_fixed >= 0
+0x2c3 pand %xmm5, %xmm1               # stride where t >= 0 ...
+0x2c7 pand %xmm7, %xmm1               # ... and y+1 in bounds, else 0
+0x2cb paddd %xmm6, %xmm1              # xmm1 = row-1 offsets
# Horizontal filter weight: the 7 fraction bits of s, forced to 1.0 (128)
# where x overflowed the right edge.  The RIP-relative constant is
# presumably the 0x7F fraction mask — TODO confirm against the binary.
+0x2cf pcmpgtd %xmm8, %xmm3            # mask: x_int > width-2 (past right edge)
+0x2d4 por %xmm3, %xmm2                # saturate fraction bits on overflow lanes
+0x2d8 pand 112222240(%rip), %xmm2     # keep the low fraction bits
+0x2e0 psubd %xmm3, %xmm2              # overflow lanes: 0x7F - (-1) = 0x80 == weight 1.0
# ---------------------------------------------------------------------------
# Gather phase: for each of the 4 pixels, one 16-bit load fetches the
# horizontal pair of R8 texels (x and x+1), done for both rows.
# ---------------------------------------------------------------------------
+0x2e4 movq (%rdi), %rax               # rax = texel base pointer from the sampler
+0x2e7 movd %xmm6, %esi                # lane 0 row-0 offset
+0x2eb movslq %esi, %rsi
+0x2ee movzwl (%rax,%rsi), %esi        # 16-bit load = texels (x, x+1) of lane 0
+0x2f2 movd %esi, %xmm3
+0x2f6 pextrd $1, %xmm6, %esi          # lane 1
+0x2fc movslq %esi, %rsi
+0x2ff movzwl (%rax,%rsi), %esi
+0x303 movd %esi, %xmm4
+0x307 pextrd $2, %xmm6, %esi          # lane 2
+0x30d punpcklwd %xmm4, %xmm3          # xmm3 = lanes 0,1 texel pairs interleaved
+0x311 movslq %esi, %rsi
+0x314 movzwl (%rax,%rsi), %esi
+0x318 movd %esi, %xmm4
+0x31c pextrd $3, %xmm6, %esi          # lane 3
+0x322 movslq %esi, %rsi
+0x325 movzwl (%rax,%rsi), %esi
+0x329 movd %esi, %xmm5
+0x32d punpcklwd %xmm5, %xmm4          # xmm4 = lanes 2,3 texel pairs
# Same gather for row 1 (offsets in xmm1).
+0x331 movd %xmm1, %esi
+0x335 movslq %esi, %rsi
+0x338 movzwl (%rax,%rsi), %esi
+0x33c movd %esi, %xmm5
+0x340 pextrd $1, %xmm1, %esi
+0x346 punpckldq %xmm4, %xmm3          # xmm3 = all 8 row-0 texels (4 pairs) as bytes
+0x34a movslq %esi, %rsi
+0x34d movzwl (%rax,%rsi), %esi
+0x351 movd %esi, %xmm4
+0x355 punpcklwd %xmm4, %xmm5          # xmm5 = row-1 lanes 0,1 pairs
+0x359 pextrd $2, %xmm1, %esi
+0x35f movslq %esi, %rsi
+0x362 movzwl (%rax,%rsi), %esi
+0x366 movd %esi, %xmm4
+0x36a pextrd $3, %xmm1, %esi
+0x370 movslq %esi, %rsi
+0x373 movzwl (%rax,%rsi), %eax        # last load clobbers rax (base no longer needed)
+0x377 movd %eax, %xmm1
+0x37b punpcklwd %xmm1, %xmm4          # xmm4 = row-1 lanes 2,3 pairs
# ---------------------------------------------------------------------------
# Bilinear filter: lerp row0 -> row1 by the t fraction, then texel x ->
# x+1 by the s fraction, both in 7-bit fixed point, then expand and scale.
# ---------------------------------------------------------------------------
+0x37f packssdw %xmm2, %xmm2           # s fraction: dwords -> words
+0x383 pshufb 112222004(%rip), %xmm0   # gather the t-fraction byte of each lane (shuffle const not visible here)
+0x38c pand 112222012(%rip), %xmm0     # mask to the fraction bits — presumably 0x7F; TODO confirm
+0x394 pmovzxbw %xmm3, %xmm1           # xmm1 = row-0 samples widened to u16
+0x399 punpckldq %xmm4, %xmm5          # xmm5 = all 8 row-1 texels as bytes
+0x39d pmovzxbw %xmm5, %xmm3           # xmm3 = row-1 samples as u16
+0x3a2 psubw %xmm1, %xmm3              # row1 - row0
+0x3a6 punpcklwd %xmm0, %xmm0          # duplicate t fraction per (x, x+1) pair
+0x3aa pmullw %xmm3, %xmm0             # (row1 - row0) * t_frac
+0x3ae psraw $7, %xmm0                 # >> 7: back to sample scale
+0x3b3 paddw %xmm1, %xmm0              # vertical lerp result, still (x, x+1) interleaved
+0x3b7 pshufb 112222064(%rip), %xmm0   # regroup: x samples vs x+1 samples per qword (const not visible)
+0x3c0 pshufd $238, %xmm0, %xmm1       # xmm1 = high-qword samples duplicated
+0x3c5 psubw %xmm0, %xmm1              # sample(x+1) - sample(x)
+0x3c9 pmullw %xmm2, %xmm1             # * s_frac
+0x3cd psraw $7, %xmm1                 # >> 7
+0x3d2 paddw %xmm0, %xmm1              # horizontal lerp: final 4 filtered R8 values
# Broadcast each filtered value across 4 channel words and scale by the two
# factors held in -512(%rbp) and xmm10 (set up outside this view —
# presumably color/opacity terms).  The extra paddw before the >>8 is the
# usual (x*c + c) >> 8 approximation of x*c/255.
+0x3d6 punpcklwd %xmm1, %xmm1
+0x3da movdqa %xmm1, %xmm0
+0x3de punpcklwd %xmm1, %xmm0          # xmm0 = channel words for pixels 0,1
+0x3e2 punpckhwd %xmm1, %xmm1          # xmm1 = channel words for pixels 2,3
+0x3e6 movdqa -512(%rbp), %xmm2
+0x3ee pmullw %xmm2, %xmm1
+0x3f2 pmullw %xmm10, %xmm0
+0x3f7 paddw %xmm10, %xmm0
+0x3fc paddw %xmm2, %xmm1
+0x400 psrlw $8, %xmm1                 # normalize back to byte range
+0x405 psrlw $8, %xmm0
# ---------------------------------------------------------------------------
# Load the 4 destination pixels and dispatch to the selected blend routine
# through a relative jump table (table base in r13, selector in dl).
# ---------------------------------------------------------------------------
+0x40a movdqu -16(%rbx), %xmm2         # 4 dest pixels (16 bytes); rbx is pre-advanced in the loop tail
+0x40f movdqa %xmm2, %xmm12            # note: clobbers the all-ones reg; re-materialized at +0x246
+0x414 punpckhbw %xmm14, %xmm12        # xmm12 = dest pixels 2,3 widened to u16
+0x419 movdqa %xmm2, %xmm8             # raw dest copy (presumably used by other table cases, not shown)
+0x41e pmovzxbw %xmm2, %xmm14          # xmm14 = dest pixels 0,1 as u16 (clobbers the zero reg)
+0x424 movzbl %dl, %eax                # blend-mode index
+0x427 movslq (%r13,%rax,4), %rax      # sign-extended 32-bit relative entry from the table
+0x42c addq %r13, %rax                 # entry + table base = target address
+0x42f jmpq *%rax                      # dispatch; +0x477b below is one such target
# ---------------------------------------------------------------------------
# Jump-table case (+0x477b): blend of the form
#   result = src + dst - dst*src_alpha/255
# (i.e. source-over for premultiplied src), done in two halves for pixel
# pairs 0,1 (xmm0/xmm14) and 2,3 (xmm1/xmm12).
# ---------------------------------------------------------------------------
+0x477b pshuflw $255, %xmm0, %xmm2     # broadcast pixel-0's last channel word (alpha) across its half
+0x4780 paddw %xmm14, %xmm0            # src + dst (pixels 0,1)
+0x4785 pshufhw $255, %xmm2, %xmm2     # ... and pixel-1's alpha across the high half
+0x478a pshuflw $255, %xmm1, %xmm3     # same alpha broadcast for pixels 2,3
+0x478f paddw %xmm12, %xmm1            # src + dst (pixels 2,3)
+0x4794 pshufhw $255, %xmm3, %xmm3
+0x4799 pmullw %xmm12, %xmm3           # dst * alpha (pixels 2,3)
+0x479e pmullw %xmm14, %xmm2           # dst * alpha (pixels 0,1)
+0x47a3 paddw %xmm14, %xmm2            # + dst: (x*a + x) >> 8 approximates x*a/255
+0x47a8 paddw %xmm12, %xmm3
+0x47ad psrlw $8, %xmm3
+0x47b2 psubw %xmm3, %xmm1             # (src + dst) - dst*alpha/255
+0x47b6 psrlw $8, %xmm2
+0x47bb psubw %xmm2, %xmm0
+0x47bf packuswb %xmm1, %xmm0          # saturate-pack both halves to 16 bytes
+0x47c3 movdqu %xmm0, -16(%rbx)        # store over the dest pixels loaded at +0x40a
+0x47c8 cmpq %r8, %rbx                 # r8 = end-of-span pointer (set up outside this view)
+0x47cb jb "int blendTextureLinearR8<true, glsl::sampler2D_impl*, unsigned short vector[16], unsigned int>(glsl::sampler2D_impl*, glsl::vec2, int, glsl::vec4_scalar const&, unsigned short vector[16], unsigned int*)+0x4936"    # more pixels: go to the loop tail at +0x4936
+0x47d1 jmp "int blendTextureLinearR8<true, glsl::sampler2D_impl*, unsigned short vector[16], unsigned int>(glsl::sampler2D_impl*, glsl::vec2, int, glsl::vec4_scalar const&, unsigned short vector[16], unsigned int*)+0x4a60"    # done: exit path at +0x4a60 (not shown)
# ---------------------------------------------------------------------------
# Loop tail (+0x4936): step the interpolated coordinates and the pointers
# by 4 pixels, reload the sampler dimensions (they are destructively
# decremented each iteration at +0x274/+0x291), and jump back to the loop
# head at +0x210.
# ---------------------------------------------------------------------------
+0x4936 addq $16, %r14                 # advance auxiliary pointer (its role is not visible in this view)
+0x493a movaps -304(%rbp), %xmm0
+0x4941 addps -432(%rbp), %xmm0        # t coords += per-step delta
+0x4948 movaps %xmm0, -304(%rbp)
+0x494f movaps -288(%rbp), %xmm0
+0x4956 addps -416(%rbp), %xmm0        # s coords += per-step delta
+0x495d movaps %xmm0, -288(%rbp)
+0x4964 movl 12(%rdi), %r12d           # reload height from the sampler (presumably 12(%rdi); see +0x291)
+0x4968 movl 16(%rdi), %r11d           # reload width (presumably 16(%rdi); see +0x274)
+0x496c addq $16, %rbx                 # dest pointer += 16 bytes (4 pixels)
+0x4970 addq $16, %rcx                 # second output/aux pointer, role not visible here
+0x4974 jmp "int blendTextureLinearR8<true, glsl::sampler2D_impl*, unsigned short vector[16], unsigned int>(glsl::sampler2D_impl*, glsl::vec2, int, glsl::vec4_scalar const&, unsigned short vector[16], unsigned int*)+0x210"    # back to the loop head
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment