Created
April 20, 2017 01:11
-
-
Save mntone/c3a1d921f137fef8dd292121ec0c3c59 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; Function compile flags: /Ogtpy
; File
; COMDAT decompress_r10l
;
;-----------------------------------------------------------------------
; void decompress_r10l(const uint8_t *input, uint32_t in_linesize,
;                      uint32_t start_y, uint32_t end_y,
;                      uint8_t *output, uint32_t out_linesize)
;
; Syntax: MASM (Intel operand order); compiler-generated listing.
; ABI:    register and stack usage below match the Microsoft x64 convention.
; In:     rcx = input, edx = in_linesize, r8d = start_y, r9d = end_y,
;         output and out_linesize on the stack (output$ / out_linesize$)
; Out:    none
; Saves:  rsi, rdi, rbx, rbp, r14, xmm6 (all restored before ret)
; Clobb:  rax, rcx, rdx, r8-r11, xmm0-xmm5, flags
; Stack:  24 bytes (push r14 + one 16-byte slot for xmm6); rbx, rbp, rsi
;         and rdi are spilled into the four 8-byte slots directly above
;         the return address (the caller-provided shadow space)
; Align:  movdqa / movntdq require 16-byte aligned row addresses
;
; Register roles inside the loops:
;   r14  = input base            rbp  = output base
;   edi  = in_linesize           esi  = out_linesize
;   r9d  = y * in_linesize       r8d  = y * out_linesize
;          (both kept as 32-bit values and zero-extended when used)
;   rbx  = rows remaining        r11d = width_d4 (16-byte groups per row)
;   r10  = groups remaining in the current row
;   rdx  = input0                rax  = output0
;   xmm3 = base, xmm4 = mask_r, xmm5 = mask_g, xmm6 = mask_b
;-----------------------------------------------------------------------
_TEXT   SEGMENT
; Argument offsets from rsp, valid after the prologue has lowered rsp by 24.
; Only output$ and out_linesize$ are referenced by the code below.
input$ = 32
in_linesize$ = 40
start_y$ = 48
end_y$ = 56
output$ = 64
out_linesize$ = 72
decompress_r10l PROC                            ; COMDAT
; 549  : {
$LN20:
        mov     QWORD PTR [rsp+24], rsi         ; spill rsi above the return address
        mov     QWORD PTR [rsp+32], rdi         ; spill rdi above the return address
        push    r14
        sub     rsp, 16                         ; 16-byte slot at [rsp] for xmm6
; 94   : return a < b ? a : b;
        mov     esi, DWORD PTR out_linesize$[rsp] ; esi = out_linesize
; 549  : {
        mov     eax, r9d                        ; eax = end_y
; 551  : uint32_t y;
; 552  :
; 553  : const __m128i base = _mm_set1_epi32(0xC0000000);
        movdqa  xmm3, XMMWORD PTR __xmm@c0000000c0000000c0000000c0000000
; 94   : return a < b ? a : b;
        cmp     edx, esi                        ; in_linesize vs out_linesize (unsigned)
; 554  : const __m128i mask_r = _mm_set1_epi32(0xFFC00000);
        movdqa  xmm4, XMMWORD PTR __xmm@ffc00000ffc00000ffc00000ffc00000
; 94   : return a < b ? a : b;
        mov     r11d, esi
; 555  : const __m128i mask_g = _mm_set1_epi32(0x003FF000);
        movdqa  xmm5, XMMWORD PTR __xmm@003ff000003ff000003ff000003ff000
; 94   : return a < b ? a : b;
        cmovb   r11d, edx                       ; r11d = min(in_linesize, out_linesize)
; 550  : uint32_t width_d4 = min_uint32(in_linesize, out_linesize) / 16;
        shr     r11d, 4                         ; r11d = width_d4
        mov     r10d, r8d                       ; r10d = start_y
        movaps  XMMWORD PTR [rsp], xmm6         ; save xmm6 before it is overwritten
        mov     edi, edx                        ; edi = in_linesize (row stride)
; 556  : const __m128i mask_b = _mm_set1_epi32(0x00000FFC);
        movdqa  xmm6, XMMWORD PTR __xmm@00000ffc00000ffc00000ffc00000ffc
        mov     r14, rcx                        ; r14 = input
; 557  :
; 558  : for (y = start_y; y < end_y; y++) {
        cmp     r8d, r9d
        jae     $LN3@decompress                 ; start_y >= end_y: no rows to process
        mov     QWORD PTR [rsp+32], rbx         ; rbx/rbp are only saved when the loop runs
; 94   : return a < b ? a : b;
        mov     r9d, edx
        imul    r9d, r10d                       ; r9d = start_y * in_linesize  (32-bit product)
        imul    r8d, esi                        ; r8d = start_y * out_linesize (32-bit product)
        sub     eax, r10d                       ; eax = end_y - start_y
        mov     QWORD PTR [rsp+40], rbp
        mov     rbp, QWORD PTR output$[rsp]     ; rbp = output
        mov     ebx, eax                        ; rbx = rows remaining (> 0 here)
        npad    8
$LL4@decompress:
; 559  : const __m128i *input0;
; 560  : register __m128i *output0;
; 561  : uint32_t x;
; 562  :
; 563  : input0 = (const __m128i*)(input + y * in_linesize);
        mov     edx, r9d                        ; zero-extend the 32-bit row offset
; 564  : output0 = (__m128i*)(output + y * out_linesize);
        mov     eax, r8d                        ; zero-extend the 32-bit row offset
        add     rdx, r14                        ; rdx = input0
        add     rax, rbp                        ; rax = output0
; 565  :
; 566  : for (x = 0; x < width_d4; x++) {
        test    r11d, r11d
        je      SHORT $LN2@decompress           ; width_d4 == 0: nothing to do in this row
; 559  : const __m128i *input0;
; 560  : register __m128i *output0;
; 561  : uint32_t x;
; 562  :
; 563  : input0 = (const __m128i*)(input + y * in_linesize);
        mov     r10d, r11d                      ; r10 = groups remaining (> 0 here)
        npad    12
$LL7@decompress:
; 567  : const __m128i tmp = _mm_load_si128(input0++);
        mov     rcx, rdx
        add     rdx, 16                         ; input0++
        movdqa  xmm2, XMMWORD PTR [rcx]         ; xmm2 = tmp (aligned load)
; 568  : const __m128i r = _MM_AND_SRLI_EPI32(tmp, mask_r, 22);
; 569  : const __m128i g = _MM_AND_SRLI_EPI32(tmp, mask_g, 2);
; 570  : const __m128i b = _MM_AND_SLLI_EPI32(tmp, mask_b, 18);
; 571  :
; 572  : _mm_stream_si128(output0++,
        mov     rcx, rax
        movdqa  xmm1, xmm2
        movdqa  xmm0, xmm2
        pand    xmm1, xmm4                      ; tmp & mask_r
        pand    xmm0, xmm5                      ; tmp & mask_g
        psrld   xmm1, 22                        ; xmm1 = r
        pand    xmm2, xmm6                      ; tmp & mask_b
        por     xmm1, xmm3                      ; base | r
        psrld   xmm0, 2                         ; xmm0 = g
        por     xmm1, xmm0                      ; base | r | g
        pslld   xmm2, 18                        ; xmm2 = b
        add     rax, 16                         ; output0++
        por     xmm1, xmm2                      ; base | r | g | b
        movntdq XMMWORD PTR [rcx], xmm1         ; non-temporal (streaming) store
        sub     r10, 1
        jne     SHORT $LL7@decompress
$LN2@decompress:
; 557  :
; 558  : for (y = start_y; y < end_y; y++) {
        add     r9d, edi                        ; advance input row offset by in_linesize
        add     r8d, esi                        ; advance output row offset by out_linesize
        sub     rbx, 1
        jne     SHORT $LL4@decompress
        mov     rbp, QWORD PTR [rsp+40]
        mov     rbx, QWORD PTR [rsp+32]
$LN3@decompress:
; 573  : _MM_OR_SI128_OP4(base, r, g, b));
; 574  : }
; 575  : }
; 576  : }
        mov     rsi, QWORD PTR [rsp+48]         ; same slot as [rsp+24] at entry
        mov     rdi, QWORD PTR [rsp+56]         ; same slot as [rsp+32] at entry
        movaps  xmm6, XMMWORD PTR [rsp]
        add     rsp, 16
        pop     r14
        ret     0
decompress_r10l ENDP
_TEXT   ENDS
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <xmmintrin.h> | |
#include <emmintrin.h> | |
/*
 * Small SSE2 helper macros. Every argument is evaluated exactly once.
 *
 * NOTE(review): identifiers that start with an underscore followed by an
 * upper-case letter are reserved for the implementation in C; the names are
 * kept unchanged here because code elsewhere refers to them.
 */

/* Bitwise OR of three __m128i values. */
#define _MM_OR_SI128_OP3(x, y, z) _mm_or_si128(_mm_or_si128(x, y), z)

/* Bitwise OR of four __m128i values. */
#define _MM_OR_SI128_OP4(x, y, z, w) _mm_or_si128(_MM_OR_SI128_OP3(x, y, z), w)

/* (x & m) << i, applied independently to each 32-bit lane. */
#define _MM_AND_SLLI_EPI32(x, m, i) _mm_slli_epi32(_mm_and_si128(x, m), i)

/* (x & m) >> i (logical shift), applied independently to each 32-bit lane. */
#define _MM_AND_SRLI_EPI32(x, m, i) _mm_srli_epi32(_mm_and_si128(x, m), i)
void decompress_r10l( | |
const uint8_t *input, const uint32_t in_linesize, | |
uint32_t start_y, uint32_t end_y, | |
uint8_t *output, uint32_t out_linesize) | |
{ | |
uint32_t width_d4 = min_uint32(in_linesize, out_linesize) / 16; | |
uint32_t y; | |
const __m128i base = _mm_set1_epi32(0xC0000000); | |
const __m128i mask_r = _mm_set1_epi32(0xFFC00000); | |
const __m128i mask_g = _mm_set1_epi32(0x003FF000); | |
const __m128i mask_b = _mm_set1_epi32(0x00000FFC); | |
for (y = start_y; y < end_y; y++) { | |
const __m128i *input0; | |
register __m128i *output0; | |
uint32_t x; | |
input0 = (const __m128i*)(input + y * in_linesize); | |
output0 = (__m128i*)(output + y * out_linesize); | |
for (x = 0; x < width_d4; x++) { | |
const __m128i tmp = _mm_load_si128(input0++); | |
const __m128i r = _MM_AND_SRLI_EPI32(tmp, mask_r, 22); | |
const __m128i g = _MM_AND_SRLI_EPI32(tmp, mask_g, 2); | |
const __m128i b = _MM_AND_SLLI_EPI32(tmp, mask_b, 18); | |
_mm_stream_si128(output0++, | |
_MM_OR_SI128_OP4(base, r, g, b)); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment