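Cute Clang bug: building the SSE2 multiplier constant inside the function (instead of at file scope) makes Clang 3.4 at -O2 replace a single pmullw with a long sequence of scalar shifts.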
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <emmintrin.h> | |
| // Per-lane 16-bit multipliers for the 2-bit unpack: lane i holds 1 << (14 - 2*i), | |
| // i.e. lane 0 = 1 << 14 ... lane 7 = 1 << 0. Multiplying a 16-bit lane by this value | |
| // moves bits (2*i + 1):(2*i) of the input up to bits 15:14 of that lane. | |
| // In the listing below this file-scope copy is filled in at startup by the static | |
| // initializer __GLOBAL__I_a, and the "good" function reads it from memory. | |
| static const __m128i magic = _mm_setr_epi16(1 << 14, 1 << 12, 1 << 10, 1 << 8, 1 << 6, 1 << 4, 1 << 2, 1 << 0); | |
| // Control case: unpack the eight 2-bit fields of x into eight 16-bit lanes, | |
| // using the file-scope `magic` multiplier table. | |
| // Returns a vector whose lane i is bits (2*i + 1):(2*i) of x, zero-extended (0..3). | |
| // Per the listing below, Clang 3.4 -O2 compiles this to one pmullw plus one psrlw. | |
| __m128i good_unpack_2bits_to_16(unsigned short x) | |
| { | |
| // Broadcast x to all lanes; the low-16-bit multiply shifts each lane's field to bits 15:14. | |
| __m128i shifted = _mm_mullo_epi16(_mm_set1_epi16(x), magic); | |
| // Logical right shift by 14 leaves only that 2-bit field in each lane. | |
| return _mm_srli_epi16(shifted, 14); | |
| } | |
| // Reproducer: same computation as good_unpack_2bits_to_16, but the multiplier | |
| // table is built inside the function, so the compiler sees its values directly. | |
| // Returns a vector whose lane i is bits (2*i + 1):(2*i) of x, zero-extended (0..3). | |
| // Per the listing below, Clang 3.4 -O2 emits no pmullw here: it shifts x in a scalar | |
| // register once per lane (shll/leal), moves each result into an xmm register and | |
| // merges them with punpcklwd before the final psrlw. | |
| // NOTE(review): presumably the multiply by known powers of two is rewritten as | |
| // per-lane shifts, which SSE2 cannot do in one instruction -- inferred from the listing, not confirmed. | |
| __m128i bad_unpack_2bits_to_16(unsigned short x) | |
| { | |
| // Local copy deliberately shadows the file-scope `magic`; the values are identical. | |
| __m128i magic = _mm_setr_epi16(1 << 14, 1 << 12, 1 << 10, 1 << 8, 1 << 6, 1 << 4, 1 << 2, 1 << 0); | |
| // Broadcast x to all lanes; the low-16-bit multiply shifts each lane's field to bits 15:14. | |
| __m128i shifted = _mm_mullo_epi16(_mm_set1_epi16(x), magic); | |
| // Logical right shift by 14 leaves only that 2-bit field in each lane. | |
| return _mm_srli_epi16(shifted, 14); | |
| } | |
| /* | |
| C:\devel\llvm\repro>..\bin\clang++ --version | |
| clang version 3.4 (198054) | |
| Target: i686-pc-mingw32 | |
| Thread model: posix | |
| C:\devel\llvm\repro>..\bin\clang++ -O2 -S sse2_mul_shift.cpp | |
| .def @feat.00; | |
| .scl 3; | |
| .type 0; | |
| .endef | |
| .globl @feat.00 | |
| @feat.00 = 1 | |
| .def __Z23good_unpack_2bits_to_16t; | |
| .scl 2; | |
| .type 32; | |
| .endef | |
| .text | |
| .globl __Z23good_unpack_2bits_to_16t | |
| .align 16, 0x90 | |
| __Z23good_unpack_2bits_to_16t: | |
| pushl %ebp | |
| movl %esp, %ebp | |
| movd 8(%ebp), %xmm0 | |
| punpcklwd %xmm0, %xmm0 | |
| pshufd $0, %xmm0, %xmm0 | |
| pmullw __ZL5magic, %xmm0 | |
| psrlw $14, %xmm0 | |
| popl %ebp | |
| ret | |
| .def __Z22bad_unpack_2bits_to_16t; | |
| .scl 2; | |
| .type 32; | |
| .endef | |
| .globl __Z22bad_unpack_2bits_to_16t | |
| .align 16, 0x90 | |
| __Z22bad_unpack_2bits_to_16t: | |
| pushl %ebp | |
| movl %esp, %ebp | |
| movzwl 8(%ebp), %eax | |
| movd %eax, %xmm0 | |
| movl %eax, %ecx | |
| shll $8, %ecx | |
| movd %ecx, %xmm1 | |
| punpcklwd %xmm0, %xmm1 | |
| movl %eax, %ecx | |
| shll $4, %ecx | |
| movd %ecx, %xmm0 | |
| movl %eax, %ecx | |
| shll $12, %ecx | |
| movd %ecx, %xmm2 | |
| punpcklwd %xmm0, %xmm2 | |
| punpcklwd %xmm1, %xmm2 | |
| leal (,%eax,4), %ecx | |
| movd %ecx, %xmm0 | |
| movl %eax, %ecx | |
| shll $10, %ecx | |
| movd %ecx, %xmm1 | |
| punpcklwd %xmm0, %xmm1 | |
| movl %eax, %ecx | |
| shll $6, %ecx | |
| movd %ecx, %xmm3 | |
| shll $14, %eax | |
| movd %eax, %xmm0 | |
| punpcklwd %xmm3, %xmm0 | |
| punpcklwd %xmm1, %xmm0 | |
| punpcklwd %xmm2, %xmm0 | |
| psrlw $14, %xmm0 | |
| popl %ebp | |
| ret | |
| .def __GLOBAL__I_a; | |
| .scl 3; | |
| .type 32; | |
| .endef | |
| .section .rdata,"r" | |
| .align 16 | |
| LCPI2_0: | |
| .long 268451840 | |
| .long 16778240 | |
| .long 1048640 | |
| .long 65540 | |
| .text | |
| .align 16, 0x90 | |
| __GLOBAL__I_a: | |
| pushl %ebp | |
| movl %esp, %ebp | |
| movaps LCPI2_0, %xmm0 | |
| movaps %xmm0, __ZL5magic | |
| popl %ebp | |
| ret | |
| .lcomm __ZL5magic,16,16 | |
| .section .ctors,"w" | |
| .align 4 | |
| .long __GLOBAL__I_a | |
| */ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment