Skip to content

Instantly share code, notes, and snippets.

@rygorous
Created January 14, 2014 21:20
Embed
What would you like to do?
Cute Clang bug.
#include <emmintrin.h>
// Per-lane 16-bit multipliers, lowest lane first: 2^14, 2^12, ..., 2^2, 2^0
// (lane k holds 1 << (14 - 2*k)). A truncating 16-bit multiply by lane k's
// value moves bits [2k+1:2k] of the other operand into bits [15:14].
// In the clang 3.4 listing at the end of this file, this object is filled in
// at startup by a static constructor (__GLOBAL__I_a copies LCPI2_0 into it).
static const __m128i magic = _mm_setr_epi16(1 << 14, 1 << 12, 1 << 10, 1 << 8, 1 << 6, 1 << 4, 1 << 2, 1 << 0);
// Splits the 16-bit value x into its eight 2-bit fields, one per 16-bit lane:
// lane k of the result is (x >> (2*k)) & 3.
// Uses the file-scope `magic` multiplier table. In the clang 3.4 -O2 listing at
// the end of this file, this version compiles to a broadcast of x followed by
// a single pmullw and psrlw.
__m128i good_unpack_2bits_to_16(unsigned short x)
{
// Broadcast x to all eight lanes, then multiply each lane by its power of two
// (low 16 bits of the product kept) so the wanted bit pair lands in bits 15:14.
__m128i shifted = _mm_mullo_epi16(_mm_set1_epi16(x), magic);
// Logical right shift by 14 leaves just that bit pair in bits 1:0 of each lane.
return _mm_srli_epi16(shifted, 14);
}
// Same computation as good_unpack_2bits_to_16 (lane k = (x >> (2*k)) & 3), but
// the multiplier vector is built as a local instead of read from file scope.
// This is the variant the snippet is about: in the clang 3.4 -O2 listing at the
// end of this file, no pmullw is emitted for it; each lane is instead produced
// with a scalar shll/leal, moved into an XMM register with movd, and the lanes
// are merged with a chain of punpcklwd before the final psrlw.
// NOTE: keep the source form as-is; changing it may no longer reproduce that
// code generation.
__m128i bad_unpack_2bits_to_16(unsigned short x)
{
// Local copy of the multiplier table; shadows the file-scope `magic`.
__m128i magic = _mm_setr_epi16(1 << 14, 1 << 12, 1 << 10, 1 << 8, 1 << 6, 1 << 4, 1 << 2, 1 << 0);
// Broadcast x and multiply per lane (low 16 bits kept), moving bit pair k of x
// into bits 15:14 of lane k.
__m128i shifted = _mm_mullo_epi16(_mm_set1_epi16(x), magic);
// Logical right shift by 14 leaves just that bit pair in bits 1:0 of each lane.
return _mm_srli_epi16(shifted, 14);
}
/*
C:\devel\llvm\repro>..\bin\clang++ --version
clang version 3.4 (198054)
Target: i686-pc-mingw32
Thread model: posix
C:\devel\llvm\repro>..\bin\clang++ -O2 -S sse2_mul_shift.cpp
.def @feat.00;
.scl 3;
.type 0;
.endef
.globl @feat.00
@feat.00 = 1
.def __Z23good_unpack_2bits_to_16t;
.scl 2;
.type 32;
.endef
.text
.globl __Z23good_unpack_2bits_to_16t
.align 16, 0x90
__Z23good_unpack_2bits_to_16t:
pushl %ebp
movl %esp, %ebp
movd 8(%ebp), %xmm0
punpcklwd %xmm0, %xmm0
pshufd $0, %xmm0, %xmm0
pmullw __ZL5magic, %xmm0
psrlw $14, %xmm0
popl %ebp
ret
.def __Z22bad_unpack_2bits_to_16t;
.scl 2;
.type 32;
.endef
.globl __Z22bad_unpack_2bits_to_16t
.align 16, 0x90
__Z22bad_unpack_2bits_to_16t:
pushl %ebp
movl %esp, %ebp
movzwl 8(%ebp), %eax
movd %eax, %xmm0
movl %eax, %ecx
shll $8, %ecx
movd %ecx, %xmm1
punpcklwd %xmm0, %xmm1
movl %eax, %ecx
shll $4, %ecx
movd %ecx, %xmm0
movl %eax, %ecx
shll $12, %ecx
movd %ecx, %xmm2
punpcklwd %xmm0, %xmm2
punpcklwd %xmm1, %xmm2
leal (,%eax,4), %ecx
movd %ecx, %xmm0
movl %eax, %ecx
shll $10, %ecx
movd %ecx, %xmm1
punpcklwd %xmm0, %xmm1
movl %eax, %ecx
shll $6, %ecx
movd %ecx, %xmm3
shll $14, %eax
movd %eax, %xmm0
punpcklwd %xmm3, %xmm0
punpcklwd %xmm1, %xmm0
punpcklwd %xmm2, %xmm0
psrlw $14, %xmm0
popl %ebp
ret
.def __GLOBAL__I_a;
.scl 3;
.type 32;
.endef
.section .rdata,"r"
.align 16
LCPI2_0:
.long 268451840
.long 16778240
.long 1048640
.long 65540
.text
.align 16, 0x90
__GLOBAL__I_a:
pushl %ebp
movl %esp, %ebp
movaps LCPI2_0, %xmm0
movaps %xmm0, __ZL5magic
popl %ebp
ret
.lcomm __ZL5magic,16,16
.section .ctors,"w"
.align 4
.long __GLOBAL__I_a
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment