Skip to content

Instantly share code, notes, and snippets.

@gottesmm
Created May 21, 2018 18:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gottesmm/08b5d69e7bf1dc2a83d75b1b592dec45 to your computer and use it in GitHub Desktop.
Save gottesmm/08b5d69e7bf1dc2a83d75b1b592dec45 to your computer and use it in GitHub Desktop.
// xcrun clang++ test.cpp -O3 -o - -S -std=c++11 -mavx
#include <simd/simd.h>
namespace {
// All-zero 16-byte vector. The unpack helpers pass it as the second shuffle
// operand so that every selected input byte is paired with a zero byte.
static constexpr simd_packed_uchar16 zero = {0, 0, 0, 0, 0, 0, 0, 0,
                                             0, 0, 0, 0, 0, 0, 0, 0};
} // end anonymous namespace
// Interleaves bytes 0..7 of `input` with zero bytes (shuffle indices 16..23
// select from `zero`) and reinterprets the 16 resulting bytes as eight
// 16-bit lanes. With the byte order of the x86 target in the build command
// this zero-extends each of the low eight bytes to 16 bits.
static simd::ushort8 unpackLower(simd::uchar16 input) {
  auto interleaved = __builtin_shufflevector(input, zero,
                                             0, 16, 1, 17, 2, 18, 3, 19,
                                             4, 20, 5, 21, 6, 22, 7, 23);
  return (simd::ushort8)interleaved;
}
// Interleaves bytes 8..15 of `input` with zero bytes (shuffle indices 24..31
// select from `zero`) and reinterprets the 16 resulting bytes as eight
// 16-bit lanes. With the byte order of the x86 target in the build command
// this zero-extends each of the high eight bytes to 16 bits.
static simd::ushort8 unpackUpper(simd::uchar16 input) {
  auto interleaved = __builtin_shufflevector(input, zero,
                                             8, 24, 9, 25, 10, 26, 11, 27,
                                             12, 28, 13, 29, 14, 30, 15, 31);
  return (simd::ushort8)interleaved;
}
// Lane-wise "not equal" between the widened low eight bytes of `lhs` and the
// eight 16-bit lanes of `rhs`. A lane of the result is all ones where the
// values differ and zero where they match.
static simd::short8 performComparisonLow(simd::uchar16 lhs, simd::ushort8 rhs) {
  const simd::ushort8 widened = unpackLower(lhs);
  return widened != rhs;
}
// Lane-wise "not equal" between the widened high eight bytes of `lhs` and the
// eight 16-bit lanes of `rhs`. A lane of the result is all ones where the
// values differ and zero where they match.
static simd::short8 performComparisonHigh(simd::uchar16 lhs, simd::ushort8 rhs) {
  const simd::ushort8 widened = unpackUpper(lhs);
  return widened != rhs;
}
// Compares the 16 bytes of `rhs` against the sixteen 16-bit values stored at
// lhs[0] and lhs[1].
//
// The result is the bitwise OR of two eight-lane "not equal" masks (bytes
// 0..7 of rhs vs. lhs[0], bytes 8..15 of rhs vs. lhs[1]), reinterpreted as
// sixteen bytes. 16-bit lane i is therefore all ones when either
// rhs[i] != lhs[0][i] or rhs[i + 8] != lhs[1][i].
//
// NOTE(review): this is not the per-byte equality mask that equalP returns
// (mismatch instead of match, and the two halves are OR-ed together rather
// than packed side by side) -- confirm whether that is intended.
simd::char16 equalP2(simd::ushort8 *lhs, simd::uchar16 rhs) {
  const simd::short8 lowMismatch = performComparisonLow(rhs, lhs[0]);
  const simd::short8 highMismatch = performComparisonHigh(rhs, lhs[1]);
  return (simd::char16)(lowMismatch | highMismatch);
}
// Per-element test of the sixteen 16-bit values at lhs[0..1] against the
// sixteen bytes of `rhs`. Byte i of the result is all ones when value i
// equals rhs[i], and zero otherwise.
simd::char16 equalP(simd::ushort8 *lhs, simd::uchar16 rhs) {
  // Reinterpret the 32 bytes starting at lhs as a single byte vector.
  auto lhsBytes = *reinterpret_cast<simd::uchar32 *>(lhs);

  // A 16-bit value equals a byte exactly when its low byte matches and its
  // high byte is zero; `.even` / `.odd` pick those bytes out of each value.
  const auto lowBytesMatch = (lhsBytes.even == rhs);
  const auto highBytesZero = (lhsBytes.odd == 0);
  return lowBytesMatch & highBytesZero;
}
## // xcrun clang++ test.cpp -O3 -o - -S -std=c++11 -mavx
## ---------------------------------------------------------------------
## simd::char16 equalP2(simd::ushort8 *lhs, simd::uchar16 rhs)
## Compiler output (x86-64, AT&T syntax, AVX) for the C++ source above.
## In:    %rdi  = lhs (16 bytes read at (%rdi) and 16 bytes at 16(%rdi))
##        %xmm0 = rhs
## Out:   %xmm0 = OR of the two 16-bit-lane "not equal" masks
## Writes: %xmm0, %xmm1, %xmm2, %xmm3; %rbp is saved and restored
## ---------------------------------------------------------------------
.section __TEXT,__text,regular,pure_instructions
.macosx_version_min 10, 13
.globl __Z7equalP2PDv8_tDv16_h ## -- Begin function _Z7equalP2PDv8_tDv16_h
.p2align 4, 0x90
__Z7equalP2PDv8_tDv16_h: ## @_Z7equalP2PDv8_tDv16_h
.cfi_startproc
## BB#0:
## Frame-pointer prologue; the function uses no stack locals.
pushq %rbp
Lcfi0:
.cfi_def_cfa_offset 16
Lcfi1:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Lcfi2:
.cfi_def_cfa_register %rbp
## Low half: zero-extend bytes 0..7 of rhs to eight words, then compare
## them for equality with the eight words at lhs[0].
vpmovzxbw %xmm0, %xmm1 ## xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
vpcmpeqw (%rdi), %xmm1, %xmm1
## xmm2 = all ones; XOR with it turns the "equal" mask into "not equal"
## (the source uses !=, for which there is no direct compare instruction here).
vpcmpeqd %xmm2, %xmm2, %xmm2
vpxor %xmm2, %xmm1, %xmm1
## High half: xmm3 = 0, interleave bytes 8..15 of rhs with zero bytes to
## widen them, then compare with the eight words at lhs[1] (offset 16).
vpxor %xmm3, %xmm3, %xmm3
vpunpckhbw %xmm3, %xmm0, %xmm0 ## xmm0 = xmm0[8],xmm3[8],xmm0[9],xmm3[9],xmm0[10],xmm3[10],xmm0[11],xmm3[11],xmm0[12],xmm3[12],xmm0[13],xmm3[13],xmm0[14],xmm3[14],xmm0[15],xmm3[15]
vpcmpeqw 16(%rdi), %xmm0, %xmm0
## Invert to "not equal", then OR the low and high masks into the result.
vpxor %xmm2, %xmm0, %xmm0
vpor %xmm1, %xmm0, %xmm0
popq %rbp
retq
.cfi_endproc
## -- End function
## 16-byte constants used as vpshufb control masks by
## __Z6equalPPDv8_tDv16_h below.
.section __TEXT,__literal16,16byte_literals
.p2align 4 ## -- Begin function _Z6equalPPDv8_tDv16_h
## LCPI1_0: gathers the even-indexed source bytes (0, 2, ..., 14) into the
## low eight bytes of the shuffle result.
LCPI1_0:
.byte 0 ## 0x0
.byte 2 ## 0x2
.byte 4 ## 0x4
.byte 6 ## 0x6
.byte 8 ## 0x8
.byte 10 ## 0xa
.byte 12 ## 0xc
.byte 14 ## 0xe
## Upper eight control bytes are filler: only the low quadword of each
## shuffle result is kept by the vpunpcklqdq that follows it.
.space 1
.space 1
.space 1
.space 1
.space 1
.space 1
.space 1
.space 1
## LCPI1_1: gathers the odd-indexed source bytes (1, 3, ..., 15) into the
## low eight bytes of the shuffle result.
LCPI1_1:
.byte 1 ## 0x1
.byte 3 ## 0x3
.byte 5 ## 0x5
.byte 7 ## 0x7
.byte 9 ## 0x9
.byte 11 ## 0xb
.byte 13 ## 0xd
.byte 15 ## 0xf
## Upper eight control bytes are filler, as for LCPI1_0.
.space 1
.space 1
.space 1
.space 1
.space 1
.space 1
.space 1
.space 1
## ---------------------------------------------------------------------
## simd::char16 equalP(simd::ushort8 *lhs, simd::uchar16 rhs)
## Compiler output (x86-64, AT&T syntax, AVX) for the C++ source above.
## In:    %rdi  = lhs (32 bytes read at (%rdi), unaligned load)
##        %xmm0 = rhs
## Out:   %xmm0 = (even bytes of lhs == rhs) & (odd bytes of lhs == 0)
## Writes: %ymm1, %xmm0, %xmm2, %xmm3, %xmm4; %rbp is saved and restored
## ---------------------------------------------------------------------
.section __TEXT,__text,regular,pure_instructions
.globl __Z6equalPPDv8_tDv16_h
.p2align 4, 0x90
__Z6equalPPDv8_tDv16_h: ## @_Z6equalPPDv8_tDv16_h
.cfi_startproc
## BB#0:
## Frame-pointer prologue; the function uses no stack locals.
pushq %rbp
Lcfi3:
.cfi_def_cfa_offset 16
Lcfi4:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Lcfi5:
.cfi_def_cfa_register %rbp
## ymm1 = all 32 bytes of lhs; xmm3 = its upper 16 bytes, so xmm1/xmm3
## hold the two 128-bit halves.
vmovdqu (%rdi), %ymm1
vmovdqa LCPI1_0(%rip), %xmm2 ## xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
vextractf128 $1, %ymm1, %xmm3
## Gather the even bytes of each half into its low quadword, join the two
## quadwords into 16 bytes, and compare them with rhs.
vpshufb %xmm2, %xmm3, %xmm4
vpshufb %xmm2, %xmm1, %xmm2
vpunpcklqdq %xmm4, %xmm2, %xmm2 ## xmm2 = xmm2[0],xmm4[0]
vpcmpeqb %xmm0, %xmm2, %xmm0
## Same gather for the odd bytes, which are then compared against zero.
vmovdqa LCPI1_1(%rip), %xmm2 ## xmm2 = <1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u>
vpshufb %xmm2, %xmm3, %xmm3
vpshufb %xmm2, %xmm1, %xmm1
vpunpcklqdq %xmm3, %xmm1, %xmm1 ## xmm1 = xmm1[0],xmm3[0]
vpxor %xmm2, %xmm2, %xmm2
vpcmpeqb %xmm2, %xmm1, %xmm1
## Result = (even bytes match rhs) AND (odd bytes are zero).
vpand %xmm0, %xmm1, %xmm0
popq %rbp
## A 256-bit register (ymm1) was written above; clear the upper halves
## before returning to the caller.
vzeroupper
retq
.cfi_endproc
## -- End function
.subsections_via_symbols
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment