Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
module x86SimdIntrinsics
# SIMD vector type: an `N`-tuple of `VecElement{T}`.
const VE{N, T} = NTuple{N, VecElement{T}}

# Maps a Julia element type to the LLVM scalar type name used when building IR text.
# Signed and unsigned integers of the same width share one LLVM type.
# The 8-bit and floating-point entries let `bitcast` handle byte and float vectors
# instead of failing on a missing key.
const jlt_to_llvmt = Dict(
    Int64 => "i64",
    UInt64 => "i64",
    Int32 => "i32",
    UInt32 => "i32",
    Int16 => "i16",
    UInt16 => "i16",
    Int8 => "i8",
    UInt8 => "i8",
    Float32 => "float",
    Float64 => "double",
)
"""
    bitcast(T::VE{N1,T1}, ::Type{VE{N2,T2}})

Reinterpret the bits of the vector `T` as a `VE{N2,T2}` by emitting a single LLVM
`bitcast` instruction through `Base.llvmcall`.

The source and destination vector types must have the same `sizeof`, and both element
types must have an entry in `jlt_to_llvmt`.
"""
@generated function bitcast(T::VE{N1,T1}, ::Type{VE{N2,T2}}) where {N1, T1, N2, T2}
    # Inside the generator `T` is bound to the argument's type, so this compares the
    # sizes of the two vector types.
    @assert sizeof(T) == sizeof(VE{N2,T2})
    src_elt = jlt_to_llvmt[T1]
    dst_elt = jlt_to_llvmt[T2]
    # LLVM spellings of the source and destination vector types.
    src_vec = "<$N1 x $src_elt>"
    dst_vec = "<$N2 x $dst_elt>"
    ir = string(
        "%2 = bitcast ", src_vec, " %0 to ", dst_vec, "\n",
        "ret ", dst_vec, " %2\n",
    )
    # In the returned expression `T` refers to the runtime argument value.
    return :(Base.llvmcall($ir, VE{N2,T2}, Tuple{VE{N1,T1}}, T))
end
"""
    _mm_slli_epi16(x::VE{N,T}, u::UInt32) where {N, T<:Integer}

Apply the 16-bit-lane left shift to an integer vector of any lane layout.

`x` is reinterpreted as `VE{8,Int16}`, passed to the `VE{8,Int16}` method of
`_mm_slli_epi16` with shift count `u`, and the result is reinterpreted back to
`VE{N,T}`.
"""
function _mm_slli_epi16(x::VE{N,T}, u::UInt32) where {N, T <: Integer}
    as_words = bitcast(x, VE{8, Int16})
    return bitcast(_mm_slli_epi16(as_words, u), VE{N, T})
end
"""
    _mm_slli_epi16(x::VE{8,Int16}, u::UInt32)

Call the LLVM intrinsic `llvm.x86.sse2.pslli.w` on `x` with count `u` and return the
resulting `VE{8,Int16}`.
"""
@generated function _mm_slli_epi16(x::VE{8,Int16}, u::UInt32)
    # NOTE(review): this relies on the (declarations, body) tuple form of `llvmcall`;
    # confirm that form is accepted by the Julia version in use.
    decl = """
        declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32)
        """
    # %0 is the vector argument, %1 the shift count.
    body = """
        %3 = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %0, i32 %1)
        ret <8 x i16> %3
        """
    return :(Base.llvmcall(
        ($decl, $body),
        VE{8,Int16},
        Tuple{VE{8,Int16}, UInt32},
        x, u,
    ))
end
"""
    _mm_slli_epi32(x::VE{N,T}, u::UInt32) where {N, T<:Integer}

Apply the 32-bit-lane left shift to an integer vector of any lane layout.

`x` is reinterpreted as `VE{4,Int32}`, passed to the `VE{4,Int32}` method of
`_mm_slli_epi32` with shift count `u`, and the result is reinterpreted back to
`VE{N,T}`.
"""
function _mm_slli_epi32(x::VE{N,T}, u::UInt32) where {N, T <: Integer}
    as_dwords = bitcast(x, VE{4, Int32})
    # This must dispatch to the 32-bit kernel. Calling `_mm_slli_epi16` here would
    # re-split the value into 16-bit lanes and shift those instead.
    shifted = _mm_slli_epi32(as_dwords, u)
    return bitcast(shifted, VE{N, T})
end
"""
    _mm_slli_epi32(x::VE{4,Int32}, u::UInt32)

Call the LLVM intrinsic `llvm.x86.sse2.pslli.d` on `x` with count `u` and return the
resulting `VE{4,Int32}`.
"""
@generated function _mm_slli_epi32(x::VE{4, Int32}, u::UInt32)
    # NOTE(review): this relies on the (declarations, body) tuple form of `llvmcall`;
    # confirm that form is accepted by the Julia version in use.
    decl = """
        declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32)
        """
    # %0 is the vector argument, %1 the shift count.
    body = """
        %3 = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %0, i32 %1)
        ret <4 x i32> %3
        """
    return :(Base.llvmcall(
        ($decl, $body),
        VE{4,Int32},
        Tuple{VE{4,Int32}, UInt32},
        x, u,
    ))
end
end # module
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment