Skip to content

Instantly share code, notes, and snippets.

@Artem-B
Last active September 20, 2021 23:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Artem-B/ec4290809650f5092d61d6dafa6b0131 to your computer and use it in GitHub Desktop.
Save Artem-B/ec4290809650f5092d61d6dafa6b0131 to your computer and use it in GitHub Desktop.
namespace {
// Tag type keyed on an integer. Only declared here; in this file it is used
// purely as a template argument, e.g. __Tag<__nvvm_texture_op("...")>, to
// select a __tex_fetch_v4 specialization.
template <int N>
struct __Tag;
# 54 "__clang_cuda_texture_intrinsics.h" 3
// Type-traits template mapping a texel type to its base type (__bt) and an
// associated 4-component vector type (__ft). Declared here so the explicit
// specializations for fundamental types can precede the generic definition.
template <class>
struct __FT;
// Traits for float: base type float, associated vector type float4.
template <>
struct __FT<float> {
  typedef float __bt;
  typedef float4 __ft;
};
// Traits for plain char: base type char, associated vector type int4.
template <>
struct __FT<char> {
  typedef char __bt;
  typedef int4 __ft;
};
// Traits for signed char: base type signed char, associated vector type int4.
template <>
struct __FT<signed char> {
  typedef signed char __bt;
  typedef int4 __ft;
};
// Traits for unsigned char: base type unsigned char, associated vector type
// uint4.
template <>
struct __FT<unsigned char> {
  typedef unsigned char __bt;
  typedef uint4 __ft;
};
// Traits for short: base type short, associated vector type int4.
template <>
struct __FT<short> {
  typedef short __bt;
  typedef int4 __ft;
};
// Traits for unsigned short: base type unsigned short, associated vector type
// uint4.
// The type is spelled out instead of using the `ushort` typedef: `ushort` is a
// non-standard libc extension that is not declared on every platform or in
// every language mode. Where `ushort` does exist it names this same type, so
// __FT<ushort> still resolves to this specialization.
template <>
struct __FT<unsigned short> {
  using __bt = unsigned short;
  using __ft = uint4;
};
// Traits for int: base type int, associated vector type int4.
template <>
struct __FT<int> {
  typedef int __bt;
  typedef int4 __ft;
};
// Traits for unsigned int: base type unsigned int, associated vector type
// uint4.
// The type is spelled out instead of using the `uint` typedef: `uint` is a
// non-standard libc extension that is not declared on every platform or in
// every language mode. Where `uint` does exist it names this same type, so
// __FT<uint> still resolves to this specialization.
template <>
struct __FT<unsigned int> {
  using __bt = unsigned int;
  using __ft = uint4;
};
// Generic case, for any type with an `x` member (e.g. a vector type): the
// base type is the type of that member, and the associated vector type is
// whatever the base type's own __FT specialization reports.
template <class __T>
struct __FT {
  typedef decltype(__T::x) __bt;
  typedef typename __FT<__bt>::__ft __ft;
};
// Dispatcher for 4-component texture fetches. It is specialized per operation
// tag (__Tag<__nvvm_texture_op("name")>); each specialization provides static
// __run<T>() members that issue the corresponding inline-asm instruction.
// The primary template is declared only.
template <class __op>
struct __tex_fetch_v4;
// Specialization for tag -1, declared but never defined in the visible code.
// NOTE(review): presumably -1 is what __nvvm_texture_op yields for an
// unrecognized operation name, so that using it hits an incomplete type --
// confirm against the builtin's documentation.
template <>
struct __tex_fetch_v4<__Tag<-1>>;
# 192 "__clang_cuda_texture_intrinsics.h" 3
// Fetch for op "__tex1D_v2": texture handle plus one float coordinate, four
// components returned. Each __run<T> emits tex.1d.v4.<s32|u32|f32>.f32.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex1D_v2")>> {
  // Declaration only; the supported result types are specialized below.
  template <class T>
  __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x);

  // int4 result.
  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj,
                                           float __x) {
    int4 __out;
    asm("tex.1d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5}];"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w)
        : "l"(__obj), "f"(__x));
    return __out;
  }

  // uint4 result.
  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x) {
    uint4 __out;
    asm("tex.1d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5}];"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w)
        : "l"(__obj), "f"(__x));
    return __out;
  }

  // float4 result.
  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x) {
    float4 __out;
    asm("tex.1d.v4.f32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5}];"
        : "=f"(__out.x), "=f"(__out.y), "=f"(__out.z), "=f"(__out.w)
        : "l"(__obj), "f"(__x));
    return __out;
  }
};
// Fetch for op "__tex1D_rmnf_v2": same instructions as the integer 1D fetches
// (tex.1d.v4.<s32|u32>.f32), but the result is always handed back as float4.
// NOTE(review): the float members are bound with the "r" constraint, so the
// register bits are stored into the floats without conversion; presumably the
// texture delivers float data in this mode -- confirm.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex1D_rmnf_v2")>> {
  // Declaration only; T selects the instruction type, not the return type.
  template <class T>
  __attribute__((device)) static float4 __run(cudaTextureObject_t __obj,
                                              float __x);

  // Selected by int4: uses the .s32 form.
  template <>
  __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj,
                                             float __x) {
    float4 __out;
    asm("tex.1d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5}];"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w)
        : "l"(__obj), "f"(__x));
    return __out;
  }

  // Selected by uint4: uses the .u32 form.
  template <>
  __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj,
                                              float __x) {
    float4 __out;
    asm("tex.1d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5}];"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w)
        : "l"(__obj), "f"(__x));
    return __out;
  }
};
// Fetch for op "__tex1Dfetch_v2": texture handle plus one integer coordinate,
// four components returned. Each __run<T> emits tex.1d.v4.<s32|u32|f32>.s32.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex1Dfetch_v2")>> {
  // Declaration only; the supported result types are specialized below.
  template <class T>
  __attribute__((device)) static T __run(cudaTextureObject_t __obj, int __x);

  // int4 result.
  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj,
                                           int __x) {
    int4 __out;
    asm("tex.1d.v4.s32.s32\t"
        "{%0, %1, %2, %3}, [%4, {%5}];"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w)
        : "l"(__obj), "r"(__x));
    return __out;
  }

  // uint4 result.
  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             int __x) {
    uint4 __out;
    asm("tex.1d.v4.u32.s32\t"
        "{%0, %1, %2, %3}, [%4, {%5}];"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w)
        : "l"(__obj), "r"(__x));
    return __out;
  }

  // float4 result.
  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               int __x) {
    float4 __out;
    asm("tex.1d.v4.f32.s32\t"
        "{%0, %1, %2, %3}, [%4, {%5}];"
        : "=f"(__out.x), "=f"(__out.y), "=f"(__out.z), "=f"(__out.w)
        : "l"(__obj), "r"(__x));
    return __out;
  }
};
// Fetch for op "__tex1Dfetch_rmnf_v2": integer-coordinate 1D fetch using the
// integer instruction forms (tex.1d.v4.<s32|u32>.s32), with the result always
// handed back as float4.
// NOTE(review): the float members are bound with the "r" constraint, so the
// register bits are stored into the floats without conversion; presumably the
// texture delivers float data in this mode -- confirm.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex1Dfetch_rmnf_v2")>> {
  // Declaration only; T selects the instruction type, not the return type.
  template <class T>
  __attribute__((device)) static float4 __run(cudaTextureObject_t __obj,
                                              int __x);

  // Selected by int4: uses the .s32 form.
  template <>
  __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj,
                                             int __x) {
    float4 __out;
    asm("tex.1d.v4.s32.s32\t"
        "{%0, %1, %2, %3}, [%4, {%5}];"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w)
        : "l"(__obj), "r"(__x));
    return __out;
  }

  // Selected by uint4: uses the .u32 form.
  template <>
  __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj,
                                              int __x) {
    float4 __out;
    asm("tex.1d.v4.u32.s32\t"
        "{%0, %1, %2, %3}, [%4, {%5}];"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w)
        : "l"(__obj), "r"(__x));
    return __out;
  }
};
// Alias: op "__itex1D" reuses the "__tex1D_v2" implementation by inheriting
// its __run members.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex1D")>>
: __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex1D_v2")>> {};
// Alias: op "__itex1Dfetch" reuses the "__tex1Dfetch_v2" implementation by
// inheriting its __run members.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex1Dfetch")>>
: __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex1Dfetch_v2")>> {};
// Fetch for op "__tex1DGrad_v2": one float coordinate plus two scalar
// gradients. Each __run<T> emits tex.grad.1d.v4.<s32|u32|f32>.f32.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex1DGrad_v2")>> {
  // Declaration only; the supported result types are specialized below.
  template <class T>
  __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x,
                                         float __dPdx, float __dPdy);

  // int4 result.
  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj,
                                           float __x, float __dPdx,
                                           float __dPdy) {
    int4 __out;
    asm("tex.grad.1d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5}], {%6}, {%7};"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w)
        : "l"(__obj), "f"(__x), "f"(__dPdx), "f"(__dPdy));
    return __out;
  }

  // uint4 result.
  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __dPdx,
                                             float __dPdy) {
    uint4 __out;
    asm("tex.grad.1d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5}], {%6}, {%7};"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w)
        : "l"(__obj), "f"(__x), "f"(__dPdx), "f"(__dPdy));
    return __out;
  }

  // float4 result.
  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __dPdx,
                                               float __dPdy) {
    float4 __out;
    asm("tex.grad.1d.v4.f32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5}], {%6}, {%7};"
        : "=f"(__out.x), "=f"(__out.y), "=f"(__out.z), "=f"(__out.w)
        : "l"(__obj), "f"(__x), "f"(__dPdx), "f"(__dPdy));
    return __out;
  }
};
// Fetch for op "__tex1DGrad_rmnf_v2": 1D gradient fetch using the integer
// instruction forms (tex.grad.1d.v4.<s32|u32>.f32), with the result always
// handed back as float4.
// NOTE(review): the float members are bound with the "r" constraint, so the
// register bits are stored into the floats without conversion; presumably the
// texture delivers float data in this mode -- confirm.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex1DGrad_rmnf_v2")>> {
  // Declaration only; T selects the instruction type, not the return type.
  template <class T>
  __attribute__((device)) static float4 __run(cudaTextureObject_t __obj,
                                              float __x, float __dPdx,
                                              float __dPdy);

  // Selected by int4: uses the .s32 form.
  template <>
  __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj,
                                             float __x, float __dPdx,
                                             float __dPdy) {
    float4 __out;
    asm("tex.grad.1d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5}], {%6}, {%7};"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w)
        : "l"(__obj), "f"(__x), "f"(__dPdx), "f"(__dPdy));
    return __out;
  }

  // Selected by uint4: uses the .u32 form.
  template <>
  __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj,
                                              float __x, float __dPdx,
                                              float __dPdy) {
    float4 __out;
    asm("tex.grad.1d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5}], {%6}, {%7};"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w)
        : "l"(__obj), "f"(__x), "f"(__dPdx), "f"(__dPdy));
    return __out;
  }
};
// Alias: op "__itex1DGrad" reuses the "__tex1DGrad_v2" implementation by
// inheriting its __run members.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex1DGrad")>>
: __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex1DGrad_v2")>> {};
// Fetch for op "__tex1DLayered_v2": float coordinate plus an integer layer.
// Each __run<T> emits tex.a1d.v4.<s32|u32|f32>.f32. Note the asm operand
// order is (layer, x), i.e. the reverse of the C++ parameter order.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex1DLayered_v2")>> {
  // Declaration only; the supported result types are specialized below.
  template <class T>
  __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x,
                                         int __layer);

  // int4 result.
  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj,
                                           float __x, int __layer) {
    int4 __out;
    asm("tex.a1d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w)
        : "l"(__obj), "r"(__layer), "f"(__x));
    return __out;
  }

  // uint4 result.
  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, int __layer) {
    uint4 __out;
    asm("tex.a1d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w)
        : "l"(__obj), "r"(__layer), "f"(__x));
    return __out;
  }

  // float4 result.
  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, int __layer) {
    float4 __out;
    asm("tex.a1d.v4.f32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=f"(__out.x), "=f"(__out.y), "=f"(__out.z), "=f"(__out.w)
        : "l"(__obj), "r"(__layer), "f"(__x));
    return __out;
  }
};
// Fetch for op "__tex1DLayered_rmnf_v2": layered 1D fetch using the integer
// instruction forms (tex.a1d.v4.<s32|u32>.f32), with the result always handed
// back as float4. Asm operand order is (layer, x).
// NOTE(review): the float members are bound with the "r" constraint, so the
// register bits are stored into the floats without conversion; presumably the
// texture delivers float data in this mode -- confirm.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex1DLayered_rmnf_v2")>> {
  // Declaration only; T selects the instruction type, not the return type.
  template <class T>
  __attribute__((device)) static float4 __run(cudaTextureObject_t __obj,
                                              float __x, int __layer);

  // Selected by int4: uses the .s32 form.
  template <>
  __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj,
                                             float __x, int __layer) {
    float4 __out;
    asm("tex.a1d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w)
        : "l"(__obj), "r"(__layer), "f"(__x));
    return __out;
  }

  // Selected by uint4: uses the .u32 form.
  template <>
  __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj,
                                              float __x, int __layer) {
    float4 __out;
    asm("tex.a1d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w)
        : "l"(__obj), "r"(__layer), "f"(__x));
    return __out;
  }
};
// Alias: op "__itex1DLayered" reuses the "__tex1DLayered_v2" implementation
// by inheriting its __run members.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex1DLayered")>>
: __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex1DLayered_v2")>> {};
// Fetch for op "__tex1DLayeredGrad_v2": float coordinate, integer layer and
// two scalar gradients. Each __run<T> emits
// tex.grad.a1d.v4.<s32|u32|f32>.f32 with operands (layer, x), dPdx, dPdy.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex1DLayeredGrad_v2")>> {
  // Declaration only; the supported result types are specialized below.
  template <class T>
  __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x,
                                         int __layer, float __dPdx,
                                         float __dPdy);

  // int4 result.
  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj,
                                           float __x, int __layer,
                                           float __dPdx, float __dPdy) {
    int4 __out;
    asm("tex.grad.a1d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}], {%7}, {%8};"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__dPdx), "f"(__dPdy));
    return __out;
  }

  // uint4 result.
  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, int __layer,
                                             float __dPdx, float __dPdy) {
    uint4 __out;
    asm("tex.grad.a1d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}], {%7}, {%8};"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__dPdx), "f"(__dPdy));
    return __out;
  }

  // float4 result.
  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, int __layer,
                                               float __dPdx, float __dPdy) {
    float4 __out;
    asm("tex.grad.a1d.v4.f32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}], {%7}, {%8};"
        : "=f"(__out.x), "=f"(__out.y), "=f"(__out.z), "=f"(__out.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__dPdx), "f"(__dPdy));
    return __out;
  }
};
// Fetch for op "__tex1DLayeredGrad_rmnf_v2": layered 1D gradient fetch using
// the integer instruction forms (tex.grad.a1d.v4.<s32|u32>.f32), with the
// result always handed back as float4.
// NOTE(review): the float members are bound with the "r" constraint, so the
// register bits are stored into the floats without conversion; presumably the
// texture delivers float data in this mode -- confirm.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex1DLayeredGrad_rmnf_v2")>> {
  // Declaration only; T selects the instruction type, not the return type.
  template <class T>
  __attribute__((device)) static float4 __run(cudaTextureObject_t __obj,
                                              float __x, int __layer,
                                              float __dPdx, float __dPdy);

  // Selected by int4: uses the .s32 form.
  template <>
  __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj,
                                             float __x, int __layer,
                                             float __dPdx, float __dPdy) {
    float4 __out;
    asm("tex.grad.a1d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}], {%7}, {%8};"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__dPdx), "f"(__dPdy));
    return __out;
  }

  // Selected by uint4: uses the .u32 form.
  template <>
  __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj,
                                              float __x, int __layer,
                                              float __dPdx, float __dPdy) {
    float4 __out;
    asm("tex.grad.a1d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}], {%7}, {%8};"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__dPdx), "f"(__dPdy));
    return __out;
  }
};
// Alias: op "__itex1DLayeredGrad" reuses the "__tex1DLayeredGrad_v2"
// implementation by inheriting its __run members.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex1DLayeredGrad")>>
: __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex1DLayeredGrad_v2")>> {};
// Fetch for op "__tex1DLayeredLod_v2": float coordinate, integer layer and a
// float level. Each __run<T> emits tex.level.a1d.v4.<s32|u32|f32>.f32 with
// operands (layer, x), level.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex1DLayeredLod_v2")>> {
  // Declaration only; the supported result types are specialized below.
  template <class T>
  __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x,
                                         int __layer, float __level);

  // int4 result.
  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj,
                                           float __x, int __layer,
                                           float __level) {
    int4 __out;
    asm("tex.level.a1d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}], %7;"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__level));
    return __out;
  }

  // uint4 result.
  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, int __layer,
                                             float __level) {
    uint4 __out;
    asm("tex.level.a1d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}], %7;"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__level));
    return __out;
  }

  // float4 result.
  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, int __layer,
                                               float __level) {
    float4 __out;
    asm("tex.level.a1d.v4.f32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}], %7;"
        : "=f"(__out.x), "=f"(__out.y), "=f"(__out.z), "=f"(__out.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__level));
    return __out;
  }
};
// Fetch for op "__tex1DLayeredLod_rmnf_v2": layered 1D level fetch using the
// integer instruction forms (tex.level.a1d.v4.<s32|u32>.f32), with the result
// always handed back as float4.
// NOTE(review): the float members are bound with the "r" constraint, so the
// register bits are stored into the floats without conversion; presumably the
// texture delivers float data in this mode -- confirm.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex1DLayeredLod_rmnf_v2")>> {
  // Declaration only; T selects the instruction type, not the return type.
  template <class T>
  __attribute__((device)) static float4 __run(cudaTextureObject_t __obj,
                                              float __x, int __layer,
                                              float __level);

  // Selected by int4: uses the .s32 form.
  template <>
  __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj,
                                             float __x, int __layer,
                                             float __level) {
    float4 __out;
    asm("tex.level.a1d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}], %7;"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__level));
    return __out;
  }

  // Selected by uint4: uses the .u32 form.
  template <>
  __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj,
                                              float __x, int __layer,
                                              float __level) {
    float4 __out;
    asm("tex.level.a1d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}], %7;"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__level));
    return __out;
  }
};
// Alias: op "__itex1DLayeredLod" reuses the "__tex1DLayeredLod_v2"
// implementation by inheriting its __run members.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex1DLayeredLod")>>
: __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex1DLayeredLod_v2")>> {};
// Fetch for op "__tex1DLod_v2": one float coordinate plus a float level.
// Each __run<T> emits tex.level.1d.v4.<s32|u32|f32>.f32.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex1DLod_v2")>> {
  // Declaration only; the supported result types are specialized below.
  template <class T>
  __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x,
                                         float __level);

  // int4 result.
  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj,
                                           float __x, float __level) {
    int4 __out;
    asm("tex.level.1d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5}], %6;"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w)
        : "l"(__obj), "f"(__x), "f"(__level));
    return __out;
  }

  // uint4 result.
  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __level) {
    uint4 __out;
    asm("tex.level.1d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5}], %6;"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w)
        : "l"(__obj), "f"(__x), "f"(__level));
    return __out;
  }

  // float4 result.
  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __level) {
    float4 __out;
    asm("tex.level.1d.v4.f32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5}], %6;"
        : "=f"(__out.x), "=f"(__out.y), "=f"(__out.z), "=f"(__out.w)
        : "l"(__obj), "f"(__x), "f"(__level));
    return __out;
  }
};
// Fetch for op "__tex1DLod_rmnf_v2": 1D level fetch using the integer
// instruction forms (tex.level.1d.v4.<s32|u32>.f32), with the result always
// handed back as float4.
// NOTE(review): the float members are bound with the "r" constraint, so the
// register bits are stored into the floats without conversion; presumably the
// texture delivers float data in this mode -- confirm.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex1DLod_rmnf_v2")>> {
  // Declaration only; T selects the instruction type, not the return type.
  template <class T>
  __attribute__((device)) static float4 __run(cudaTextureObject_t __obj,
                                              float __x, float __level);

  // Selected by int4: uses the .s32 form.
  template <>
  __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj,
                                             float __x, float __level) {
    float4 __out;
    asm("tex.level.1d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5}], %6;"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w)
        : "l"(__obj), "f"(__x), "f"(__level));
    return __out;
  }

  // Selected by uint4: uses the .u32 form.
  template <>
  __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj,
                                              float __x, float __level) {
    float4 __out;
    asm("tex.level.1d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5}], %6;"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w)
        : "l"(__obj), "f"(__x), "f"(__level));
    return __out;
  }
};
// Alias: op "__itex1DLod" reuses the "__tex1DLod_v2" implementation by
// inheriting its __run members.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex1DLod")>>
: __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex1DLod_v2")>> {};
// Fetch for op "__tex2D_v2": texture handle plus two float coordinates, four
// components returned. Each __run<T> emits tex.2d.v4.<s32|u32|f32>.f32.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex2D_v2")>> {
  // Declaration only; the supported result types are specialized below.
  template <class T>
  __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x,
                                         float __y);

  // int4 result.
  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj,
                                           float __x, float __y) {
    int4 __out;
    asm("tex.2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __out;
  }

  // uint4 result.
  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y) {
    uint4 __out;
    asm("tex.2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __out;
  }

  // float4 result.
  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y) {
    float4 __out;
    asm("tex.2d.v4.f32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=f"(__out.x), "=f"(__out.y), "=f"(__out.z), "=f"(__out.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __out;
  }
};
// Fetch for op "__tex2D_rmnf_v2": 2D fetch using the integer instruction
// forms (tex.2d.v4.<s32|u32>.f32), with the result always handed back as
// float4.
// NOTE(review): the float members are bound with the "r" constraint, so the
// register bits are stored into the floats without conversion; presumably the
// texture delivers float data in this mode -- confirm.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex2D_rmnf_v2")>> {
  // Declaration only; T selects the instruction type, not the return type.
  template <class T>
  __attribute__((device)) static float4 __run(cudaTextureObject_t __obj,
                                              float __x, float __y);

  // Selected by int4: uses the .s32 form.
  template <>
  __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj,
                                             float __x, float __y) {
    float4 __out;
    asm("tex.2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __out;
  }

  // Selected by uint4: uses the .u32 form.
  template <>
  __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj,
                                              float __x, float __y) {
    float4 __out;
    asm("tex.2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __out;
  }
};
// Alias: op "__itex2D" reuses the "__tex2D_v2" implementation by inheriting
// its __run members.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex2D")>>
: __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex2D_v2")>> {};
// Fetch for op "__itex2D_sparse": 2D fetch (tex.2d.v4.<s32|u32|f32>.f32) that
// also captures the instruction's predicate output in a scoped register %p0
// and stores 1 or 0 through __ir via selp.u16.
// NOTE(review): presumably the predicate reports whether the texel was
// resident -- confirm. __ir is dereferenced unconditionally.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex2D_sparse")>> {
  // Declaration only; the supported result types are specialized below.
  template <class T>
  __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x,
                                         float __y, unsigned char* __ir);

  // int4 result.
  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj,
                                           float __x, float __y,
                                           unsigned char* __ir) {
    int4 __out;
    asm("{.reg .pred %%p0;\n\t"
        "tex.2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}];\n\t"
        " selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __out;
  }

  // uint4 result.
  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y,
                                             unsigned char* __ir) {
    uint4 __out;
    asm("{.reg .pred %%p0;\n\t"
        "tex.2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}];\n\t"
        " selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __out;
  }

  // float4 result.
  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               unsigned char* __ir) {
    float4 __out;
    asm("{.reg .pred %%p0;\n\t"
        "tex.2d.v4.f32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}];\n\t"
        " selp.u16 %4, 1, 0, %%p0; }"
        : "=f"(__out.x), "=f"(__out.y), "=f"(__out.z), "=f"(__out.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __out;
  }
};
// Fetch for op "__tex2DGrad_v2": two float coordinates plus two float2
// gradients passed by pointer; their x/y members are read and fed to
// tex.grad.2d.v4.<s32|u32|f32>.f32. Both pointers are dereferenced
// unconditionally.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex2DGrad_v2")>> {
  // Declaration only; the supported result types are specialized below.
  template <class T>
  __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x,
                                         float __y, const float2* __dPdx,
                                         const float2* __dPdy);

  // int4 result.
  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj,
                                           float __x, float __y,
                                           const float2* __dPdx,
                                           const float2* __dPdy) {
    int4 __out;
    asm("tex.grad.2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}], {%7, %8}, {%9, %10};"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w)
        : "l"(__obj), "f"(__x), "f"(__y),
          "f"(__dPdx->x), "f"(__dPdx->y),
          "f"(__dPdy->x), "f"(__dPdy->y));
    return __out;
  }

  // uint4 result.
  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y,
                                             const float2* __dPdx,
                                             const float2* __dPdy) {
    uint4 __out;
    asm("tex.grad.2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}], {%7, %8}, {%9, %10};"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w)
        : "l"(__obj), "f"(__x), "f"(__y),
          "f"(__dPdx->x), "f"(__dPdx->y),
          "f"(__dPdy->x), "f"(__dPdy->y));
    return __out;
  }

  // float4 result.
  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               const float2* __dPdx,
                                               const float2* __dPdy) {
    float4 __out;
    asm("tex.grad.2d.v4.f32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}], {%7, %8}, {%9, %10};"
        : "=f"(__out.x), "=f"(__out.y), "=f"(__out.z), "=f"(__out.w)
        : "l"(__obj), "f"(__x), "f"(__y),
          "f"(__dPdx->x), "f"(__dPdx->y),
          "f"(__dPdy->x), "f"(__dPdy->y));
    return __out;
  }
};
// Fetch for op "__tex2DGrad_rmnf_v2": 2D gradient fetch using the integer
// instruction forms (tex.grad.2d.v4.<s32|u32>.f32), with the result always
// handed back as float4. Gradient pointers are dereferenced unconditionally.
// NOTE(review): the float members are bound with the "r" constraint, so the
// register bits are stored into the floats without conversion; presumably the
// texture delivers float data in this mode -- confirm.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex2DGrad_rmnf_v2")>> {
  // Declaration only; T selects the instruction type, not the return type.
  template <class T>
  __attribute__((device)) static float4 __run(cudaTextureObject_t __obj,
                                              float __x, float __y,
                                              const float2* __dPdx,
                                              const float2* __dPdy);

  // Selected by int4: uses the .s32 form.
  template <>
  __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj,
                                             float __x, float __y,
                                             const float2* __dPdx,
                                             const float2* __dPdy) {
    float4 __out;
    asm("tex.grad.2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}], {%7, %8}, {%9, %10};"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w)
        : "l"(__obj), "f"(__x), "f"(__y),
          "f"(__dPdx->x), "f"(__dPdx->y),
          "f"(__dPdy->x), "f"(__dPdy->y));
    return __out;
  }

  // Selected by uint4: uses the .u32 form.
  template <>
  __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj,
                                              float __x, float __y,
                                              const float2* __dPdx,
                                              const float2* __dPdy) {
    float4 __out;
    asm("tex.grad.2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}], {%7, %8}, {%9, %10};"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w)
        : "l"(__obj), "f"(__x), "f"(__y),
          "f"(__dPdx->x), "f"(__dPdx->y),
          "f"(__dPdy->x), "f"(__dPdy->y));
    return __out;
  }
};
// Alias: op "__itex2DGrad_v2" reuses the "__tex2DGrad_v2" implementation by
// inheriting its __run members.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex2DGrad_v2")>>
: __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex2DGrad_v2")>> {};
// Fetch for op "__itex2DGrad_sparse": 2D gradient fetch
// (tex.grad.2d.v4.<s32|u32|f32>.f32) that also captures the instruction's
// predicate output in a scoped register %p0 and stores 1 or 0 through __ir
// via selp.u16.
// NOTE(review): presumably the predicate reports whether the texel was
// resident -- confirm. __ir and both gradient pointers are dereferenced
// unconditionally.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex2DGrad_sparse")>> {
  // Declaration only; the supported result types are specialized below.
  template <class T>
  __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x,
                                         float __y, const float2* __dPdx,
                                         const float2* __dPdy,
                                         unsigned char* __ir);

  // int4 result.
  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj,
                                           float __x, float __y,
                                           const float2* __dPdx,
                                           const float2* __dPdy,
                                           unsigned char* __ir) {
    int4 __out;
    asm("{.reg .pred %%p0;\n\t"
        "tex.grad.2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}], {%8, %9}, {%10, %11};\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y),
          "f"(__dPdx->x), "f"(__dPdx->y),
          "f"(__dPdy->x), "f"(__dPdy->y));
    return __out;
  }

  // uint4 result.
  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y,
                                             const float2* __dPdx,
                                             const float2* __dPdy,
                                             unsigned char* __ir) {
    uint4 __out;
    asm("{.reg .pred %%p0;\n\t"
        "tex.grad.2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}], {%8, %9}, {%10, %11};\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y),
          "f"(__dPdx->x), "f"(__dPdx->y),
          "f"(__dPdy->x), "f"(__dPdy->y));
    return __out;
  }

  // float4 result.
  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               const float2* __dPdx,
                                               const float2* __dPdy,
                                               unsigned char* __ir) {
    float4 __out;
    asm("{.reg .pred %%p0;\n\t"
        "tex.grad.2d.v4.f32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}], {%8, %9}, {%10, %11};\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=f"(__out.x), "=f"(__out.y), "=f"(__out.z), "=f"(__out.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y),
          "f"(__dPdx->x), "f"(__dPdx->y),
          "f"(__dPdy->x), "f"(__dPdy->y));
    return __out;
  }
};
# 259 "__clang_cuda_texture_intrinsics.h" 3
// Fetch for op "__tex2DLayered_v2": two float coordinates plus an integer
// layer. Each __run<T> emits tex.a2d.v4.<s32|u32|f32>.f32 with a 4-element
// coordinate vector (layer, x, y, y); the last slot just repeats y.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex2DLayered_v2")>> {
  // Declaration only; the supported result types are specialized below.
  template <class T>
  __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x,
                                         float __y, int __layer);

  // int4 result.
  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj,
                                           float __x, float __y,
                                           int __layer) {
    int4 __out;
    asm("tex.a2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y));
    return __out;
  }

  // uint4 result.
  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y,
                                             int __layer) {
    uint4 __out;
    asm("tex.a2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y));
    return __out;
  }

  // float4 result.
  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               int __layer) {
    float4 __out;
    asm("tex.a2d.v4.f32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];"
        : "=f"(__out.x), "=f"(__out.y), "=f"(__out.z), "=f"(__out.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y));
    return __out;
  }
};
// Fetch for op "__tex2DLayered_rmnf_v2": layered 2D fetch using the integer
// instruction forms (tex.a2d.v4.<s32|u32>.f32), with the result always handed
// back as float4. Coordinate vector is (layer, x, y, y).
// NOTE(review): the float members are bound with the "r" constraint, so the
// register bits are stored into the floats without conversion; presumably the
// texture delivers float data in this mode -- confirm.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex2DLayered_rmnf_v2")>> {
  // Declaration only; T selects the instruction type, not the return type.
  template <class T>
  __attribute__((device)) static float4 __run(cudaTextureObject_t __obj,
                                              float __x, float __y,
                                              int __layer);

  // Selected by int4: uses the .s32 form.
  template <>
  __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj,
                                             float __x, float __y,
                                             int __layer) {
    float4 __out;
    asm("tex.a2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y));
    return __out;
  }

  // Selected by uint4: uses the .u32 form.
  template <>
  __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj,
                                              float __x, float __y,
                                              int __layer) {
    float4 __out;
    asm("tex.a2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y));
    return __out;
  }
};
// Alias: op "__itex2DLayered" reuses the "__tex2DLayered_v2" implementation
// by inheriting its __run members.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex2DLayered")>>
: __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex2DLayered_v2")>> {};
// Fetch for op "__itex2DLayered_sparse": layered 2D fetch
// (tex.a2d.v4.<s32|u32|f32>.f32, coordinate vector (layer, x, y, y)) that
// also captures the instruction's predicate output in a scoped register %p0
// and stores 1 or 0 through __ir via selp.u16.
// NOTE(review): presumably the predicate reports whether the texel was
// resident -- confirm. __ir is dereferenced unconditionally.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex2DLayered_sparse")>> {
  // Declaration only; the supported result types are specialized below.
  template <class T>
  __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x,
                                         float __y, int __layer,
                                         unsigned char* __ir);

  // int4 result.
  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj,
                                           float __x, float __y, int __layer,
                                           unsigned char* __ir) {
    int4 __out;
    asm("{.reg .pred %%p0;\n\t"
        "tex.a2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}];\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w),
          "=h"(*__ir)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y));
    return __out;
  }

  // uint4 result.
  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y,
                                             int __layer,
                                             unsigned char* __ir) {
    uint4 __out;
    asm("{.reg .pred %%p0;\n\t"
        "tex.a2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}];\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w),
          "=h"(*__ir)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y));
    return __out;
  }

  // float4 result.
  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               int __layer,
                                               unsigned char* __ir) {
    float4 __out;
    asm("{.reg .pred %%p0;\n\t"
        "tex.a2d.v4.f32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}];\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=f"(__out.x), "=f"(__out.y), "=f"(__out.z), "=f"(__out.w),
          "=h"(*__ir)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y));
    return __out;
  }
};
# 273 "__clang_cuda_texture_intrinsics.h" 3
// Fetch for op "__tex2DLayeredGrad_v2": two float coordinates, an integer
// layer and two float2 gradients passed by pointer. Each __run<T> emits
// tex.grad.a2d.v4.<s32|u32|f32>.f32 with coordinate vector (layer, x, y, y)
// followed by the gradients' x/y members. Both gradient pointers are
// dereferenced unconditionally.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex2DLayeredGrad_v2")>> {
  // Declaration only; the supported result types are specialized below.
  template <class T>
  __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x,
                                         float __y, int __layer,
                                         const float2* __dPdx,
                                         const float2* __dPdy);

  // int4 result.
  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj,
                                           float __x, float __y, int __layer,
                                           const float2* __dPdx,
                                           const float2* __dPdy) {
    int4 __out;
    asm("tex.grad.a2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], {%8, %9}, {%10, %11};"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y),
          "f"(__dPdx->x), "f"(__dPdx->y),
          "f"(__dPdy->x), "f"(__dPdy->y));
    return __out;
  }

  // uint4 result.
  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y,
                                             int __layer,
                                             const float2* __dPdx,
                                             const float2* __dPdy) {
    uint4 __out;
    asm("tex.grad.a2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], {%8, %9}, {%10, %11};"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y),
          "f"(__dPdx->x), "f"(__dPdx->y),
          "f"(__dPdy->x), "f"(__dPdy->y));
    return __out;
  }

  // float4 result.
  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               int __layer,
                                               const float2* __dPdx,
                                               const float2* __dPdy) {
    float4 __out;
    asm("tex.grad.a2d.v4.f32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], {%8, %9}, {%10, %11};"
        : "=f"(__out.x), "=f"(__out.y), "=f"(__out.z), "=f"(__out.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y),
          "f"(__dPdx->x), "f"(__dPdx->y),
          "f"(__dPdy->x), "f"(__dPdy->y));
    return __out;
  }
};
// Fetch for op "__tex2DLayeredGrad_rmnf_v2": layered 2D gradient fetch using
// the integer instruction forms (tex.grad.a2d.v4.<s32|u32>.f32), with the
// result always handed back as float4. Gradient pointers are dereferenced
// unconditionally.
// NOTE(review): the float members are bound with the "r" constraint, so the
// register bits are stored into the floats without conversion; presumably the
// texture delivers float data in this mode -- confirm.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex2DLayeredGrad_rmnf_v2")>> {
  // Declaration only; T selects the instruction type, not the return type.
  template <class T>
  __attribute__((device)) static float4 __run(cudaTextureObject_t __obj,
                                              float __x, float __y,
                                              int __layer,
                                              const float2* __dPdx,
                                              const float2* __dPdy);

  // Selected by int4: uses the .s32 form.
  template <>
  __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj,
                                             float __x, float __y,
                                             int __layer,
                                             const float2* __dPdx,
                                             const float2* __dPdy) {
    float4 __out;
    asm("tex.grad.a2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], {%8, %9}, {%10, %11};"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y),
          "f"(__dPdx->x), "f"(__dPdx->y),
          "f"(__dPdy->x), "f"(__dPdy->y));
    return __out;
  }

  // Selected by uint4: uses the .u32 form.
  template <>
  __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj,
                                              float __x, float __y,
                                              int __layer,
                                              const float2* __dPdx,
                                              const float2* __dPdy) {
    float4 __out;
    asm("tex.grad.a2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], {%8, %9}, {%10, %11};"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y),
          "f"(__dPdx->x), "f"(__dPdx->y),
          "f"(__dPdy->x), "f"(__dPdy->y));
    return __out;
  }
};
// Alias: op "__itex2DLayeredGrad_v2" reuses the "__tex2DLayeredGrad_v2"
// implementation by inheriting its __run members.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex2DLayeredGrad_v2")>>
: __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex2DLayeredGrad_v2")>> {};
// Fetch for op "__itex2DLayeredGrad_sparse": layered 2D gradient fetch
// (tex.grad.a2d.v4.<s32|u32|f32>.f32, coordinate vector (layer, x, y, y))
// that also captures the instruction's predicate output in a scoped register
// %p0 and stores 1 or 0 through __ir via selp.u16.
// NOTE(review): presumably the predicate reports whether the texel was
// resident -- confirm. __ir and both gradient pointers are dereferenced
// unconditionally.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex2DLayeredGrad_sparse")>> {
  // Declaration only; the supported result types are specialized below.
  template <class T>
  __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x,
                                         float __y, int __layer,
                                         const float2* __dPdx,
                                         const float2* __dPdy,
                                         unsigned char* __ir);

  // int4 result.
  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj,
                                           float __x, float __y, int __layer,
                                           const float2* __dPdx,
                                           const float2* __dPdy,
                                           unsigned char* __ir) {
    int4 __out;
    asm("{.reg .pred %%p0;\n\t"
        "tex.grad.a2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], "
        "{%9, %10}, {%11, %12};\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w),
          "=h"(*__ir)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y),
          "f"(__dPdx->x), "f"(__dPdx->y),
          "f"(__dPdy->x), "f"(__dPdy->y));
    return __out;
  }

  // uint4 result.
  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y,
                                             int __layer,
                                             const float2* __dPdx,
                                             const float2* __dPdy,
                                             unsigned char* __ir) {
    uint4 __out;
    asm("{.reg .pred %%p0;\n\t"
        "tex.grad.a2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], "
        "{%9, %10}, {%11, %12};\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__out.x), "=r"(__out.y), "=r"(__out.z), "=r"(__out.w),
          "=h"(*__ir)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y),
          "f"(__dPdx->x), "f"(__dPdx->y),
          "f"(__dPdy->x), "f"(__dPdy->y));
    return __out;
  }

  // float4 result.
  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               int __layer,
                                               const float2* __dPdx,
                                               const float2* __dPdy,
                                               unsigned char* __ir) {
    float4 __out;
    asm("{.reg .pred %%p0;\n\t"
        "tex.grad.a2d.v4.f32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], "
        "{%9, %10}, {%11, %12};\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=f"(__out.x), "=f"(__out.y), "=f"(__out.z), "=f"(__out.w),
          "=h"(*__ir)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y),
          "f"(__dPdx->x), "f"(__dPdx->y),
          "f"(__dPdy->x), "f"(__dPdy->y));
    return __out;
  }
};
# 293 "__clang_cuda_texture_intrinsics.h" 3
// Fetcher for the "__tex2DLayeredLod_v2" texture op.
//
// Each __run<T> issues one `tex.level.a2d.v4` instruction whose destination
// type (.s32 / .u32 / .f32) matches the element type of T.
//
// asm operands: %0-%3 result, %4 texture object, %5 layer, %6/%7 x/y (the
// fourth coordinate slot repeats %7), %8 __level.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex2DLayeredLod_v2")>> {
  // Primary declaration; defined only through the specializations below.
  template <class T>
  __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x,
                                         float __y, int __layer,
                                         float __level);

  // Signed 32-bit result.
  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj,
                                           float __x, float __y, int __layer,
                                           float __level) {
    int4 __res;
    asm("tex.level.a2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__level));
    return __res;
  }

  // Unsigned 32-bit result.
  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y, int __layer,
                                             float __level) {
    uint4 __res;
    asm("tex.level.a2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__level));
    return __res;
  }

  // 32-bit float result.
  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               int __layer, float __level) {
    float4 __res;
    asm("tex.level.a2d.v4.f32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;"
        : "=f"(__res.x), "=f"(__res.y), "=f"(__res.z), "=f"(__res.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__level));
    return __res;
  }
};
// Fetcher for the "__tex2DLayeredLod_rmnf_v2" texture op.
//
// Both specializations return float4 while using an integer PTX destination
// type (.s32 / .u32); the result registers are bound with the "r" constraint
// onto the float members of the result.
// NOTE(review): "rmnf" presumably means "read mode normalized float" - confirm.
//
// asm operands: %0-%3 result, %4 texture object, %5 layer, %6/%7 x/y (the
// fourth coordinate slot repeats %7), %8 __level.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex2DLayeredLod_rmnf_v2")>> {
  // Primary declaration; only <int4> and <uint4> are defined in this struct.
  template <class T>
  __attribute__((device)) static float4 __run(cudaTextureObject_t __obj,
                                              float __x, float __y,
                                              int __layer, float __level);

  // T = int4 -> tex.level.a2d.v4.s32.f32
  template <>
  __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj,
                                             float __x, float __y,
                                             int __layer, float __level) {
    float4 __res;
    asm("tex.level.a2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__level));
    return __res;
  }

  // T = uint4 -> tex.level.a2d.v4.u32.f32
  template <>
  __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj,
                                              float __x, float __y,
                                              int __layer, float __level) {
    float4 __res;
    asm("tex.level.a2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__level));
    return __res;
  }
};
// "__itex2DLayeredLod" has no implementation of its own: it inherits every
// __run<T> from the "__tex2DLayeredLod_v2" fetcher.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex2DLayeredLod")>>
    : public __tex_fetch_v4<
          __Tag<__nvvm_texture_op("__tex2DLayeredLod_v2")>> {};
// Fetcher for the "__itex2DLayeredLod_sparse" texture op.
//
// Each __run<T> issues `tex.level.a2d.v4` with a second, predicate destination
// (%p0, declared inside the asm scope) and then converts that predicate to
// 1 or 0 with `selp.u16`, storing it through __ir.
// NOTE(review): the predicate is presumably the sparse-texture residency flag
// - confirm against the PTX ISA.
//
// asm operands: %0-%3 result, %4 *__ir, %5 texture object, %6 layer, %7/%8 x/y
// (the fourth coordinate slot repeats %8), %9 __level.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex2DLayeredLod_sparse")>> {
  // Primary declaration; defined only through the specializations below.
  template <class T>
  __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x,
                                         float __y, int __layer, float __level,
                                         unsigned char* __ir);

  // Signed 32-bit result.
  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj,
                                           float __x, float __y, int __layer,
                                           float __level,
                                           unsigned char* __ir) {
    int4 __res;
    asm("{.reg .pred %%p0;\n\t"
        "tex.level.a2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], %9;\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w),
          "=h"(*__ir)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__level));
    return __res;
  }

  // Unsigned 32-bit result.
  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y, int __layer,
                                             float __level,
                                             unsigned char* __ir) {
    uint4 __res;
    asm("{.reg .pred %%p0;\n\t"
        "tex.level.a2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], %9;\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w),
          "=h"(*__ir)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__level));
    return __res;
  }

  // 32-bit float result.
  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               int __layer, float __level,
                                               unsigned char* __ir) {
    float4 __res;
    asm("{.reg .pred %%p0;\n\t"
        "tex.level.a2d.v4.f32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], %9;\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=f"(__res.x), "=f"(__res.y), "=f"(__res.z), "=f"(__res.w),
          "=h"(*__ir)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__level));
    return __res;
  }
};
# 308 "__clang_cuda_texture_intrinsics.h" 3
// Fetcher for the "__tex2DLod_v2" texture op.
//
// Each __run<T> issues one `tex.level.2d.v4` instruction whose destination
// type (.s32 / .u32 / .f32) matches the element type of T.
//
// asm operands: %0-%3 result, %4 texture object, %5/%6 x/y, %7 __level.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex2DLod_v2")>> {
  // Primary declaration; defined only through the specializations below.
  template <class T>
  __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x,
                                         float __y, float __level);

  // Signed 32-bit result.
  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj,
                                           float __x, float __y,
                                           float __level) {
    int4 __res;
    asm("tex.level.2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}], %7;"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__level));
    return __res;
  }

  // Unsigned 32-bit result.
  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y,
                                             float __level) {
    uint4 __res;
    asm("tex.level.2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}], %7;"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__level));
    return __res;
  }

  // 32-bit float result.
  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               float __level) {
    float4 __res;
    asm("tex.level.2d.v4.f32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}], %7;"
        : "=f"(__res.x), "=f"(__res.y), "=f"(__res.z), "=f"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__level));
    return __res;
  }
};
// Fetcher for the "__tex2DLod_rmnf_v2" texture op.
//
// Both specializations return float4 while using an integer PTX destination
// type (.s32 / .u32); the result registers are bound with the "r" constraint
// onto the float members of the result.
// NOTE(review): "rmnf" presumably means "read mode normalized float" - confirm.
//
// asm operands: %0-%3 result, %4 texture object, %5/%6 x/y, %7 __level.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex2DLod_rmnf_v2")>> {
  // Primary declaration; only <int4> and <uint4> are defined in this struct.
  template <class T>
  __attribute__((device)) static float4 __run(cudaTextureObject_t __obj,
                                              float __x, float __y,
                                              float __level);

  // T = int4 -> tex.level.2d.v4.s32.f32
  template <>
  __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj,
                                             float __x, float __y,
                                             float __level) {
    float4 __res;
    asm("tex.level.2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}], %7;"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__level));
    return __res;
  }

  // T = uint4 -> tex.level.2d.v4.u32.f32
  template <>
  __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj,
                                              float __x, float __y,
                                              float __level) {
    float4 __res;
    asm("tex.level.2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}], %7;"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__level));
    return __res;
  }
};
// "__itex2DLod" has no implementation of its own: it inherits every __run<T>
// from the "__tex2DLod_v2" fetcher.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex2DLod")>>
    : public __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex2DLod_v2")>> {};
// Fetcher for the "__itex2DLod_sparse" texture op.
//
// Each __run<T> issues `tex.level.2d.v4` with a second, predicate destination
// (%p0, declared inside the asm scope) and then converts that predicate to
// 1 or 0 with `selp.u16`, storing it through __ir.
// NOTE(review): the predicate is presumably the sparse-texture residency flag
// - confirm against the PTX ISA.
//
// asm operands: %0-%3 result, %4 *__ir, %5 texture object, %6/%7 x/y,
// %8 __level.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex2DLod_sparse")>> {
  // Primary declaration; defined only through the specializations below.
  template <class T>
  __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x,
                                         float __y, float __level,
                                         unsigned char* __ir);

  // Signed 32-bit result.
  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj,
                                           float __x, float __y, float __level,
                                           unsigned char* __ir) {
    int4 __res;
    asm("{.reg .pred %%p0;\n\t"
        "tex.level.2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}], %8;\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__level));
    return __res;
  }

  // Unsigned 32-bit result.
  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y,
                                             float __level,
                                             unsigned char* __ir) {
    uint4 __res;
    asm("{.reg .pred %%p0;\n\t"
        "tex.level.2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}], %8;\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__level));
    return __res;
  }

  // 32-bit float result.
  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               float __level,
                                               unsigned char* __ir) {
    float4 __res;
    asm("{.reg .pred %%p0;\n\t"
        "tex.level.2d.v4.f32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}], %8;\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=f"(__res.x), "=f"(__res.y), "=f"(__res.z), "=f"(__res.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__level));
    return __res;
  }
};
# 339 "__clang_cuda_texture_intrinsics.h" 3
// Component-0 implementation of the "__tex2Dgather_v2" texture op, keyed by
// the tag 10000 + op * 100 + 0.
//
// Each __run<T> issues `tld4.r.2d.v4` with the destination type matching T.
// __comp is accepted for signature compatibility but not read here; the
// component is fixed by the `.r` selector in the instruction.
//
// asm operands: %0-%3 result, %4 texture object, %5/%6 x/y.
template <>
struct __tex_fetch_v4<
    __Tag<10000 + __nvvm_texture_op("__tex2Dgather_v2") * 100 + 0>> {
  // Primary declaration; defined only through the specializations below.
  template <class T>
  __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x,
                                         float __y, int __comp);

  // Signed 32-bit result.
  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj,
                                           float __x, float __y, int __comp) {
    int4 __res;
    asm("tld4.r.2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __res;
  }

  // Unsigned 32-bit result.
  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y,
                                             int __comp) {
    uint4 __res;
    asm("tld4.r.2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __res;
  }

  // 32-bit float result.
  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               int __comp) {
    float4 __res;
    asm("tld4.r.2d.v4.f32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=f"(__res.x), "=f"(__res.y), "=f"(__res.z), "=f"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __res;
  }
};
// Component-0 implementation of the "__tex2Dgather_rmnf_v2" texture op, keyed
// by the tag 10000 + op * 100 + 0.
//
// Both specializations issue `tld4.r.2d.v4` with an integer destination type
// but return float4: the result registers are bound with the "r" constraint
// onto the float members. __comp is not read here.
// NOTE(review): "rmnf" presumably means "read mode normalized float" - confirm.
//
// asm operands: %0-%3 result, %4 texture object, %5/%6 x/y.
template <>
struct __tex_fetch_v4<
    __Tag<10000 + __nvvm_texture_op("__tex2Dgather_rmnf_v2") * 100 + 0>> {
  // Primary declaration; only <int4> and <uint4> are defined in this struct.
  template <class T>
  __attribute__((device)) static float4 __run(cudaTextureObject_t __obj,
                                              float __x, float __y,
                                              int __comp);

  // T = int4 -> tld4.r.2d.v4.s32.f32
  template <>
  __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj,
                                             float __x, float __y,
                                             int __comp) {
    float4 __res;
    asm("tld4.r.2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __res;
  }

  // T = uint4 -> tld4.r.2d.v4.u32.f32
  template <>
  __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj,
                                              float __x, float __y,
                                              int __comp) {
    float4 __res;
    asm("tld4.r.2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __res;
  }
};
// Component 0 of "__itex2Dgather" reuses the component-0 implementation of
// "__tex2Dgather_v2" by inheriting from it.
template <>
struct __tex_fetch_v4<
    __Tag<10000 + __nvvm_texture_op("__itex2Dgather") * 100 + 0>>
    : public __tex_fetch_v4<
          __Tag<10000 + __nvvm_texture_op("__tex2Dgather_v2") * 100 + 0>> {};
// Component-0 implementation of the "__itex2Dgather_sparse" texture op, keyed
// by the tag 10000 + op * 100 + 0.
//
// Each __run<T> issues `tld4.r.2d.v4` with a second, predicate destination
// (%p0, declared inside the asm scope) and then converts that predicate to
// 1 or 0 with `selp.u16`, storing it through __ir. __comp is not read here.
// NOTE(review): the predicate is presumably the sparse-texture residency flag
// - confirm against the PTX ISA.
//
// asm operands: %0-%3 result, %4 *__ir, %5 texture object, %6/%7 x/y.
template <>
struct __tex_fetch_v4<
    __Tag<10000 + __nvvm_texture_op("__itex2Dgather_sparse") * 100 + 0>> {
  // Primary declaration; defined only through the specializations below.
  template <class T>
  __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x,
                                         float __y, unsigned char* __ir,
                                         int __comp);

  // Signed 32-bit result.
  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj,
                                           float __x, float __y,
                                           unsigned char* __ir, int __comp) {
    int4 __res;
    asm("{.reg .pred %%p0;\n\t"
        "tld4.r.2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}];\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __res;
  }

  // Unsigned 32-bit result.
  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y,
                                             unsigned char* __ir, int __comp) {
    uint4 __res;
    asm("{.reg .pred %%p0;\n\t"
        "tld4.r.2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}];\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __res;
  }

  // 32-bit float result.
  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               unsigned char* __ir,
                                               int __comp) {
    float4 __res;
    asm("{.reg .pred %%p0;\n\t"
        "tld4.r.2d.v4.f32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}];\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=f"(__res.x), "=f"(__res.y), "=f"(__res.z), "=f"(__res.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __res;
  }
};
;
// Component-1 implementation of the "__tex2Dgather_v2" texture op, keyed by
// the tag 10000 + op * 100 + 1.
//
// Each __run<T> issues `tld4.g.2d.v4` with the destination type matching T.
// __comp is accepted for signature compatibility but not read here; the
// component is fixed by the `.g` selector in the instruction.
//
// asm operands: %0-%3 result, %4 texture object, %5/%6 x/y.
template <>
struct __tex_fetch_v4<
    __Tag<10000 + __nvvm_texture_op("__tex2Dgather_v2") * 100 + 1>> {
  // Primary declaration; defined only through the specializations below.
  template <class T>
  __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x,
                                         float __y, int __comp);

  // Signed 32-bit result.
  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj,
                                           float __x, float __y, int __comp) {
    int4 __res;
    asm("tld4.g.2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __res;
  }

  // Unsigned 32-bit result.
  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y,
                                             int __comp) {
    uint4 __res;
    asm("tld4.g.2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __res;
  }

  // 32-bit float result.
  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               int __comp) {
    float4 __res;
    asm("tld4.g.2d.v4.f32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=f"(__res.x), "=f"(__res.y), "=f"(__res.z), "=f"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __res;
  }
};
// Component-1 implementation of the "__tex2Dgather_rmnf_v2" texture op, keyed
// by the tag 10000 + op * 100 + 1.
//
// Both specializations issue `tld4.g.2d.v4` with an integer destination type
// but return float4: the result registers are bound with the "r" constraint
// onto the float members. __comp is not read here.
// NOTE(review): "rmnf" presumably means "read mode normalized float" - confirm.
//
// asm operands: %0-%3 result, %4 texture object, %5/%6 x/y.
template <>
struct __tex_fetch_v4<
    __Tag<10000 + __nvvm_texture_op("__tex2Dgather_rmnf_v2") * 100 + 1>> {
  // Primary declaration; only <int4> and <uint4> are defined in this struct.
  template <class T>
  __attribute__((device)) static float4 __run(cudaTextureObject_t __obj,
                                              float __x, float __y,
                                              int __comp);

  // T = int4 -> tld4.g.2d.v4.s32.f32
  template <>
  __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj,
                                             float __x, float __y,
                                             int __comp) {
    float4 __res;
    asm("tld4.g.2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __res;
  }

  // T = uint4 -> tld4.g.2d.v4.u32.f32
  template <>
  __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj,
                                              float __x, float __y,
                                              int __comp) {
    float4 __res;
    asm("tld4.g.2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __res;
  }
};
// Component 1 of "__itex2Dgather" reuses the component-1 implementation of
// "__tex2Dgather_v2" by inheriting from it.
template <>
struct __tex_fetch_v4<
    __Tag<10000 + __nvvm_texture_op("__itex2Dgather") * 100 + 1>>
    : public __tex_fetch_v4<
          __Tag<10000 + __nvvm_texture_op("__tex2Dgather_v2") * 100 + 1>> {};
// Component-1 implementation of the "__itex2Dgather_sparse" texture op, keyed
// by the tag 10000 + op * 100 + 1.
//
// Each __run<T> issues `tld4.g.2d.v4` with a second, predicate destination
// (%p0, declared inside the asm scope) and then converts that predicate to
// 1 or 0 with `selp.u16`, storing it through __ir. __comp is not read here.
// NOTE(review): the predicate is presumably the sparse-texture residency flag
// - confirm against the PTX ISA.
//
// asm operands: %0-%3 result, %4 *__ir, %5 texture object, %6/%7 x/y.
template <>
struct __tex_fetch_v4<
    __Tag<10000 + __nvvm_texture_op("__itex2Dgather_sparse") * 100 + 1>> {
  // Primary declaration; defined only through the specializations below.
  template <class T>
  __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x,
                                         float __y, unsigned char* __ir,
                                         int __comp);

  // Signed 32-bit result.
  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj,
                                           float __x, float __y,
                                           unsigned char* __ir, int __comp) {
    int4 __res;
    asm("{.reg .pred %%p0;\n\t"
        "tld4.g.2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}];\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __res;
  }

  // Unsigned 32-bit result.
  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y,
                                             unsigned char* __ir, int __comp) {
    uint4 __res;
    asm("{.reg .pred %%p0;\n\t"
        "tld4.g.2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}];\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __res;
  }

  // 32-bit float result.
  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               unsigned char* __ir,
                                               int __comp) {
    float4 __res;
    asm("{.reg .pred %%p0;\n\t"
        "tld4.g.2d.v4.f32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}];\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=f"(__res.x), "=f"(__res.y), "=f"(__res.z), "=f"(__res.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __res;
  }
};
;
// Component-2 implementation of the "__tex2Dgather_v2" texture op, keyed by
// the tag 10000 + op * 100 + 2.
//
// Each __run<T> issues `tld4.b.2d.v4` with the destination type matching T.
// __comp is accepted for signature compatibility but not read here; the
// component is fixed by the `.b` selector in the instruction.
//
// asm operands: %0-%3 result, %4 texture object, %5/%6 x/y.
template <>
struct __tex_fetch_v4<
    __Tag<10000 + __nvvm_texture_op("__tex2Dgather_v2") * 100 + 2>> {
  // Primary declaration; defined only through the specializations below.
  template <class T>
  __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x,
                                         float __y, int __comp);

  // Signed 32-bit result.
  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj,
                                           float __x, float __y, int __comp) {
    int4 __res;
    asm("tld4.b.2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __res;
  }

  // Unsigned 32-bit result.
  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y,
                                             int __comp) {
    uint4 __res;
    asm("tld4.b.2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __res;
  }

  // 32-bit float result.
  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               int __comp) {
    float4 __res;
    asm("tld4.b.2d.v4.f32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=f"(__res.x), "=f"(__res.y), "=f"(__res.z), "=f"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __res;
  }
};
// Component-2 implementation of the "__tex2Dgather_rmnf_v2" texture op, keyed
// by the tag 10000 + op * 100 + 2.
//
// Both specializations issue `tld4.b.2d.v4` with an integer destination type
// but return float4: the result registers are bound with the "r" constraint
// onto the float members. __comp is not read here.
// NOTE(review): "rmnf" presumably means "read mode normalized float" - confirm.
//
// asm operands: %0-%3 result, %4 texture object, %5/%6 x/y.
template <>
struct __tex_fetch_v4<
    __Tag<10000 + __nvvm_texture_op("__tex2Dgather_rmnf_v2") * 100 + 2>> {
  // Primary declaration; only <int4> and <uint4> are defined in this struct.
  template <class T>
  __attribute__((device)) static float4 __run(cudaTextureObject_t __obj,
                                              float __x, float __y,
                                              int __comp);

  // T = int4 -> tld4.b.2d.v4.s32.f32
  template <>
  __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj,
                                             float __x, float __y,
                                             int __comp) {
    float4 __res;
    asm("tld4.b.2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __res;
  }

  // T = uint4 -> tld4.b.2d.v4.u32.f32
  template <>
  __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj,
                                              float __x, float __y,
                                              int __comp) {
    float4 __res;
    asm("tld4.b.2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __res;
  }
};
// Component 2 of "__itex2Dgather" reuses the component-2 implementation of
// "__tex2Dgather_v2" by inheriting from it.
template <>
struct __tex_fetch_v4<
    __Tag<10000 + __nvvm_texture_op("__itex2Dgather") * 100 + 2>>
    : public __tex_fetch_v4<
          __Tag<10000 + __nvvm_texture_op("__tex2Dgather_v2") * 100 + 2>> {};
// Component-2 implementation of the "__itex2Dgather_sparse" texture op, keyed
// by the tag 10000 + op * 100 + 2.
//
// Each __run<T> issues `tld4.b.2d.v4` with a second, predicate destination
// (%p0, declared inside the asm scope) and then converts that predicate to
// 1 or 0 with `selp.u16`, storing it through __ir. __comp is not read here.
// NOTE(review): the predicate is presumably the sparse-texture residency flag
// - confirm against the PTX ISA.
//
// asm operands: %0-%3 result, %4 *__ir, %5 texture object, %6/%7 x/y.
template <>
struct __tex_fetch_v4<
    __Tag<10000 + __nvvm_texture_op("__itex2Dgather_sparse") * 100 + 2>> {
  // Primary declaration; defined only through the specializations below.
  template <class T>
  __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x,
                                         float __y, unsigned char* __ir,
                                         int __comp);

  // Signed 32-bit result.
  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj,
                                           float __x, float __y,
                                           unsigned char* __ir, int __comp) {
    int4 __res;
    asm("{.reg .pred %%p0;\n\t"
        "tld4.b.2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}];\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __res;
  }

  // Unsigned 32-bit result.
  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y,
                                             unsigned char* __ir, int __comp) {
    uint4 __res;
    asm("{.reg .pred %%p0;\n\t"
        "tld4.b.2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}];\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __res;
  }

  // 32-bit float result.
  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               unsigned char* __ir,
                                               int __comp) {
    float4 __res;
    asm("{.reg .pred %%p0;\n\t"
        "tld4.b.2d.v4.f32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}];\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=f"(__res.x), "=f"(__res.y), "=f"(__res.z), "=f"(__res.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __res;
  }
};
;
// Component-3 implementation of the "__tex2Dgather_v2" texture op, keyed by
// the tag 10000 + op * 100 + 3.
//
// Each __run<T> issues `tld4.a.2d.v4` with the destination type matching T.
// __comp is accepted for signature compatibility but not read here; the
// component is fixed by the `.a` selector in the instruction.
//
// asm operands: %0-%3 result, %4 texture object, %5/%6 x/y.
template <>
struct __tex_fetch_v4<
    __Tag<10000 + __nvvm_texture_op("__tex2Dgather_v2") * 100 + 3>> {
  // Primary declaration; defined only through the specializations below.
  template <class T>
  __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x,
                                         float __y, int __comp);

  // Signed 32-bit result.
  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj,
                                           float __x, float __y, int __comp) {
    int4 __res;
    asm("tld4.a.2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __res;
  }

  // Unsigned 32-bit result.
  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y,
                                             int __comp) {
    uint4 __res;
    asm("tld4.a.2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __res;
  }

  // 32-bit float result.
  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               int __comp) {
    float4 __res;
    asm("tld4.a.2d.v4.f32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=f"(__res.x), "=f"(__res.y), "=f"(__res.z), "=f"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __res;
  }
};
// Component-3 implementation of the "__tex2Dgather_rmnf_v2" texture op, keyed
// by the tag 10000 + op * 100 + 3.
//
// Both specializations issue `tld4.a.2d.v4` with an integer destination type
// but return float4: the result registers are bound with the "r" constraint
// onto the float members. __comp is not read here.
// NOTE(review): "rmnf" presumably means "read mode normalized float" - confirm.
//
// asm operands: %0-%3 result, %4 texture object, %5/%6 x/y.
template <>
struct __tex_fetch_v4<
    __Tag<10000 + __nvvm_texture_op("__tex2Dgather_rmnf_v2") * 100 + 3>> {
  // Primary declaration; only <int4> and <uint4> are defined in this struct.
  template <class T>
  __attribute__((device)) static float4 __run(cudaTextureObject_t __obj,
                                              float __x, float __y,
                                              int __comp);

  // T = int4 -> tld4.a.2d.v4.s32.f32
  template <>
  __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj,
                                             float __x, float __y,
                                             int __comp) {
    float4 __res;
    asm("tld4.a.2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __res;
  }

  // T = uint4 -> tld4.a.2d.v4.u32.f32
  template <>
  __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj,
                                              float __x, float __y,
                                              int __comp) {
    float4 __res;
    asm("tld4.a.2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6}];"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __res;
  }
};
// Component 3 of "__itex2Dgather" reuses the component-3 implementation of
// "__tex2Dgather_v2" by inheriting from it.
template <>
struct __tex_fetch_v4<
    __Tag<10000 + __nvvm_texture_op("__itex2Dgather") * 100 + 3>>
    : public __tex_fetch_v4<
          __Tag<10000 + __nvvm_texture_op("__tex2Dgather_v2") * 100 + 3>> {};
// Component-3 implementation of the "__itex2Dgather_sparse" texture op, keyed
// by the tag 10000 + op * 100 + 3.
//
// Each __run<T> issues `tld4.a.2d.v4` with a second, predicate destination
// (%p0, declared inside the asm scope) and then converts that predicate to
// 1 or 0 with `selp.u16`, storing it through __ir. __comp is not read here.
// NOTE(review): the predicate is presumably the sparse-texture residency flag
// - confirm against the PTX ISA.
//
// asm operands: %0-%3 result, %4 *__ir, %5 texture object, %6/%7 x/y.
template <>
struct __tex_fetch_v4<
    __Tag<10000 + __nvvm_texture_op("__itex2Dgather_sparse") * 100 + 3>> {
  // Primary declaration; defined only through the specializations below.
  template <class T>
  __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x,
                                         float __y, unsigned char* __ir,
                                         int __comp);

  // Signed 32-bit result.
  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj,
                                           float __x, float __y,
                                           unsigned char* __ir, int __comp) {
    int4 __res;
    asm("{.reg .pred %%p0;\n\t"
        "tld4.a.2d.v4.s32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}];\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __res;
  }

  // Unsigned 32-bit result.
  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y,
                                             unsigned char* __ir, int __comp) {
    uint4 __res;
    asm("{.reg .pred %%p0;\n\t"
        "tld4.a.2d.v4.u32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}];\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __res;
  }

  // 32-bit float result.
  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               unsigned char* __ir,
                                               int __comp) {
    float4 __res;
    asm("{.reg .pred %%p0;\n\t"
        "tld4.a.2d.v4.f32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}];\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=f"(__res.x), "=f"(__res.y), "=f"(__res.z), "=f"(__res.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y));
    return __res;
  }
};
;
// Run-time dispatcher for the "__tex2Dgather_v2" texture op.
//
// Forwards to the per-component implementation selected by __comp, i.e. the
// __tex_fetch_v4 specialization tagged 10000 + op * 100 + __comp, passing all
// arguments through unchanged.
//
// Parameters:
//   __obj, __x, __y  forwarded verbatim to the selected implementation.
//   __comp           component selector; must be 0, 1, 2 or 3.
// Returns: the value produced by the selected implementation's __run<__T>.
//
// A selector outside 0..3 used to run off the end of this non-void function
// (undefined behavior). It now traps, so the contract violation is
// deterministic instead of yielding an indeterminate result.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex2Dgather_v2")>> {
  template <class __T>
  __attribute__((device)) static __T __run(cudaTextureObject_t __obj, float __x,
                                           float __y, int __comp) {
    switch (__comp) {
      case 0:
        return __tex_fetch_v4<
            __Tag<10000 + __nvvm_texture_op("__tex2Dgather_v2") * 100 +
                  0>>::__run<__T>(__obj, __x, __y, __comp);
      case 1:
        return __tex_fetch_v4<
            __Tag<10000 + __nvvm_texture_op("__tex2Dgather_v2") * 100 +
                  1>>::__run<__T>(__obj, __x, __y, __comp);
      case 2:
        return __tex_fetch_v4<
            __Tag<10000 + __nvvm_texture_op("__tex2Dgather_v2") * 100 +
                  2>>::__run<__T>(__obj, __x, __y, __comp);
      case 3:
        return __tex_fetch_v4<
            __Tag<10000 + __nvvm_texture_op("__tex2Dgather_v2") * 100 +
                  3>>::__run<__T>(__obj, __x, __y, __comp);
      default:
        // No implementation exists for this selector; fail loudly.
        __builtin_trap();
    }
  }
};
// "__itex2Dgather" has no dispatcher of its own: it inherits __run<__T> from
// the "__tex2Dgather_v2" fetcher.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex2Dgather")>>
    : public __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex2Dgather_v2")>> {};
// Run-time dispatcher for the "__tex2Dgather_rmnf_v2" texture op.
//
// Forwards to the per-component implementation selected by __comp, i.e. the
// __tex_fetch_v4 specialization tagged 10000 + op * 100 + __comp, passing all
// arguments through unchanged. The result is always float4, whatever __T is.
//
// Parameters:
//   __obj, __x, __y  forwarded verbatim to the selected implementation.
//   __comp           component selector; must be 0, 1, 2 or 3.
// Returns: the float4 produced by the selected implementation's __run<__T>.
//
// A selector outside 0..3 used to run off the end of this non-void function
// (undefined behavior). It now traps, so the contract violation is
// deterministic instead of yielding an indeterminate result.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex2Dgather_rmnf_v2")>> {
  template <class __T>
  __attribute__((device)) static float4 __run(cudaTextureObject_t __obj,
                                              float __x, float __y,
                                              int __comp) {
    switch (__comp) {
      case 0:
        return __tex_fetch_v4<
            __Tag<10000 + __nvvm_texture_op("__tex2Dgather_rmnf_v2") * 100 +
                  0>>::__run<__T>(__obj, __x, __y, __comp);
      case 1:
        return __tex_fetch_v4<
            __Tag<10000 + __nvvm_texture_op("__tex2Dgather_rmnf_v2") * 100 +
                  1>>::__run<__T>(__obj, __x, __y, __comp);
      case 2:
        return __tex_fetch_v4<
            __Tag<10000 + __nvvm_texture_op("__tex2Dgather_rmnf_v2") * 100 +
                  2>>::__run<__T>(__obj, __x, __y, __comp);
      case 3:
        return __tex_fetch_v4<
            __Tag<10000 + __nvvm_texture_op("__tex2Dgather_rmnf_v2") * 100 +
                  3>>::__run<__T>(__obj, __x, __y, __comp);
      default:
        // No implementation exists for this selector; fail loudly.
        __builtin_trap();
    }
  }
};
// Run-time dispatcher for the "__itex2Dgather_sparse" texture op.
//
// Forwards to the per-component implementation selected by __comp, i.e. the
// __tex_fetch_v4 specialization tagged 10000 + op * 100 + __comp, passing all
// arguments through unchanged.
//
// Parameters:
//   __obj, __x, __y  forwarded verbatim to the selected implementation.
//   __ir             forwarded verbatim; the selected implementation writes
//                    through it.
//   __comp           component selector; must be 0, 1, 2 or 3.
// Returns: the value produced by the selected implementation's __run<__T>.
//
// A selector outside 0..3 used to run off the end of this non-void function
// (undefined behavior). It now traps, so the contract violation is
// deterministic; *__ir is left untouched on that path.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex2Dgather_sparse")>> {
  template <class __T>
  __attribute__((device)) static __T __run(cudaTextureObject_t __obj, float __x,
                                           float __y, unsigned char* __ir,
                                           int __comp) {
    switch (__comp) {
      case 0:
        return __tex_fetch_v4<
            __Tag<10000 + __nvvm_texture_op("__itex2Dgather_sparse") * 100 +
                  0>>::__run<__T>(__obj, __x, __y, __ir, __comp);
      case 1:
        return __tex_fetch_v4<
            __Tag<10000 + __nvvm_texture_op("__itex2Dgather_sparse") * 100 +
                  1>>::__run<__T>(__obj, __x, __y, __ir, __comp);
      case 2:
        return __tex_fetch_v4<
            __Tag<10000 + __nvvm_texture_op("__itex2Dgather_sparse") * 100 +
                  2>>::__run<__T>(__obj, __x, __y, __ir, __comp);
      case 3:
        return __tex_fetch_v4<
            __Tag<10000 + __nvvm_texture_op("__itex2Dgather_sparse") * 100 +
                  3>>::__run<__T>(__obj, __x, __y, __ir, __comp);
      default:
        // No implementation exists for this selector; fail loudly.
        __builtin_trap();
    }
  }
};
// Fetcher for the "__tex3D_v2" texture op.
//
// Each __run<T> issues one `tex.3d.v4` instruction whose destination type
// (.s32 / .u32 / .f32) matches the element type of T.
//
// asm operands: %0-%3 result, %4 texture object, %5/%6/%7 x/y/z (the fourth
// coordinate slot repeats %7).
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex3D_v2")>> {
  // Primary declaration; defined only through the specializations below.
  template <class T>
  __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x,
                                         float __y, float __z);

  // Signed 32-bit result.
  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj,
                                           float __x, float __y, float __z) {
    int4 __res;
    asm("tex.3d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z));
    return __res;
  }

  // Unsigned 32-bit result.
  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y,
                                             float __z) {
    uint4 __res;
    asm("tex.3d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z));
    return __res;
  }

  // 32-bit float result.
  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               float __z) {
    float4 __res;
    asm("tex.3d.v4.f32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];"
        : "=f"(__res.x), "=f"(__res.y), "=f"(__res.z), "=f"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z));
    return __res;
  }
};
// Fetcher for the "__tex3D_rmnf_v2" texture op.
//
// Both specializations return float4 while using an integer PTX destination
// type (.s32 / .u32); the result registers are bound with the "r" constraint
// onto the float members of the result.
// NOTE(review): "rmnf" presumably means "read mode normalized float" - confirm.
//
// asm operands: %0-%3 result, %4 texture object, %5/%6/%7 x/y/z (the fourth
// coordinate slot repeats %7).
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex3D_rmnf_v2")>> {
  // Primary declaration; only <int4> and <uint4> are defined in this struct.
  template <class T>
  __attribute__((device)) static float4 __run(cudaTextureObject_t __obj,
                                              float __x, float __y,
                                              float __z);

  // T = int4 -> tex.3d.v4.s32.f32
  template <>
  __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj,
                                             float __x, float __y,
                                             float __z) {
    float4 __res;
    asm("tex.3d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z));
    return __res;
  }

  // T = uint4 -> tex.3d.v4.u32.f32
  template <>
  __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj,
                                              float __x, float __y,
                                              float __z) {
    float4 __res;
    asm("tex.3d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z));
    return __res;
  }
};
// "__itex3D" has no implementation of its own: it inherits every __run<T>
// from the "__tex3D_v2" fetcher.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex3D")>>
    : public __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex3D_v2")>> {};
// Fetcher for the "__itex3D_sparse" texture op.
//
// Each __run<T> issues `tex.3d.v4` with a second, predicate destination (%p0,
// declared inside the asm scope) and then converts that predicate to 1 or 0
// with `selp.u16`, storing it through __ir.
// NOTE(review): the predicate is presumably the sparse-texture residency flag
// - confirm against the PTX ISA.
//
// asm operands: %0-%3 result, %4 *__ir, %5 texture object, %6/%7/%8 x/y/z
// (the fourth coordinate slot repeats %8).
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex3D_sparse")>> {
  // Primary declaration; defined only through the specializations below.
  template <class T>
  __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x,
                                         float __y, float __z,
                                         unsigned char* __ir);

  // Signed 32-bit result.
  template <>
  __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj,
                                           float __x, float __y, float __z,
                                           unsigned char* __ir) {
    int4 __res;
    asm("{.reg .pred %%p0;\n\t"
        "tex.3d.v4.s32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}];\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z));
    return __res;
  }

  // Unsigned 32-bit result.
  template <>
  __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj,
                                             float __x, float __y, float __z,
                                             unsigned char* __ir) {
    uint4 __res;
    asm("{.reg .pred %%p0;\n\t"
        "tex.3d.v4.u32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}];\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z));
    return __res;
  }

  // 32-bit float result.
  template <>
  __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj,
                                               float __x, float __y,
                                               float __z,
                                               unsigned char* __ir) {
    float4 __res;
    asm("{.reg .pred %%p0;\n\t"
        "tex.3d.v4.f32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}];\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=f"(__res.x), "=f"(__res.y), "=f"(__res.z), "=f"(__res.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z));
    return __res;
  }
};
# 420 "__clang_cuda_texture_intrinsics.h" 3
// Fetcher for the "__tex3DGrad_v2" texture op.
//
// Each __run<T> issues one `tex.grad.3d.v4` instruction whose destination
// type (.s32 / .u32 / .f32) matches the element type of T. Only the x, y and
// z members of *__dPdx and *__dPdy are read.
//
// asm operands: %0-%3 result, %4 texture object, %5/%6/%7 x/y/z, %8-%10
// __dPdx->{x,y,z}, %11-%13 __dPdy->{x,y,z}; in each 4-wide vector the fourth
// slot repeats the third operand.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex3DGrad_v2")>> {
  // Primary declaration; defined only through the specializations below.
  template <class T>
  __attribute__((device)) static T __run(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      const float4* __dPdx, const float4* __dPdy);

  // Signed 32-bit result.
  template <>
  __attribute__((device)) int4 __run<int4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      const float4* __dPdx, const float4* __dPdy) {
    int4 __res;
    asm("tex.grad.3d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], "
        "{%8, %9, %10, %10}, {%11, %12, %13, %13};"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__dPdx->x),
          "f"(__dPdx->y), "f"(__dPdx->z), "f"(__dPdy->x), "f"(__dPdy->y),
          "f"(__dPdy->z));
    return __res;
  }

  // Unsigned 32-bit result.
  template <>
  __attribute__((device)) uint4 __run<uint4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      const float4* __dPdx, const float4* __dPdy) {
    uint4 __res;
    asm("tex.grad.3d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], "
        "{%8, %9, %10, %10}, {%11, %12, %13, %13};"
        : "=r"(__res.x), "=r"(__res.y), "=r"(__res.z), "=r"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__dPdx->x),
          "f"(__dPdx->y), "f"(__dPdx->z), "f"(__dPdy->x), "f"(__dPdy->y),
          "f"(__dPdy->z));
    return __res;
  }

  // 32-bit float result.
  template <>
  __attribute__((device)) float4 __run<float4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      const float4* __dPdx, const float4* __dPdy) {
    float4 __res;
    asm("tex.grad.3d.v4.f32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], "
        "{%8, %9, %10, %10}, {%11, %12, %13, %13};"
        : "=f"(__res.x), "=f"(__res.y), "=f"(__res.z), "=f"(__res.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__dPdx->x),
          "f"(__dPdx->y), "f"(__dPdx->z), "f"(__dPdy->x), "f"(__dPdy->y),
          "f"(__dPdy->z));
    return __res;
  }
};
// Fetch helpers for the "__tex3DGrad_rmnf_v2" op. Every specialization
// returns float4; the template argument only selects the .s32 or .u32 form of
// `tex.grad.3d.v4`. The float members of the result are bound through "r"
// constraints. ("rmnf" presumably stands for read-mode-normalized-float --
// confirm.)
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex3DGrad_rmnf_v2")>> {
  // Primary declaration; definitions exist only for int4 and uint4.
  template <class T>
  __attribute__((device)) static float4 __run(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      const float4* __dPdx, const float4* __dPdy);

  // Signed-integer texel type.
  template <>
  __attribute__((device)) float4 __run<int4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      const float4* __dPdx, const float4* __dPdy) {
    float4 __texel;
    asm("tex.grad.3d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], "
        "{%8, %9, %10, %10}, {%11, %12, %13, %13};"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z),
          "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z),
          "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z));
    return __texel;
  }

  // Unsigned-integer texel type.
  template <>
  __attribute__((device)) float4 __run<uint4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      const float4* __dPdx, const float4* __dPdy) {
    float4 __texel;
    asm("tex.grad.3d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], "
        "{%8, %9, %10, %10}, {%11, %12, %13, %13};"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z),
          "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z),
          "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z));
    return __texel;
  }
};
// "__itex3DGrad_v2" reuses the "__tex3DGrad_v2" implementation unchanged.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex3DGrad_v2")>>
    : public __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex3DGrad_v2")>> {};
// Fetch helpers for the "__itex3DGrad_sparse" op. Same lookup as the
// non-sparse gradient fetch (`tex.grad.3d.v4.<dtype>.f32`), but the
// instruction also writes a predicate, which is converted to 1/0 with `selp`
// and stored through __ir (presumably the texel residency flag -- confirm).
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex3DGrad_sparse")>> {
  // Primary declaration; definitions exist only for int4, uint4 and float4.
  template <class T>
  __attribute__((device)) static T __run(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      const float4* __dPdx, const float4* __dPdy, unsigned char* __ir);

  // Signed 32-bit result (.s32).
  template <>
  __attribute__((device)) int4 __run<int4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      const float4* __dPdx, const float4* __dPdy, unsigned char* __ir) {
    int4 __texel;
    // Operand %4 is *__ir, so the texture handle and coordinates start at %5.
    asm("{.reg .pred %%p0;\n\t"
        "tex.grad.3d.v4.s32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], "
        "{%9, %10, %11, %11}, {%12, %13, %14, %14};\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z),
          "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z),
          "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z));
    return __texel;
  }

  // Unsigned 32-bit result (.u32).
  template <>
  __attribute__((device)) uint4 __run<uint4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      const float4* __dPdx, const float4* __dPdy, unsigned char* __ir) {
    uint4 __texel;
    asm("{.reg .pred %%p0;\n\t"
        "tex.grad.3d.v4.u32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], "
        "{%9, %10, %11, %11}, {%12, %13, %14, %14};\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z),
          "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z),
          "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z));
    return __texel;
  }

  // 32-bit float result (.f32).
  template <>
  __attribute__((device)) float4 __run<float4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      const float4* __dPdx, const float4* __dPdy, unsigned char* __ir) {
    float4 __texel;
    asm("{.reg .pred %%p0;\n\t"
        "tex.grad.3d.v4.f32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], "
        "{%9, %10, %11, %11}, {%12, %13, %14, %14};\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=f"(__texel.x), "=f"(__texel.y), "=f"(__texel.z), "=f"(__texel.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z),
          "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z),
          "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z));
    return __texel;
  }
};
# 441 "__clang_cuda_texture_intrinsics.h" 3
// Fetch helpers for the "__tex3DLod_v2" op. Each specialization emits
// `tex.level.3d.v4.<dtype>.f32` with __level as the trailing operand; __z is
// repeated to fill the 4-element coordinate vector.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex3DLod_v2")>> {
  // Primary declaration; definitions exist only for int4, uint4 and float4.
  template <class T>
  __attribute__((device)) static T __run(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      float __level);

  // Signed 32-bit result (.s32).
  template <>
  __attribute__((device)) int4 __run<int4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      float __level) {
    int4 __texel;
    asm("tex.level.3d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__level));
    return __texel;
  }

  // Unsigned 32-bit result (.u32).
  template <>
  __attribute__((device)) uint4 __run<uint4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      float __level) {
    uint4 __texel;
    asm("tex.level.3d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__level));
    return __texel;
  }

  // 32-bit float result (.f32).
  template <>
  __attribute__((device)) float4 __run<float4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      float __level) {
    float4 __texel;
    asm("tex.level.3d.v4.f32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;"
        : "=f"(__texel.x), "=f"(__texel.y), "=f"(__texel.z), "=f"(__texel.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__level));
    return __texel;
  }
};
// Fetch helpers for the "__tex3DLod_rmnf_v2" op. Every specialization returns
// float4; the template argument only selects the .s32 or .u32 form of
// `tex.level.3d.v4`. Result members are bound through "r" constraints.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex3DLod_rmnf_v2")>> {
  // Primary declaration; definitions exist only for int4 and uint4.
  template <class T>
  __attribute__((device)) static float4 __run(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      float __level);

  // Signed-integer texel type.
  template <>
  __attribute__((device)) float4 __run<int4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      float __level) {
    float4 __texel;
    asm("tex.level.3d.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__level));
    return __texel;
  }

  // Unsigned-integer texel type.
  template <>
  __attribute__((device)) float4 __run<uint4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      float __level) {
    float4 __texel;
    asm("tex.level.3d.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__level));
    return __texel;
  }
};
// "__itex3DLod" reuses the "__tex3DLod_v2" implementation unchanged.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex3DLod")>>
    : public __tex_fetch_v4<__Tag<__nvvm_texture_op("__tex3DLod_v2")>> {};
// Fetch helpers for the "__itex3DLod_sparse" op. Emits
// `tex.level.3d.v4.<dtype>.f32` with an additional predicate destination; the
// predicate is turned into 1/0 by `selp` and stored through __ir (presumably
// the texel residency flag -- confirm).
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itex3DLod_sparse")>> {
  // Primary declaration; definitions exist only for int4, uint4 and float4.
  template <class T>
  __attribute__((device)) static T __run(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      float __level, unsigned char* __ir);

  // Signed 32-bit result (.s32).
  template <>
  __attribute__((device)) int4 __run<int4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      float __level, unsigned char* __ir) {
    int4 __texel;
    // Operand %4 is *__ir, so the texture handle and coordinates start at %5.
    asm("{.reg .pred %%p0;\n\t"
        "tex.level.3d.v4.s32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], %9;\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__level));
    return __texel;
  }

  // Unsigned 32-bit result (.u32).
  template <>
  __attribute__((device)) uint4 __run<uint4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      float __level, unsigned char* __ir) {
    uint4 __texel;
    asm("{.reg .pred %%p0;\n\t"
        "tex.level.3d.v4.u32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], %9;\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__level));
    return __texel;
  }

  // 32-bit float result (.f32).
  template <>
  __attribute__((device)) float4 __run<float4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      float __level, unsigned char* __ir) {
    float4 __texel;
    asm("{.reg .pred %%p0;\n\t"
        "tex.level.3d.v4.f32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], %9;\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=f"(__texel.x), "=f"(__texel.y), "=f"(__texel.z), "=f"(__texel.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__level));
    return __texel;
  }
};
# 457 "__clang_cuda_texture_intrinsics.h" 3
// Fetch helpers for the "__texCubemap_v2" op. Each specialization emits
// `tex.cube.v4.<dtype>.f32`; __z is repeated to fill the 4-element coordinate
// vector.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__texCubemap_v2")>> {
  // Primary declaration; definitions exist only for int4, uint4 and float4.
  template <class T>
  __attribute__((device)) static T __run(
      cudaTextureObject_t __obj, float __x, float __y, float __z);

  // Signed 32-bit result (.s32).
  template <>
  __attribute__((device)) int4 __run<int4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z) {
    int4 __texel;
    asm("tex.cube.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z));
    return __texel;
  }

  // Unsigned 32-bit result (.u32).
  template <>
  __attribute__((device)) uint4 __run<uint4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z) {
    uint4 __texel;
    asm("tex.cube.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z));
    return __texel;
  }

  // 32-bit float result (.f32).
  template <>
  __attribute__((device)) float4 __run<float4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z) {
    float4 __texel;
    asm("tex.cube.v4.f32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];"
        : "=f"(__texel.x), "=f"(__texel.y), "=f"(__texel.z), "=f"(__texel.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z));
    return __texel;
  }
};
// Fetch helpers for the "__texCubemap_rmnf_v2" op. Every specialization
// returns float4; the template argument only selects the .s32 or .u32 form of
// `tex.cube.v4`. Result members are bound through "r" constraints.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__texCubemap_rmnf_v2")>> {
  // Primary declaration; definitions exist only for int4 and uint4.
  template <class T>
  __attribute__((device)) static float4 __run(
      cudaTextureObject_t __obj, float __x, float __y, float __z);

  // Signed-integer texel type.
  template <>
  __attribute__((device)) float4 __run<int4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z) {
    float4 __texel;
    asm("tex.cube.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z));
    return __texel;
  }

  // Unsigned-integer texel type.
  template <>
  __attribute__((device)) float4 __run<uint4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z) {
    float4 __texel;
    asm("tex.cube.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z));
    return __texel;
  }
};
// "__itexCubemap" reuses the "__texCubemap_v2" implementation unchanged.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itexCubemap")>>
    : public __tex_fetch_v4<__Tag<__nvvm_texture_op("__texCubemap_v2")>> {};
// Fetch helpers for the "__itexCubemap_sparse" op. Emits
// `tex.cube.v4.<dtype>.f32` with an additional predicate destination; the
// predicate is turned into 1/0 by `selp` and stored through __ir (presumably
// the texel residency flag -- confirm).
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itexCubemap_sparse")>> {
  // Primary declaration; definitions exist only for int4, uint4 and float4.
  template <class T>
  __attribute__((device)) static T __run(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      unsigned char* __ir);

  // Signed 32-bit result (.s32).
  template <>
  __attribute__((device)) int4 __run<int4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      unsigned char* __ir) {
    int4 __texel;
    // Operand %4 is *__ir, so the texture handle and coordinates start at %5.
    asm("{.reg .pred %%p0;\n\t"
        "tex.cube.v4.s32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}];\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z));
    return __texel;
  }

  // Unsigned 32-bit result (.u32).
  template <>
  __attribute__((device)) uint4 __run<uint4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      unsigned char* __ir) {
    uint4 __texel;
    asm("{.reg .pred %%p0;\n\t"
        "tex.cube.v4.u32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}];\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z));
    return __texel;
  }

  // 32-bit float result (.f32).
  template <>
  __attribute__((device)) float4 __run<float4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      unsigned char* __ir) {
    float4 __texel;
    asm("{.reg .pred %%p0;\n\t"
        "tex.cube.v4.f32.f32\t"
        "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}];\n\t"
        "selp.u16 %4, 1, 0, %%p0; }"
        : "=f"(__texel.x), "=f"(__texel.y), "=f"(__texel.z), "=f"(__texel.w),
          "=h"(*__ir)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z));
    return __texel;
  }
};
// Fetch helpers for the "__texCubemapGrad_v2" op. Each specialization emits
// `tex.grad.cube.v4.<dtype>.f32`. Only the x/y/z members of *__dPdx and
// *__dPdy are read; the third coordinate and the third gradient component
// are repeated to fill the 4-element operand vectors.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__texCubemapGrad_v2")>> {
  // Primary declaration; definitions exist only for int4, uint4 and float4.
  template <class T>
  __attribute__((device)) static T __run(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      const float4* __dPdx, const float4* __dPdy);

  // Signed 32-bit result (.s32).
  template <>
  __attribute__((device)) int4 __run<int4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      const float4* __dPdx, const float4* __dPdy) {
    int4 __texel;
    asm("tex.grad.cube.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], "
        "{%8, %9, %10, %10}, {%11, %12, %13, %13};"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z),
          "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z),
          "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z));
    return __texel;
  }

  // Unsigned 32-bit result (.u32).
  template <>
  __attribute__((device)) uint4 __run<uint4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      const float4* __dPdx, const float4* __dPdy) {
    uint4 __texel;
    asm("tex.grad.cube.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], "
        "{%8, %9, %10, %10}, {%11, %12, %13, %13};"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z),
          "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z),
          "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z));
    return __texel;
  }

  // 32-bit float result (.f32).
  template <>
  __attribute__((device)) float4 __run<float4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      const float4* __dPdx, const float4* __dPdy) {
    float4 __texel;
    asm("tex.grad.cube.v4.f32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], "
        "{%8, %9, %10, %10}, {%11, %12, %13, %13};"
        : "=f"(__texel.x), "=f"(__texel.y), "=f"(__texel.z), "=f"(__texel.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z),
          "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z),
          "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z));
    return __texel;
  }
};
// Fetch helpers for the "__texCubemapGrad_rmnf_v2" op. Every specialization
// returns float4; the template argument only selects the .s32 or .u32 form of
// `tex.grad.cube.v4`. Result members are bound through "r" constraints.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__texCubemapGrad_rmnf_v2")>> {
  // Primary declaration; definitions exist only for int4 and uint4.
  template <class T>
  __attribute__((device)) static float4 __run(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      const float4* __dPdx, const float4* __dPdy);

  // Signed-integer texel type.
  template <>
  __attribute__((device)) float4 __run<int4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      const float4* __dPdx, const float4* __dPdy) {
    float4 __texel;
    asm("tex.grad.cube.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], "
        "{%8, %9, %10, %10}, {%11, %12, %13, %13};"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z),
          "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z),
          "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z));
    return __texel;
  }

  // Unsigned-integer texel type.
  template <>
  __attribute__((device)) float4 __run<uint4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      const float4* __dPdx, const float4* __dPdy) {
    float4 __texel;
    asm("tex.grad.cube.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], "
        "{%8, %9, %10, %10}, {%11, %12, %13, %13};"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z),
          "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z),
          "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z));
    return __texel;
  }
};
// "__itexCubemapGrad_v2" reuses the "__texCubemapGrad_v2" implementation
// unchanged.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itexCubemapGrad_v2")>>
    : public __tex_fetch_v4<__Tag<__nvvm_texture_op("__texCubemapGrad_v2")>> {};
// Fetch helpers for the "__texCubemapLayered_v2" op. Each specialization
// emits `tex.acube.v4.<dtype>.f32`. The integer __layer is listed first among
// the coordinate inputs, so the instruction's coordinate vector is
// {layer, x, y, z}.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__texCubemapLayered_v2")>> {
  // Primary declaration; definitions exist only for int4, uint4 and float4.
  template <class T>
  __attribute__((device)) static T __run(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      int __layer);

  // Signed 32-bit result (.s32).
  template <>
  __attribute__((device)) int4 __run<int4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      int __layer) {
    int4 __texel;
    asm("tex.acube.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z));
    return __texel;
  }

  // Unsigned 32-bit result (.u32).
  template <>
  __attribute__((device)) uint4 __run<uint4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      int __layer) {
    uint4 __texel;
    asm("tex.acube.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z));
    return __texel;
  }

  // 32-bit float result (.f32).
  template <>
  __attribute__((device)) float4 __run<float4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      int __layer) {
    float4 __texel;
    asm("tex.acube.v4.f32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];"
        : "=f"(__texel.x), "=f"(__texel.y), "=f"(__texel.z), "=f"(__texel.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z));
    return __texel;
  }
};
// Fetch helpers for the "__texCubemapLayered_rmnf_v2" op. Every
// specialization returns float4; the template argument only selects the .s32
// or .u32 form of `tex.acube.v4`. Result members are bound through "r"
// constraints. The coordinate vector is {layer, x, y, z}.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__texCubemapLayered_rmnf_v2")>> {
  // Primary declaration; definitions exist only for int4 and uint4.
  template <class T>
  __attribute__((device)) static float4 __run(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      int __layer);

  // Signed-integer texel type.
  template <>
  __attribute__((device)) float4 __run<int4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      int __layer) {
    float4 __texel;
    asm("tex.acube.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z));
    return __texel;
  }

  // Unsigned-integer texel type.
  template <>
  __attribute__((device)) float4 __run<uint4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      int __layer) {
    float4 __texel;
    asm("tex.acube.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z));
    return __texel;
  }
};
// "__itexCubemapLayered" reuses the "__texCubemapLayered_v2" implementation
// unchanged.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itexCubemapLayered")>>
    : public __tex_fetch_v4<
          __Tag<__nvvm_texture_op("__texCubemapLayered_v2")>> {};
// Fetch helpers for the "__texCubemapLayeredGrad_v2" op. Each specialization
// emits `tex.grad.acube.v4.<dtype>.f32` with coordinate vector
// {layer, x, y, z}. Only the x/y/z members of *__dPdx and *__dPdy are read;
// the third gradient component is repeated to fill each 4-element vector.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__texCubemapLayeredGrad_v2")>> {
  // Primary declaration; definitions exist only for int4, uint4 and float4.
  template <class T>
  __attribute__((device)) static T __run(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      int __layer, const float4* __dPdx, const float4* __dPdy);

  // Signed 32-bit result (.s32).
  template <>
  __attribute__((device)) int4 __run<int4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      int __layer, const float4* __dPdx, const float4* __dPdy) {
    int4 __texel;
    asm("tex.grad.acube.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}], "
        "{%9, %10, %11, %11}, {%12, %13, %14, %14};"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z),
          "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z),
          "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z));
    return __texel;
  }

  // Unsigned 32-bit result (.u32).
  template <>
  __attribute__((device)) uint4 __run<uint4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      int __layer, const float4* __dPdx, const float4* __dPdy) {
    uint4 __texel;
    asm("tex.grad.acube.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}], "
        "{%9, %10, %11, %11}, {%12, %13, %14, %14};"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z),
          "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z),
          "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z));
    return __texel;
  }

  // 32-bit float result (.f32).
  template <>
  __attribute__((device)) float4 __run<float4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      int __layer, const float4* __dPdx, const float4* __dPdy) {
    float4 __texel;
    asm("tex.grad.acube.v4.f32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}], "
        "{%9, %10, %11, %11}, {%12, %13, %14, %14};"
        : "=f"(__texel.x), "=f"(__texel.y), "=f"(__texel.z), "=f"(__texel.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z),
          "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z),
          "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z));
    return __texel;
  }
};
// Fetch helpers for the "__texCubemapLayeredGrad_rmnf_v2" op. Every
// specialization returns float4; the template argument only selects the .s32
// or .u32 form of `tex.grad.acube.v4`. Result members are bound through "r"
// constraints. The coordinate vector is {layer, x, y, z}.
template <>
struct __tex_fetch_v4<
    __Tag<__nvvm_texture_op("__texCubemapLayeredGrad_rmnf_v2")>> {
  // Primary declaration; definitions exist only for int4 and uint4.
  template <class T>
  __attribute__((device)) static float4 __run(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      int __layer, const float4* __dPdx, const float4* __dPdy);

  // Signed-integer texel type.
  template <>
  __attribute__((device)) float4 __run<int4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      int __layer, const float4* __dPdx, const float4* __dPdy) {
    float4 __texel;
    asm("tex.grad.acube.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}], "
        "{%9, %10, %11, %11}, {%12, %13, %14, %14};"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z),
          "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z),
          "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z));
    return __texel;
  }

  // Unsigned-integer texel type.
  template <>
  __attribute__((device)) float4 __run<uint4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      int __layer, const float4* __dPdx, const float4* __dPdy) {
    float4 __texel;
    asm("tex.grad.acube.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}], "
        "{%9, %10, %11, %11}, {%12, %13, %14, %14};"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z),
          "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z),
          "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z));
    return __texel;
  }
};
# 493 "__clang_cuda_texture_intrinsics.h" 3
// "__itexCubemapLayeredGrad_v2" reuses the "__texCubemapLayeredGrad_v2"
// implementation unchanged.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itexCubemapLayeredGrad_v2")>>
    : public __tex_fetch_v4<
          __Tag<__nvvm_texture_op("__texCubemapLayeredGrad_v2")>> {};
// Fetch helpers for the "__texCubemapLayeredLod_v2" op. Each specialization
// emits `tex.level.acube.v4.<dtype>.f32` with coordinate vector
// {layer, x, y, z} and __level as the trailing operand.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__texCubemapLayeredLod_v2")>> {
  // Primary declaration; definitions exist only for int4, uint4 and float4.
  template <class T>
  __attribute__((device)) static T __run(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      int __layer, float __level);

  // Signed 32-bit result (.s32).
  template <>
  __attribute__((device)) int4 __run<int4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      int __layer, float __level) {
    int4 __texel;
    asm("tex.level.acube.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}], %9;"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z),
          "f"(__level));
    return __texel;
  }

  // Unsigned 32-bit result (.u32).
  template <>
  __attribute__((device)) uint4 __run<uint4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      int __layer, float __level) {
    uint4 __texel;
    asm("tex.level.acube.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}], %9;"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z),
          "f"(__level));
    return __texel;
  }

  // 32-bit float result (.f32).
  template <>
  __attribute__((device)) float4 __run<float4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      int __layer, float __level) {
    float4 __texel;
    asm("tex.level.acube.v4.f32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}], %9;"
        : "=f"(__texel.x), "=f"(__texel.y), "=f"(__texel.z), "=f"(__texel.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z),
          "f"(__level));
    return __texel;
  }
};
// Fetch helpers for the "__texCubemapLayeredLod_rmnf_v2" op. Every
// specialization returns float4; the template argument only selects the .s32
// or .u32 form of `tex.level.acube.v4`. Result members are bound through "r"
// constraints. The coordinate vector is {layer, x, y, z}.
template <>
struct __tex_fetch_v4<
    __Tag<__nvvm_texture_op("__texCubemapLayeredLod_rmnf_v2")>> {
  // Primary declaration; definitions exist only for int4 and uint4.
  template <class T>
  __attribute__((device)) static float4 __run(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      int __layer, float __level);

  // Signed-integer texel type.
  template <>
  __attribute__((device)) float4 __run<int4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      int __layer, float __level) {
    float4 __texel;
    asm("tex.level.acube.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}], %9;"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z),
          "f"(__level));
    return __texel;
  }

  // Unsigned-integer texel type.
  template <>
  __attribute__((device)) float4 __run<uint4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      int __layer, float __level) {
    float4 __texel;
    asm("tex.level.acube.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}], %9;"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w)
        : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z),
          "f"(__level));
    return __texel;
  }
};
// "__itexCubemapLayeredLod" reuses the "__texCubemapLayeredLod_v2"
// implementation unchanged.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itexCubemapLayeredLod")>>
    : public __tex_fetch_v4<
          __Tag<__nvvm_texture_op("__texCubemapLayeredLod_v2")>> {};
// Fetch helpers for the "__texCubemapLod_v2" op. Each specialization emits
// `tex.level.cube.v4.<dtype>.f32` with __level as the trailing operand; __z
// is repeated to fill the 4-element coordinate vector.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__texCubemapLod_v2")>> {
  // Primary declaration; definitions exist only for int4, uint4 and float4.
  template <class T>
  __attribute__((device)) static T __run(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      float __level);

  // Signed 32-bit result (.s32).
  template <>
  __attribute__((device)) int4 __run<int4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      float __level) {
    int4 __texel;
    asm("tex.level.cube.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__level));
    return __texel;
  }

  // Unsigned 32-bit result (.u32).
  template <>
  __attribute__((device)) uint4 __run<uint4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      float __level) {
    uint4 __texel;
    asm("tex.level.cube.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__level));
    return __texel;
  }

  // 32-bit float result (.f32).
  template <>
  __attribute__((device)) float4 __run<float4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      float __level) {
    float4 __texel;
    asm("tex.level.cube.v4.f32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;"
        : "=f"(__texel.x), "=f"(__texel.y), "=f"(__texel.z), "=f"(__texel.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__level));
    return __texel;
  }
};
// Fetch helpers for the "__texCubemapLod_rmnf_v2" op. Every specialization
// returns float4; the template argument only selects the .s32 or .u32 form of
// `tex.level.cube.v4`. Result members are bound through "r" constraints.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__texCubemapLod_rmnf_v2")>> {
  // Primary declaration; definitions exist only for int4 and uint4.
  template <class T>
  __attribute__((device)) static float4 __run(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      float __level);

  // Signed-integer texel type.
  template <>
  __attribute__((device)) float4 __run<int4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      float __level) {
    float4 __texel;
    asm("tex.level.cube.v4.s32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__level));
    return __texel;
  }

  // Unsigned-integer texel type.
  template <>
  __attribute__((device)) float4 __run<uint4>(
      cudaTextureObject_t __obj, float __x, float __y, float __z,
      float __level) {
    float4 __texel;
    asm("tex.level.cube.v4.u32.f32\t"
        "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;"
        : "=r"(__texel.x), "=r"(__texel.y), "=r"(__texel.z), "=r"(__texel.w)
        : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__level));
    return __texel;
  }
};
// "__itexCubemapLod" reuses the "__texCubemapLod_v2" implementation unchanged.
template <>
struct __tex_fetch_v4<__Tag<__nvvm_texture_op("__itexCubemapLod")>>
    : public __tex_fetch_v4<__Tag<__nvvm_texture_op("__texCubemapLod_v2")>> {};
// Converts a fetched vector value (__SrcT) into the type the caller asked for
// (__DestT).
//
// __run is selected through its defaulted template arguments:
//   * __IsConvertible - whether __SrcT converts implicitly to __DestT. When
//     true, the primary template is used and the value is returned as is.
//   * __N - number of base elements in __DestT, computed as
//     sizeof(__DestT) / sizeof(__FT<__DestT>::__bt). When no implicit
//     conversion exists, the <false, N> specialization brace-initializes
//     __DestT from the first N members (x, y, z, w) of the source.
//
// std::is_convertible takes <From, To>. The primary template performs the
// conversion __SrcT -> __DestT, so the trait must be queried in that order.
template <class __DestT, class __SrcT>
struct __convert {
  // Primary case: implicit conversion from __SrcT to __DestT.
  template <bool __IsConvertible = std::is_convertible<__SrcT, __DestT>::value,
            int __N = sizeof(__DestT) / sizeof(typename __FT<__DestT>::__bt)>
  __attribute__((device)) static __DestT __run(__SrcT __v) {
    return __v;
  }
  // One-element destination: keep only x.
  template <>
  __attribute__((device)) static __DestT __run<false, 1>(__SrcT __v) {
    return {__v.x};
  }
  // Two-element destination: keep x and y.
  template <>
  __attribute__((device)) static __DestT __run<false, 2>(__SrcT __v) {
    return {__v.x, __v.y};
  }
  // Three-element destination: keep x, y and z.
  template <>
  __attribute__((device)) static __DestT __run<false, 3>(__SrcT __v) {
    return {__v.x, __v.y, __v.z};
  }
  // Four-element destination: element-wise copy of x, y, z and w.
  template <>
  __attribute__((device)) static __DestT __run<false, 4>(__SrcT __v) {
    return {__v.x, __v.y, __v.z, __v.w};
  }
};
template <class __op, class __T, class... __Args>
__attribute__((device)) static void __tex_fetch(__T* __ptr,
cudaTextureObject_t __handle,
__Args... __args) {
using __FT = typename __FT<__T>::__ft;
*__ptr = __convert<__T, __FT>::__run(
__tex_fetch_v4<__op>::template __run<__FT>(__handle, __args...));
}
// Produces a cudaTextureObject_t from __handle by moving it through a 64-bit
// register with `mov.b64`; both operands use the "l" constraint.
template <class __T>
__attribute__((device)) cudaTextureObject_t __tex_handle_to_obj(__T __handle) {
  cudaTextureObject_t __result;
  asm("mov.b64 %0, %1; " : "=l"(__result) : "l"(__handle));
  return __result;
}
template <class __op, class __T, class __HandleT, class... __Args>
__attribute__((device)) static void __tex_fetch(__T* __ptr, __HandleT __handle,
__Args... __args) {
using __FT = typename __FT<__T>::__ft;
*__ptr =
__convert<__T, __FT>::__run(__tex_fetch_v4<__op>::template __run<__FT>(
__tex_handle_to_obj(__handle), __args...));
}
template <class __op, class __DataT, class __RetT, int __TexT, class... __Args>
__attribute__((device)) static void __tex_fetch(
__DataT*, __RetT* __ptr,
texture<__DataT, __TexT, cudaReadModeNormalizedFloat> __handle,
__Args... __args) {
using __FT = typename __FT<__DataT>::__ft;
*__ptr = __convert<__RetT, float4>::__run(
__tex_fetch_v4<__op>::template __run<__FT>(__tex_handle_to_obj(__handle),
__args...));
}
template <class __op, class __DataT, class __RetT, int __TexT, class... __Args>
__attribute__((device)) static void __tex_fetch(
__DataT*, __RetT* __ptr,
texture<__DataT, __TexT, cudaReadModeElementType> __handle,
__Args... __args) {
using __FT = typename __FT<__DataT>::__ft;
*__ptr =
__convert<__RetT, __FT>::__run(__tex_fetch_v4<__op>::template __run<__FT>(
__tex_handle_to_obj(__handle), __args...));
}
} // namespace
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment