Created
March 5, 2015 00:20
-
-
Save allanmac/4a1851480ca9bf318f68 to your computer and use it in GitHub Desktop.
ld.global.nc (LDG.CI) operations not being generated when const+restrict pointers are within a const struct passed as a kernel argument
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// | |
// | |
// | |
// Warp width used by this file.
#define WARP_SIZE 32
// Qualifiers for kernel entry points: C linkage plus __global__.
#define KERNEL_QUALIFIERS extern "C" __global__
// Qualifiers for device-side helpers that are force-inlined.
#define DEVICE_INTRINSIC_QUALIFIERS __device__ __forceinline__
// Shorthand for the __restrict__ pointer qualifier.
#define RESTRICT __restrict__
// | |
// | |
// | |
// Reference case: copies four unsigned ints per thread from vin to vout.
//
// Both pointers are passed directly as const, __restrict__-qualified kernel
// parameters. Each thread reads elements threadIdx.x + 32*k (k = 0..3) and
// writes them to the same indices of vout; all four loads are issued before
// the first store.
//
// Indexing depends only on threadIdx.x and there is no bounds check, so both
// buffers must hold at least 32*3 + blockDim.x elements.
//
// @param vin  read-only source buffer
// @param vout destination buffer
KERNEL_QUALIFIERS
void ldg_good(const unsigned int* const RESTRICT vin,
              unsigned int* const RESTRICT vout)
{
  const unsigned int v0 = vin[threadIdx.x+32*0];
  const unsigned int v1 = vin[threadIdx.x+32*1];
  const unsigned int v2 = vin[threadIdx.x+32*2];
  const unsigned int v3 = vin[threadIdx.x+32*3];
  vout[threadIdx.x+32*0] = v0;
  vout[threadIdx.x+32*1] = v1;
  vout[threadIdx.x+32*2] = v2;
  vout[threadIdx.x+32*3] = v3;
}
// | |
// | |
// | |
// Kernel argument bundle carrying the same two buffers that ldg_good takes as
// separate parameters. Both members are __restrict__-qualified pointers.
struct ldg_args
{
  const unsigned int* RESTRICT vin;   // read-only source buffer
  unsigned int*       RESTRICT vout;  // destination buffer
};
// Problem case: performs the same copy as ldg_good, but receives the two
// buffers through a const struct passed by value as the single kernel argument.
//
// This is the form for which ld.global.nc (LDG.CI) loads are reported as not
// being generated. The body is intentionally left without a workaround so the
// snippet keeps reproducing the behaviour.
//
// Each thread reads elements threadIdx.x + 32*k (k = 0..3) of args.vin and
// writes them to the same indices of args.vout. Indexing depends only on
// threadIdx.x and there is no bounds check, so both buffers must hold at
// least 32*3 + blockDim.x elements.
//
// @param args source/destination pointer pair (see struct ldg_args)
KERNEL_QUALIFIERS
void ldg_bad(const struct ldg_args args)
{
  const unsigned int v0 = args.vin[threadIdx.x+32*0];
  const unsigned int v1 = args.vin[threadIdx.x+32*1];
  const unsigned int v2 = args.vin[threadIdx.x+32*2];
  const unsigned int v3 = args.vin[threadIdx.x+32*3];
  args.vout[threadIdx.x+32*0] = v0;
  args.vout[threadIdx.x+32*1] = v1;
  args.vout[threadIdx.x+32*2] = v2;
  args.vout[threadIdx.x+32*3] = v3;
}
// | |
// | |
// |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Compile the attached snippet with:
nvcc -m 32 -arch sm_50 -Xptxas=-v -cubin ldg.cu
Dump the SASS with:
cuobjdump.exe -sass ldg.cubin
or with:
nvdisasm ldg.cubin
: