Created
July 14, 2013 21:21
-
-
Save allanmac/5996141 to your computer and use it in GitHub Desktop.
Examine the SASS that's generated for the barrier reduction operations __syncthreads_count(), __syncthreads_or() and __syncthreads_and(), as well as for the regular __syncthreads() barrier op. Somewhat surprisingly these are not mapped to a number of SASS ops. The barrier reductions are executed and the result is moved from a "barrier register" to a regula…
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// | |
// | |
// | |
// Qualifiers shared by every kernel in this file. `extern "C"` gives the
// kernels C linkage (unmangled symbol names), presumably so they are easy to
// locate by name in the disassembly; `__global__` marks them as kernel entry
// points.
#define KERNEL_QUALIFIERS extern "C" __global__
// | |
// | |
// | |
// Baseline kernel for the plain barrier: each thread loads one element of
// `vin`, waits at __syncthreads(), then stores that element to `vout`.
//
// vin  - input array, read at index threadIdx.x
// vout - output array, written at index threadIdx.x
//
// Only threadIdx.x is used for indexing (no block offset), and no bounds
// check is performed; both arrays must hold at least blockDim.x elements.
KERNEL_QUALIFIERS
void
sync(const int* const vin, int* const vout)
{
  const int v = vin[threadIdx.x];

  __syncthreads();

  vout[threadIdx.x] = v;
}
// | |
// | |
// | |
// Kernel exercising __syncthreads_count(): each thread loads one element of
// `vin`, contributes the predicate (element != 0) to the barrier reduction,
// and stores the reduction result alongside the unmodified element.
//
// vin  - input array, read at index threadIdx.x
// vout - receives the loaded element at index threadIdx.x
// cout - receives the value returned by __syncthreads_count() at index
//        threadIdx.x (per the CUDA documentation, the number of threads in
//        the block whose predicate was non-zero). This is a kernel parameter
//        and is unrelated to std::cout.
//
// Only threadIdx.x is used for indexing and no bounds check is performed.
KERNEL_QUALIFIERS
void
syncCount(const int* const vin, int* const vout, unsigned int* const cout)
{
  const int          v = vin[threadIdx.x];
  const bool         c = v != 0;                 // this thread's predicate
  const unsigned int d = __syncthreads_count(c); // barrier + reduction

  cout[threadIdx.x] = d;
  vout[threadIdx.x] = v;
}
// | |
// | |
// | |
// Kernel exercising __syncthreads_or(): each thread loads one element of
// `vin` and contributes the predicate (element != 0) to the barrier
// reduction. If the reduction result is true the thread stores its element
// to `vout`, otherwise it stores 0.
//
// vin  - input array, read at index threadIdx.x
// vout - output array, written at index threadIdx.x
//
// Per the CUDA documentation, __syncthreads_or() returns non-zero when the
// predicate is non-zero for any thread of the block.
// Only threadIdx.x is used for indexing and no bounds check is performed.
KERNEL_QUALIFIERS
void
syncOr(const int* const vin, int* const vout)
{
  const int  v = vin[threadIdx.x];
  const bool c = v != 0;              // this thread's predicate
  const bool p = __syncthreads_or(c); // barrier + reduction

  // Make the store depend on the reduction result so it cannot be discarded.
  vout[threadIdx.x] = p ? v : 0;
}
// | |
// | |
// | |
// Kernel exercising __syncthreads_and(): each thread loads one element of
// `vin` and contributes the predicate (element != 0) to the barrier
// reduction. If the reduction result is true the thread stores its element
// to `vout`, otherwise it stores 0.
//
// vin  - input array, read at index threadIdx.x
// vout - output array, written at index threadIdx.x
//
// Per the CUDA documentation, __syncthreads_and() returns non-zero only when
// the predicate is non-zero for every thread of the block.
// Only threadIdx.x is used for indexing and no bounds check is performed.
KERNEL_QUALIFIERS
void
syncAnd(const int* const vin, int* const vout)
{
  const int  v = vin[threadIdx.x];
  const bool c = v != 0;               // this thread's predicate
  const bool p = __syncthreads_and(c); // barrier + reduction

  // Make the store depend on the reduction result so it cannot be discarded.
  vout[threadIdx.x] = p ? v : 0;
}
// | |
// | |
// |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Compiled with
nvcc -m 32 -arch sm_35 -Xptxas=-v,-abi=no -cubin sync.cu
Dumped with
cuobjdump.exe -sass sync.cubin