This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
// | |
// | |
// | |
// Launch-bounds hook. It currently expands to nothing: the
// __launch_bounds__(512) attribute is kept only inside the trailing comment.
// NOTE(review): presumably meant to be placed on kernel declarations - confirm
// against the rest of the file.
#define LAUNCH_BOUNDS // __launch_bounds__(512)
// Execution-space qualifier: expands to __device__.
#define DEVICE_FUNCTION_QUALIFIERS __device__
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
__global__ | |
void fmuladdTest(float* const values) | |
{ | |
const unsigned int tidx = threadIdx.x; | |
const float b = values[ tidx]; | |
float a = values[2*tidx]; | |
a = __fmul_rn(a, b); | |
a = __fadd_rn(a, 0.73f); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
__global__ | |
void fmaTest(float* const values) | |
{ | |
const unsigned int tidx = threadIdx.x; | |
const float b = values[ tidx]; | |
float a = values[2*tidx]; | |
a = __fmaf_rn(a, b, 0.73f); | |
a = __fmaf_rn(a, b, 0.37f); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// | |
// | |
// | |
// Compile-time warp width used by this file: 32.
#define WARP_SIZE 32
// Shorthand for the compiler's __restrict pointer qualifier.
#define RESTRICT __restrict
// |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
// | |
// | |
// | |
#define DEVICE_INTRINSIC_QUALIFIERS __device__ __forceinline__ | |
DEVICE_INTRINSIC_QUALIFIERS | |
unsigned int |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
// | |
// | |
// | |
// Element type alias for this file: unsigned int.
#define TYPE unsigned int
// Repetition count constant: 1.
// NOTE(review): how REPS is consumed is not visible in this excerpt.
#define REPS 1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
extern "C" | |
__global__ | |
void natural(const unsigned int b, | |
const unsigned int c, | |
const unsigned int y, | |
const unsigned int z, | |
const unsigned int id, | |
unsigned int* const out) | |
{ | |
const bool flag = (id == 1); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Qualifiers shared by the kernels below: C linkage (unmangled symbol name)
// plus the __global__ execution space.
#define KERNEL_QUALIFIERS extern "C" __global__

// Writes the positive difference fdimf(x, y) into one slot of fout per thread.
//
// x, y  Scalar operands, identical for every thread.
// fout  Output buffer indexed by threadIdx.x only, so it must hold at least
//       blockDim.x floats. blockIdx is not used: if more than one block is
//       launched, every block writes the same slots with the same value.
KERNEL_QUALIFIERS
void fdimfTest(const float x, const float y, float* const fout)
{
    fout[threadIdx.x] = fdimf(x, y);
}
KERNEL_QUALIFIERS | |
void fdimfTest2(const float x, const float y, float* const fout) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#define KERNEL_QUALIFIERS extern "C" __global__ | |
KERNEL_QUALIFIERS | |
void shflmax(const int* const vin, int* const vout) | |
{ | |
int v = vin[threadIdx.x]; | |
v = max(v,__shfl_xor(v,16)); | |
v = max(v,__shfl_xor(v, 8)); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
// | |
// | |
// | |
// Compile-time warp width used by this file: 32.
#define WARP_SIZE 32
// Kernel qualifiers: C linkage (unmangled symbol name) plus __global__.
#define KERNEL_QUALIFIERS extern "C" __global__
// Qualifiers for small device helpers: device-only and force-inlined.
#define DEVICE_INTRINSIC_QUALIFIERS __device__ __forceinline__
OlderNewer