Skip to content

Instantly share code, notes, and snippets.

@allanmac
Created October 7, 2014 22:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save allanmac/bca063e25a0f4ef75004 to your computer and use it in GitHub Desktop.
Save allanmac/bca063e25a0f4ef75004 to your computer and use it in GitHub Desktop.
Try to generate XMAD instructions
// -*- compile-command: "nvcc -m 32 -arch sm_50 -Xptxas=-v,-abi=no -cubin xmad.cu"; -*-
//
//
//
// Qualifiers placed in front of a kernel definition: extern "C" gives the
// symbol C linkage and __global__ marks it as a CUDA kernel entry point.
#define KERNEL_QUALIFIERS extern "C" __global__
// Shorthand for the compiler's __restrict__ pointer qualifier.
#define RESTRICT __restrict__
//
//
//
// Unsigned integer alias (assumes unsigned int is 32 bits on this target).
typedef unsigned int u32;
// Per-thread multiply-add experiment.
//
// Thread t (t = threadIdx.x) reads va[t], vb[t] and vc[t], feeds the .x
// components of the two short2 values and the int addend to an inline-PTX
// "mad.wide.s16" instruction, and writes the 32-bit result to vd[t].
//
// Only threadIdx.x is used for indexing and no bounds check is performed.
KERNEL_QUALIFIERS
void vmad_kernel(const short2* const RESTRICT va,
                 const short2* const RESTRICT vb,
                 const int*    const RESTRICT vc,
                 int*          const          vd)
{
  const unsigned int tid = threadIdx.x;

  const short2 lhs    = va[tid];
  const short2 rhs    = vb[tid];
  const int    addend = vc[tid];

  int result;

  // Disabled variant that used the video multiply-add on the packed operands:
  // asm volatile("vmad.s32.s32.s32 %0, %1.h0, %2.h0, %3;" : "=r"(result) : "r"(lhs), "r"(rhs), "r"(addend));

  // "h" binds a 16-bit register operand, "r" a 32-bit one.
  asm volatile("mad.wide.s16 %0, %1, %2, %3;"
               : "=r"(result)
               : "h"(lhs.x), "h"(rhs.x), "r"(addend));

  vd[tid] = result;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment