Skip to content

Instantly share code, notes, and snippets.

@allanmac
Created December 29, 2012 20:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save allanmac/4409113 to your computer and use it in GitHub Desktop.
Save allanmac/4409113 to your computer and use it in GitHub Desktop.
/*
 * fmaTest - runs a chain of nine dependent single-precision fused
 * multiply-adds per thread, in place.
 *
 * For thread t = threadIdx.x:
 *   multiplier  = values[t]
 *   accumulator = values[2*t]
 *   accumulator = __fmaf_rn(accumulator, multiplier, c[i])   for i = 0..8
 *   values[2*t] = accumulator
 *
 * Preconditions visible from the code:
 *   - Only threadIdx.x is used for indexing, so every block of a multi-block
 *     launch touches the same elements.
 *   - There is no bounds check; the highest index accessed is
 *     2 * (blockDim.x - 1), so `values` must hold at least that many + 1
 *     floats.
 *   - Element 2*k is written by thread k and read as the multiplier by
 *     thread 2*k, with no synchronization between the two accesses.
 */
__global__
void fmaTest(float* const values)
{
    // Addend of each fused multiply-add, in the order they are applied.
    const float addends[9] = {
        0.73f, 0.37f, 0.24f,
        0.57f, 0.58f, 0.14f,
        0.74f, 0.48f, 0.21f
    };

    const unsigned int t = threadIdx.x;

    // Multiplier is loaded first, then the accumulator's starting value.
    const float multiplier = values[t];
    float* const slot      = values + 2*t;
    float accumulator      = *slot;

    // Each step depends on the previous result: acc = acc * multiplier + c[i],
    // evaluated with a single round-to-nearest rounding per step.
    #pragma unroll
    for (int step = 0; step < 9; ++step)
    {
        accumulator = __fmaf_rn(accumulator, multiplier, addends[step]);
    }

    *slot = accumulator;
}
@allanmac
Copy link
Author

cuobjdump -sass fma.cubin

code for sm_30
    Function : _Z7fmaTestPf
/*0008*/     /*0x10005de428004001*/     MOV R1, c [0x0] [0x44];
/*0010*/     /*0x84001c042c000000*/     S2R R0, SR_Tid_X;
/*0018*/     /*0x00011de428004005*/     MOV R4, c [0x0] [0x140];
/*0020*/     /*0x1000dc435000c000*/     IMUL.U32.U32.HI R3, R0, 0x4;
/*0028*/     /*0x04009c036000c000*/     SHL R2, R0, 0x1;
/*0030*/     /*0x10019c032009c000*/     IMAD.U32.U32 R6.CC, R0, 0x4, R4;
/*0038*/     /*0x10201c435000c000*/     IMUL.U32.U32.HI R0, R2, 0x4;
/*0048*/     /*0x1031dc4348004005*/     IADD.X R7, R3, c [0x0] [0x144];
/*0050*/     /*0x10211c032009c000*/     IMAD.U32.U32 R4.CC, R2, 0x4, R4;
/*0058*/     /*0x10015c4348004005*/     IADD.X R5, R0, c [0x0] [0x144];
/*0060*/     /*0x00601c8584000000*/     LD.E R0, [R6];
/*0068*/     /*0x0040dc8584000000*/     LD.E R3, [R4];
/*0070*/     /*0x0030dc0030008800*/     FFMA R3, R3, R0, c [0x2] [0x0];
/*0078*/     /*0x1030dc0030008800*/     FFMA R3, R3, R0, c [0x2] [0x4];
/*0088*/     /*0x2030dc0030008800*/     FFMA R3, R3, R0, c [0x2] [0x8];
/*0090*/     /*0x3030dc0030008800*/     FFMA R3, R3, R0, c [0x2] [0xc];
/*0098*/     /*0x4030dc0030008800*/     FFMA R3, R3, R0, c [0x2] [0x10];
/*00a0*/     /*0x5030dc0030008800*/     FFMA R3, R3, R0, c [0x2] [0x14];
/*00a8*/     /*0x6030dc0030008800*/     FFMA R3, R3, R0, c [0x2] [0x18];
/*00b0*/     /*0x7030dc0030008800*/     FFMA R3, R3, R0, c [0x2] [0x1c];
/*00b8*/     /*0x80301c0030008800*/     FFMA R0, R3, R0, c [0x2] [0x20];
/*00c8*/     /*0x00401c8594000000*/     ST.E [R4], R0;
/*00d0*/     /*0x00001de780000000*/     EXIT;
/*00d8*/     /*0xe0001de74003ffff*/     BRA 0xd8;
/*00e0*/     /*0x00001de440000000*/     NOP CC.T;
/*00e8*/     /*0x00001de440000000*/     NOP CC.T;
/*00f0*/     /*0x00001de440000000*/     NOP CC.T;
/*00f8*/     /*0x00001de440000000*/     NOP CC.T;
    .............................

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment