Skip to content

Instantly share code, notes, and snippets.

@allanmac
Created March 14, 2013 20:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save allanmac/5165008 to your computer and use it in GitHub Desktop.
Save allanmac/5165008 to your computer and use it in GitHub Desktop.
Butterfly max: a warp-wide maximum reduction built from XOR shuffles.
#define KERNEL_QUALIFIERS extern "C" __global__

// Warp-level butterfly (XOR-shuffle) maximum.
//
// Each thread loads vin[threadIdx.x], then exchanges its running maximum
// with the lane whose index differs by 16, 8, 4, 2 and finally 1, keeping
// the larger value at every step. The result is stored to vout[threadIdx.x].
//
// vin  - input values, indexed by threadIdx.x
// vout - output values, indexed by threadIdx.x
//
// NOTE(review): indexing uses threadIdx.x only and there is no bounds
// check; presumably launched with one block whose size matches the
// buffers and with every lane of each warp active - confirm at the
// launch site.
KERNEL_QUALIFIERS
void shflmax(const int* const vin, int* const vout)
{
  const unsigned int tid = threadIdx.x;

  int best = vin[tid];

  // Halve the XOR lane mask each round: 16, 8, 4, 2, 1.
#pragma unroll
  for (int laneMask = 16; laneMask > 0; laneMask >>= 1)
  {
    const int partner = __shfl_xor(best, laneMask);
    best = max(best, partner);
  }

  vout[tid] = best;
}
@allanmac
Copy link
Author

Compiled with: nvcc -m 32 -arch sm_30 -cubin shflmax.cu

Dumped with: cuobjdump.exe -sass shflmax.cubin

code for sm_30
    Function : shflmax
/*0008*/     /*0x10005de428004001*/     MOV R1, c [0x0] [0x44];
/*0010*/     /*0x84009c042c000000*/     S2R R2, SR_Tid_X;
/*0018*/     /*0x00201c4340004005*/     ISCADD R0, R2, c [0x0] [0x140], 0x2;
/*0020*/     /*0x00001c8580000000*/     LD R0, [R0];
/*0028*/     /*0x4000df658d807c00*/     SHFL.BFLY pt, R3, R0, 0x10, 0x1f;
/*0030*/     /*0x0c001c23081e0000*/     IMNMX R0, R0, R3, !pt;
/*0038*/     /*0x2000df658d807c00*/     SHFL.BFLY pt, R3, R0, 0x8, 0x1f;
/*0048*/     /*0x0c001c23081e0000*/     IMNMX R0, R0, R3, !pt;
/*0050*/     /*0x1000df658d807c00*/     SHFL.BFLY pt, R3, R0, 0x4, 0x1f;
/*0058*/     /*0x0c001c23081e0000*/     IMNMX R0, R0, R3, !pt;
/*0060*/     /*0x0800df658d807c00*/     SHFL.BFLY pt, R3, R0, 0x2, 0x1f;
/*0068*/     /*0x0c00dc23081e0000*/     IMNMX R3, R0, R3, !pt;
/*0070*/     /*0x10201c4340004005*/     ISCADD R0, R2, c [0x0] [0x144], 0x2;
/*0078*/     /*0x04311f658d807c00*/     SHFL.BFLY pt, R4, R3, 0x1, 0x1f;
/*0088*/     /*0x10309c23081e0000*/     IMNMX R2, R3, R4, !pt;
/*0090*/     /*0x00009c8590000000*/     ST [R0], R2;
/*0098*/     /*0x00001de780000000*/     EXIT;
/*00a0*/     /*0xe0001de74003ffff*/     BRA 0xa0;
/*00a8*/     /*0x00001de440000000*/     NOP CC.T;
/*00b0*/     /*0x00001de440000000*/     NOP CC.T;
/*00b8*/     /*0x00001de440000000*/     NOP CC.T;
    ........................

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment