Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@allanmac
Last active December 14, 2015 06:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save allanmac/5043569 to your computer and use it in GitHub Desktop.
Save allanmac/5043569 to your computer and use it in GitHub Desktop.
The `setp` and `selp` instructions are your friends.
/*
 * natural -- write two conditionally selected values to out[0] and out[1].
 *
 * When id == 1 the pair (b, y) is stored; otherwise the pair (c, z) is
 * stored. Both selections share a single comparison result.
 *
 *   b, c : candidates for out[0]
 *   y, z : candidates for out[1]
 *   id   : selector, compared against 1
 *   out  : destination; elements 0 and 1 are written
 */
extern "C" __global__ void natural(const unsigned int b,
                                   const unsigned int c,
                                   const unsigned int y,
                                   const unsigned int z,
                                   const unsigned int id,
                                   unsigned int* const out)
{
    // Evaluate the condition once and reuse it for both selections.
    const bool pick_first = (id == 1);

    out[0] = pick_first ? b : c;
    out[1] = pick_first ? y : z;
}
@allanmac
Copy link
Author

Compiled with nvcc -m 32 -arch sm_30 -ptx natural.cu:

//
// Generated by NVIDIA NVVM Compiler
// Compiler built on Tue Sep 25 09:26:26 2012 (1348590386)
// Cuda compilation tools, release 5.0, V0.2.1221
//

// PTX ISA version this module is written against.
.version 3.1
// Target architecture; corresponds to the -arch sm_30 flag on the nvcc command line.
.target sm_30
// 32-bit addressing; corresponds to the -m 32 flag on the nvcc command line.
.address_size 32

// Kernel entry point `natural`.
// Parameters follow the order of the CUDA signature:
//   natural_param_0 = b, natural_param_1 = c,
//   natural_param_2 = y, natural_param_3 = z,
//   natural_param_4 = id, natural_param_5 = out (a 32-bit address in this build).
// The body contains no branches: one setp produces a predicate that
// drives two selp instructions.
.visible .entry natural(
        .param .u32 natural_param_0,
        .param .u32 natural_param_1,
        .param .u32 natural_param_2,
        .param .u32 natural_param_3,
        .param .u32 natural_param_4,
        .param .u32 natural_param_5
)
{
        // Virtual register declarations: predicates %p0..%p1 and 32-bit %r0..%r9.
        .reg .pred      %p<2>;
        .reg .s32       %r<10>;

        // Load all six kernel parameters: %r1=b, %r2=c, %r3=y, %r4=z, %r5=id, %r6=out.
        ld.param.u32    %r1, [natural_param_0];
        ld.param.u32    %r2, [natural_param_1];
        ld.param.u32    %r3, [natural_param_2];
        ld.param.u32    %r4, [natural_param_3];
        ld.param.u32    %r5, [natural_param_4];
        ld.param.u32    %r6, [natural_param_5];

        // Convert the `out` address to a global state-space address so st.global can use it.
        cvta.to.global.u32      %r7, %r6;

        // %p1 = (id == 1). Equality gives the same result for signed and unsigned
        // operands, so the .s32 type is harmless for the unsigned source value.
        setp.eq.s32     %p1, %r5, 1;
        // %r8 = %p1 ? b : c
        selp.b32        %r8, %r1, %r2, %p1;
        // %r9 = %p1 ? y : z
        selp.b32        %r9, %r3, %r4, %p1;

        // out[0] = %r8; out[1] = %r9 (byte offset 4 = one 32-bit element).
        st.global.u32   [%r7], %r8;
        st.global.u32   [%r7+4], %r9;

        ret;
}

@allanmac
Copy link
Author

SASS disassembly from cuobjdump.exe -sass natural.cubin:

code for sm_30
    Function : natural
/*0008*/     /*0x10005de428004001*/     MOV R1, c [0x0] [0x44];
/*0010*/     /*0x40001de428004005*/     MOV R0, c [0x0] [0x150];
/*0018*/     /*0x00009de428004005*/     MOV R2, c [0x0] [0x140];
/*0020*/     /*0x2000dde428004005*/     MOV R3, c [0x0] [0x148];
/*0028*/     /*0x0401dc23190ec000*/     ISETP.EQ.AND P0, pt, R0, 0x1, pt;
/*0030*/     /*0x50001de428004005*/     MOV R0, c [0x0] [0x154];
/*0038*/     /*0x10209c0420004005*/     SEL R2, R2, c [0x0] [0x144], P0;
/*0048*/     /*0x3030dc0420004005*/     SEL R3, R3, c [0x0] [0x14c], P0;
/*0050*/     /*0x00009c8590000000*/     ST [R0], R2;
/*0058*/     /*0x1000dc8590000000*/     ST [R0+0x4], R3;
/*0060*/     /*0x00001de780000000*/     EXIT;
/*0068*/     /*0xe0001de74003ffff*/     BRA 0x68;
/*0070*/     /*0x00001de440000000*/     NOP CC.T;
/*0078*/     /*0x00001de440000000*/     NOP CC.T;
    ........................

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment