Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@JCaskey
Created July 13, 2015 20:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save JCaskey/12728199f10c0cd47539 to your computer and use it in GitHub Desktop.
Save JCaskey/12728199f10c0cd47539 to your computer and use it in GitHub Desktop.
Kernel
// Kernel
//
// Writes one float per thread into d_array, using a 1D launch where the
// global thread index selects the element.
//
// Each in-range element is first set to 0 and then overwritten 1000 times;
// the value left behind by the final iteration (i == 999) is
//   idx + 1000  when idx is even
//   idx + 1001  when idx is odd
//
// Parameters:
//   d_array  destination buffer; must hold at least `size` floats
//   size     number of valid elements in d_array
//
// Threads whose index is >= size do nothing, so the grid may be rounded up
// past the end of the array (e.g. blocks = (size + threads - 1) / threads).
__global__ void testKernel(float* d_array, int size){
    // Index
    int idx = blockIdx.x * blockDim.x + threadIdx.x;

    // Guard the grid tail: without this, surplus threads write past the
    // end of d_array.
    if(idx >= size){
        return;
    }

    // Initialize
    d_array[idx] = 0.0f;

    // Loop
    // NOTE(review): the repeated stores to the same element look intentional
    // (this kernel appears to be a code-generation experiment) - confirm
    // before collapsing the loop into a single store.
    for(int i = 0; i < 1000; ++i){
        // Condition
        if(idx % 2 == 0){
            d_array[idx] = static_cast<float>(idx + i + 1);
        }
        else{
            d_array[idx] = static_cast<float>(idx + i + 2);
        }
    }
}
@allanmac
Copy link

nvcc -m 32 -arch sm_50 -Xptxas=-v,-abi=no -cubin stripe.cu
ptxas warning : 'option -abi=no' might get deprecated in future
ptxas warning : Stack size for entry function '_Z10testKernelPfi' cannot be statically determined
ptxas info    : 0 bytes gmem
ptxas info    : Compiling entry function '_Z10testKernelPfi' for 'sm_50'
ptxas info    : Used 3 registers, 328 bytes cmem[0]
stripe.cu

>cuobjdump.exe -sass stripe.cubin

    code for sm_50
        Function : _Z10testKernelPfi
    .headerflags    @"EF_CUDA_SM50 EF_CUDA_PTX_SM(EF_CUDA_SM50)"
                                                                                /* 0x083fc400e3e00711 */
        /*0008*/                   S2R R0, SR_CTAID.X;                          /* 0xf0c8000002570000 */
        /*0010*/                   S2R R1, SR_TID.X;                            /* 0xf0c8000002170001 */
        /*0018*/                   XMAD.MRG R2, R0.reuse, c[0x0] [0x8].H1, RZ;  /* 0x4f107f8000270002 */
                                                                                /* 0x081fd800fec207f6 */
        /*0028*/                   XMAD R1, R0.reuse, c[0x0] [0x8], R1;         /* 0x4e00008000270001 */
        /*0030*/                   XMAD.PSL.CBCC R0, R0.H1, R2.H1, R1;          /* 0x5b30009800270000 */
        /*0038*/                   LOP32I.AND R1, R0.reuse, 0x1;                /* 0x0400000000170001 */
                                                                                /* 0x001ff400e22007e2 */
        /*0048*/                   IADD3 R1, R0, 0x3e8, R1;                     /* 0x38c000803e870001 */
        /*0050*/                   I2F.F32.S32 R1, R1;                          /* 0x5cb8000000172a01 */
        /*0058*/                   ISCADD R0, R0, c[0x0][0x140], 0x2;           /* 0x4c18010005070000 */
                                                                                /* 0x001ffc00ffe008f1 */
        /*0068*/                   STG [R0], R1;                                /* 0xeedc000000070001 */
        /*0070*/                   EXIT;                                        /* 0xe30000000007000f */
        /*0078*/                   BRA 0x78;                                    /* 0xe2400fffff87000f */
        ..................................
nvcc -m 32 -arch sm_50 -Xptxas=-v,-abi=no -cubin stripe.cu
ptxas warning : 'option -abi=no' might get deprecated in future
ptxas warning : Stack size for entry function '_Z10testKernelPfi' cannot be statically determined
ptxas info    : 0 bytes gmem
ptxas info    : Compiling entry function '_Z10testKernelPfi' for 'sm_50'
ptxas info    : Used 31 registers, 328 bytes cmem[0]
stripe.cu

>cuobjdump.exe -sass stripe.cubin

    code for sm_50
        Function : _Z10testKernelPfi
    .headerflags    @"EF_CUDA_SM50 EF_CUDA_PTX_SM(EF_CUDA_SM50)"
                                                                                         /* 0x083fc400e3e00711 */
        /*0008*/                   S2R R0, SR_CTAID.X;                                   /* 0xf0c8000002570000 */
        /*0010*/                   S2R R1, SR_TID.X;                                     /* 0xf0c8000002170001 */
        /*0018*/                   XMAD.MRG R2, R0.reuse, c[0x0] [0x8].H1, RZ;           /* 0x4f107f8000270002 */
                                                                                         /* 0x081fc400fec207f6 */
        /*0028*/                   XMAD R1, R0.reuse, c[0x0] [0x8], R1;                  /* 0x4e00008000270001 */
        /*0030*/                   XMAD.PSL.CBCC R1, R0.H1, R2.H1, R1;                   /* 0x5b30009800270001 */
        /*0038*/                   LOP32I.AND R3, R1.reuse, 0x1;                         /* 0x0400000000170103 */
                                                                                         /* 0x0003c400fe0007e5 */
        /*0048*/                   ISCADD R0, R1, c[0x0][0x140], 0x2;                    /* 0x4c18010005070100 */
        /*0050*/         {         ISETP.NE.AND P0, PT, R3, RZ, PT;                      /* 0x5b6b03800ff70307 */
        /*0058*/                   STG [R0], RZ;        }                                /* 0xeedc0000000700ff */
                                                                                         /* 0x081fc041fe4007ec */
        /*0068*/                   MOV32I R2, 0x3e8;                                     /* 0x010000003e87f002 */
        /*0070*/              @!P0 IADD32I R3, R1.reuse, 0x1;                            /* 0x1c00000000180103 */
        /*0078*/         {    @!P0 IADD32I R4, R1.reuse, 0x2;                            /* 0x1c00000000280104 */
        /*0088*/              @!P0 I2F.F32.S32 R9, R3;        }                          /* 0x0009c840fe000032 */
                                                                                         /* 0x5cb8000000382a09 */
        /*0090*/         {    @!P0 IADD32I R5, R1.reuse, 0x3;                            /* 0x1c00000000380105 */
        /*0098*/              @!P0 I2F.F32.S32 R11, R4;        }                         /* 0x5cb8000000482a0b */
                                                                                         /* 0x089fc00012420ff0 */
        /*00a8*/         {    @!P0 IADD32I R3, R1.reuse, 0x4;                            /* 0x1c00000000480103 */
        /*00b0*/              @!P0 I2F.F32.S32 R13, R5;        }                         /* 0x5cb8000000582a0d */
        /*00b8*/         {    @!P0 IADD32I R4, R1.reuse, 0x5;                            /* 0x1c00000000580104 */
        /*00c8*/              @!P0 I2F.F32.S32 R21, R3;        }                         /* 0x0002c441fe0002b2 */
                                                                                         /* 0x5cb8000000382a15 */
        /*00d0*/         {    @!P0 IADD32I R5, R1.reuse, 0x6;                            /* 0x1c00000000680105 */
        /*00d8*/              @!P0 I2F.F32.S32 R3, R4;        }                          /* 0x5cb8000000482a03 */
                                                                                         /* 0x081fc0023e2207f0 */
        /*00e8*/         {    @!P0 IADD32I R6, R1.reuse, 0x7;                            /* 0x1c00000000780106 */
        /*00f0*/              @!P0 STG [R0], R9;        }                                /* 0xeedc000000080009 */
        /*00f8*/         {    @!P0 IADD32I R7, R1.reuse, 0x8;                            /* 0x1c00000000880107 */
        /*0108*/              @!P0 I2F.F32.S32 R5, R5;        }                          /* 0x001ec440fe0007b1 */
                                                                                         /* 0x5cb8000000582a05 */
        /*0110*/         {    @!P0 IADD32I R10, R1.reuse, 0x9;                           /* 0x1c0000000098010a */
        /*0118*/              @!P0 I2F.F32.S32 R6, R6;        }                          /* 0x5cb8000000682a06 */
                                                                                         /* 0x001f8400f62207f0 */
        /*0128*/         {    @!P0 IADD32I R12, R1.reuse, 0xa;                           /* 0x1c00000000a8010c */
        /*0130*/              @!P0 I2F.F32.S32 R7, R7;        }                          /* 0x5cb8000000782a07 */
        /*0138*/              @!P0 IADD32I R26, R1, 0xb;                                 /* 0x1c00000000b8011a */
                                                                                         /* 0x010fc440fe0011b1 */
        /*0148*/              @!P0 I2F.F32.S32 R9, R10;                                  /* 0x5cb8000000a82a09 */
        /*0150*/         {    @!P0 IADD32I R25, R1.reuse, 0xc;                           /* 0x1c00000000c80119 */
        /*0158*/              @!P0 STG [R0], R11;        }                               /* 0xeedc00000008000b */
                                                                                         /* 0x081fc000162207f0 */
        /*0168*/         {    @!P0 IADD32I R14, R1.reuse, 0xd;                           /* 0x1c00000000d8010e */
        /*0170*/              @!P0 I2F.F32.S32 R10, R12;        }                        /* 0x5cb8000000c82a0a */
        /*0178*/         {    @!P0 IADD32I R15, R1.reuse, 0xe;                           /* 0x1c00000000e8010f */
        /*0188*/              @!P0 STG [R0], R13;        }                               /* 0x010e4440fe2083f1 */
                                                                                         /* 0xeedc00000008000d */
        /*0190*/              @!P0 IADD32I R17, R1.reuse, 0xf;                           /* 0x1c00000000f80111 */
        /*0198*/              @!P0 I2F.F32.S32 R11, R26;                                 /* 0x5cb8000001a82a0b */
                                                                                         /* 0x081fc000f2200091 */
        /*01a8*/              @!P0 I2F.F32.S32 R13, R25;                                 /* 0x5cb8000001982a0d */
        /*01b0*/              @!P0 I2F.F32.S32 R14, R14;                                 /* 0x5cb8000000e82a0e */
        /*01b8*/         {    @!P0 IADD32I R18, R1.reuse, 0x10;                          /* 0x1c00000001080112 */
        /*01c8*/              @!P0 I2F.F32.S32 R15, R15;        }                        /* 0x001e4440fe000791 */
                                                                                         /* 0x5cb8000000f82a0f */
        /*01d0*/         {    @!P0 IADD32I R22, R1.reuse, 0x11;                          /* 0x1c00000001180116 */
        /*01d8*/              @!P0 I2F.F32.S32 R17, R17;        }                        /* 0x5cb8000001182a11 */
                                                                                         /* 0x081fc000f22207f0 */
        /*01e8*/         {    @!P0 IADD32I R23, R1.reuse, 0x12;                          /* 0x1c00000001280117 */
        /*01f0*/              @!P0 I2F.F32.S32 R18, R18;        }                        /* 0x5cb8000001282a12 */
        /*01f8*/         {    @!P0 IADD32I R24, R1.reuse, 0x13;                          /* 0x1c00000001380118 */
        /*0208*/              @!P0 STG [R0], R21;        }                               /* 0x001e4440fe0105f1 */
                                                                                         /* 0xeedc000000080015 */
        /*0210*/         {    @!P0 IADD32I R16, R1.reuse, 0x14;                          /* 0x1c00000001480110 */
        /*0218*/              @!P0 I2F.F32.S32 R22, R22;        }                        /* 0x5cb8000001682a16 */
                                                                                         /* 0x081fc000f22007f0 */
        /*0228*/         {         IADD32I R2, R2, -0x19;                                /* 0x1c0ffffffe770202 */
        /*0230*/              @!P0 I2F.F32.S32 R23, R23;        }                        /* 0x5cb8000001782a17 */
        /*0238*/         {    @!P0 IADD32I R19, R1.reuse, 0x15;                          /* 0x1c00000001580113 */
        /*0248*/              @!P0 STG [R0], R3;        }                                /* 0x00024440fe0005f1 */
                                                                                         /* 0xeedc000000080003 */
        /*0250*/         {    @!P0 IADD32I R20, R1.reuse, 0x16;                          /* 0x1c00000001680114 */
        /*0258*/              @!P0 I2F.F32.S32 R25, R24;        }                        /* 0x5cb8000001882a19 */
                                                                                         /* 0x083fc000bfa007f0 */
        /*0268*/         {    @!P0 IADD32I R8, R1, 0x17;                                 /* 0x1c00000001780108 */
        /*0270*/              @!P0 STG [R0], R5;        }                                /* 0xeedc000000080005 */
        /*0278*/         {    @!P0 IADD32I R12, R1.reuse, 0x18;                          /* 0x1c0000000188010c */
        /*0288*/              @!P0 I2F.F32.S32 R21, R16;        }                        /* 0x0003c440fe0100b1 */
                                                                                         /* 0x5cb8000001082a15 */
        /*0290*/         {    @!P0 IADD32I R4, R1.reuse, 0x19;                           /* 0x1c00000001980104 */
        /*0298*/              @!P0 STG [R0], R6;        }                                /* 0xeedc000000080006 */
                                                                                         /* 0x001fc000f62007f0 */
        /*02a8*/         {         ISETP.NE.AND P1, PT, R2, RZ, PT;                      /* 0x5b6b03800ff7020f */
        /*02b0*/              @!P0 I2F.F32.S32 R19, R19;        }                        /* 0x5cb8000001382a13 */
        /*02b8*/         {         IADD32I R1, R1, 0x19;                                 /* 0x1c00000001970101 */
        /*02c8*/              @!P0 STG [R0], R7;        }                                /* 0x0003c400162000f1 */
                                                                                         /* 0xeedc000000080007 */
        /*02d0*/              @!P0 I2F.F32.S32 R26, R20;                                 /* 0x5cb8000001482a1a */
        /*02d8*/              @!P0 STG [R0], R9;                                         /* 0xeedc000000080009 */
                                                                                         /* 0x0002c4001e2000b1 */
        /*02e8*/              @!P0 I2F.F32.S32 R27, R8;                                  /* 0x5cb8000000882a1b */
        /*02f0*/              @!P0 STG [R0], R10;                                        /* 0xeedc00000008000a */
        /*02f8*/              @!P0 I2F.F32.S32 R29, R12;                                 /* 0x5cb8000000c82a1d */
                                                                                         /* 0x0003c800962080f1 */
        /*0308*/              @!P0 STG [R0], R11;                                        /* 0xeedc00000008000b */
        /*0310*/              @!P0 I2F.F32.S32 R30, R4;                                  /* 0x5cb8000000482a1e */
        /*0318*/              @!P0 STG [R0], R13;                                        /* 0xeedc00000008000d */
                                                                                         /* 0x0003c8001e4000f2 */
        /*0328*/              @!P0 STG [R0], R14;                                        /* 0xeedc00000008000e */
        /*0330*/              @!P0 STG [R0], R15;                                        /* 0xeedc00000008000f */
        /*0338*/              @!P0 STG [R0], R17;                                        /* 0xeedc000000080011 */
                                                                                         /* 0x0003c8001e4000f2 */
        /*0348*/              @!P0 STG [R0], R18;                                        /* 0xeedc000000080012 */
        /*0350*/              @!P0 STG [R0], R22;                                        /* 0xeedc000000080016 */
        /*0358*/              @!P0 STG [R0], R23;                                        /* 0xeedc000000080017 */
                                                                                         /* 0x0003c8201e4000f2 */
        /*0368*/              @!P0 STG [R0], R25;                                        /* 0xeedc000000080019 */
        /*0370*/              @!P0 STG [R0], R21;                                        /* 0xeedc000000080015 */
        /*0378*/              @!P0 STG [R0], R19;                                        /* 0xeedc000000080013 */
                                                                                         /* 0x0003c8001e4000f2 */
        /*0388*/              @!P0 STG [R0], R26;                                        /* 0xeedc00000008001a */
        /*0390*/              @!P0 STG [R0], R27;                                        /* 0xeedc00000008001b */
        /*0398*/              @!P0 STG [R0], R29;                                        /* 0xeedc00000008001d */
                                                                                         /* 0x001ffc1fffa000f2 */
        /*03a8*/              @!P0 STG [R0], R30;                                        /* 0xeedc00000008001e */
        /*03b0*/               @P1 BRA 0x70;                                             /* 0xe2400fffcb81000f */
        /*03b8*/                   EXIT;                                                 /* 0xe30000000007000f */
                                                                                         /* 0x001f8000fc0007ff */
        /*03c8*/                   BRA 0x3c0;                                            /* 0xe2400fffff07000f */
        /*03d0*/                   NOP;                                                  /* 0x50b0000000070f00 */
        /*03d8*/                   NOP;                                                  /* 0x50b0000000070f00 */
                                                                                         /* 0x001f8000fc0007e0 */
        /*03e8*/                   NOP;                                                  /* 0x50b0000000070f00 */
        /*03f0*/                   NOP;                                                  /* 0x50b0000000070f00 */
        /*03f8*/                   NOP;                                                  /* 0x50b0000000070f00 */
        ..................................

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment