Skip to content

Instantly share code, notes, and snippets.

@allanmac
Created February 7, 2014 03:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save allanmac/8856847 to your computer and use it in GitHub Desktop.
Save allanmac/8856847 to your computer and use it in GitHub Desktop.
Why I2I?
// -*- compile-command: "nvcc -m 32 -arch sm_35 -Xptxas=-v,-abi=no -cubin short4.cu"; -*-
#include <stdint.h>
typedef uint32_t u32;
typedef uint64_t u64;
// Overlay type: the same storage viewed as a short4, a uint2, or a u64.
// The kernels taking x64 pointers copy either the whole union or one of
// these views.
typedef union
{
short4 s16v4; // view as a short4 vector
uint2 b32v2; // view as a uint2 vector
u64 b64; // view as one u64 (uint64_t) integer
} x64;
// Overlay type: the same storage viewed as a short2 or a u32.
// The kernels taking x32 pointers copy either the whole union or the
// integer view.
typedef union
{
short2 s16v2; // view as a short2 vector
u32 b32; // view as one u32 (uint32_t) integer
} x32;
//
//
//
// t1: copy one short4 from `in` to `out`, staging the value in a
// __shared__ variable on the way.
//
//   in   source, declared as const int16_t* but read through a short4 view
//   out  destination, declared as int16_t* but written through a short4 view
//
// No thread index is used, so every thread that runs the kernel performs
// the same load and the same stores.
// NOTE(review): the int16_t pointers are reinterpreted as short4 pointers;
// this assumes the caller passes buffers aligned for short4 -- confirm.
// NOTE(review): `mydata` is written and read back without a barrier; that
// is only harmless while every thread stores the same value -- confirm
// before reusing this kernel outside the code-generation experiment.
extern "C"
__global__
void t1(const int16_t* const in, int16_t* const out)
{
    __shared__ short4 mydata;

    // Keep the const qualifier on the source view; the original C-style
    // cast to `short4*` silently discarded it.
    mydata = *reinterpret_cast<const short4*>(in);

    *reinterpret_cast<short4*>(out) = mydata;
}
// t2: copy one short4 from `in` to `out` directly.
//
//   in   source, declared as const int16_t* but read through a short4 view
//   out  destination, declared as int16_t* but written through a short4 view
//
// No thread index is used, so every thread that runs the kernel performs
// the same load and store.
// NOTE(review): the int16_t pointers are reinterpreted as short4 pointers;
// this assumes the caller passes buffers aligned for short4 -- confirm.
extern "C"
__global__
void t2(const int16_t* const in, int16_t* const out)
{
    // Keep the const qualifier on the source view; the original C-style
    // cast to `short4*` silently discarded it.
    const short4* const src = reinterpret_cast<const short4*>(in);
    short4* const dst = reinterpret_cast<short4*>(out);

    *dst = *src;
}
// t3: copy the short4 that `in` points to into the short4 that `out`
// points to, as a whole-vector assignment. No thread index is used.
extern "C"
__global__
void t3(const short4* const in, short4* const out)
{
    const short4 value = in[0];
    out[0] = value;
}
// t4: copy the whole x64 union from `in` to `out` with a single
// union-to-union assignment. No thread index is used.
extern "C"
__global__
void t4(const x64* const in, x64* const out)
{
    out[0] = in[0];
}
// t5: copy an x64 from `in` to `out` through its uint2 member (b32v2)
// only. No thread index is used.
extern "C"
__global__
void t5(const x64* const in, x64* const out)
{
    out->b32v2 = in->b32v2;
}
// t6: copy an x64 from `in` to `out` through its u64 member (b64) only.
// No thread index is used.
extern "C"
__global__
void t6(const x64* const in, x64* const out)
{
    const u64 bits = in->b64;
    out->b64 = bits;
}
// t7: copy an x64 from `in` to `out` through its short4 member (s16v4)
// only. No thread index is used.
extern "C"
__global__
void t7(const x64* const in, x64* const out)
{
    out->s16v4 = in->s16v4;
}
// t8: copy the whole x32 union from `in` to `out` with a single
// union-to-union assignment. No thread index is used.
extern "C"
__global__
void t8(const x32* const in, x32* const out)
{
    out[0] = in[0];
}
// t9: copy an x32 from `in` to `out` through its u32 member (b32) only.
// No thread index is used.
extern "C"
__global__
void t9(const x32* const in, x32* const out)
{
    const u32 word = in->b32;
    out->b32 = word;
}
@allanmac
Copy link
Author

allanmac commented Feb 7, 2014

cuobjdump.exe -sass short4.cubin

    code for sm_35
        Function : t4
    .headerflags    @"EF_CUDA_SM35 EF_CUDA_PTX_SM(EF_CUDA_SM35)"
                                                            /* 0x088c8c8c8c11dca0 */
        /*0008*/                   MOV R0, c[0x0][0x140];   /* 0x64c03c00281c0002 */
        /*0010*/                   LD.64 R4, [R0];          /* 0xc5000000001c0010 */
        /*0018*/                   I2I.U32.U16 R2, R4;      /* 0xe6000000021c180a */
        /*0020*/                   MOV R0, c[0x0][0x144];   /* 0x64c03c00289c0002 */
        /*0028*/                   I2I.U32.U16 R1, R4.H1;   /* 0xe6002000021c1806 */
        /*0030*/                   I2I.U16.U16 R4, R5;      /* 0xe6000000029c1412 */
        /*0038*/                   I2I.U32.U16 R3, R5.H1;   /* 0xe6002000029c180e */
                                                            /* 0x08000001b810a090 */
        /*0048*/                   BFI R2, R1, 0x1010, R2;  /* 0xb7800808081c0409 */
        /*0050*/                   BFI R3, R3, 0x1010, R4;  /* 0xb7801008081c0c0d */
        /*0058*/                   ST.64 [R0], R2;          /* 0xe5000000001c0008 */
        /*0060*/                   EXIT;                    /* 0x18000000001c003c */
        /*0068*/                   BRA 0x68;                /* 0x12007ffffc1c003c */
        /*0070*/                   NOP;                     /* 0x85800000001c3c02 */
        /*0078*/                   NOP;                     /* 0x85800000001c3c02 */
        ...................


        Function : t8
    .headerflags    @"EF_CUDA_SM35 EF_CUDA_PTX_SM(EF_CUDA_SM35)"
                                                            /* 0x0880a0a0108cdca0 */
        /*0008*/                   MOV R0, c[0x0][0x140];   /* 0x64c03c00281c0002 */
        /*0010*/                   LD R0, [R0];             /* 0xc4000000001c0000 */
        /*0018*/                   I2I.U32.U16 R2, R0;      /* 0xe6000000001c180a */
        /*0020*/                   I2I.U32.U16 R1, R0.H1;   /* 0xe6002000001c1806 */
        /*0028*/                   MOV R0, c[0x0][0x144];   /* 0x64c03c00289c0002 */
        /*0030*/                   BFI R1, R1, 0x1010, R2;  /* 0xb7800808081c0405 */
        /*0038*/                   ST [R0], R1;             /* 0xe4000000001c0004 */
                                                            /* 0x08000000000000b8 */
        /*0048*/                   EXIT;                    /* 0x18000000001c003c */
        /*0050*/                   BRA 0x50;                /* 0x12007ffffc1c003c */
        /*0058*/                   NOP;                     /* 0x85800000001c3c02 */
        /*0060*/                   NOP;                     /* 0x85800000001c3c02 */
        /*0068*/                   NOP;                     /* 0x85800000001c3c02 */
        /*0070*/                   NOP;                     /* 0x85800000001c3c02 */
        /*0078*/                   NOP;                     /* 0x85800000001c3c02 */
        ...................


        Function : t2
    .headerflags    @"EF_CUDA_SM35 EF_CUDA_PTX_SM(EF_CUDA_SM35)"
                                                            /* 0x088c8c8c8c11dca0 */
        /*0008*/                   MOV R0, c[0x0][0x140];   /* 0x64c03c00281c0002 */
        /*0010*/                   LD.64 R4, [R0];          /* 0xc5000000001c0010 */
        /*0018*/                   I2I.U32.U16 R2, R4;      /* 0xe6000000021c180a */
        /*0020*/                   MOV R0, c[0x0][0x144];   /* 0x64c03c00289c0002 */
        /*0028*/                   I2I.U32.U16 R1, R4.H1;   /* 0xe6002000021c1806 */
        /*0030*/                   I2I.U16.U16 R4, R5;      /* 0xe6000000029c1412 */
        /*0038*/                   I2I.U32.U16 R3, R5.H1;   /* 0xe6002000029c180e */
                                                            /* 0x08000001b810a090 */
        /*0048*/                   BFI R2, R1, 0x1010, R2;  /* 0xb7800808081c0409 */
        /*0050*/                   BFI R3, R3, 0x1010, R4;  /* 0xb7801008081c0c0d */
        /*0058*/                   ST.64 [R0], R2;          /* 0xe5000000001c0008 */
        /*0060*/                   EXIT;                    /* 0x18000000001c003c */
        /*0068*/                   BRA 0x68;                /* 0x12007ffffc1c003c */
        /*0070*/                   NOP;                     /* 0x85800000001c3c02 */
        /*0078*/                   NOP;                     /* 0x85800000001c3c02 */
        ...................


        Function : t6
    .headerflags    @"EF_CUDA_SM35 EF_CUDA_PTX_SM(EF_CUDA_SM35)"
                                                           /* 0x080001b811dca010 */
        /*0008*/                   MOV R1, c[0x0][0x140];  /* 0x64c03c00281c0006 */
        /*0010*/                   MOV R0, c[0x0][0x144];  /* 0x64c03c00289c0002 */
        /*0018*/                   LD.64 R2, [R1];         /* 0xc5000000001c0408 */
        /*0020*/                   ST.64 [R0], R2;         /* 0xe5000000001c0008 */
        /*0028*/                   EXIT;                   /* 0x18000000001c003c */
        /*0030*/                   BRA 0x30;               /* 0x12007ffffc1c003c */
        /*0038*/                   NOP;                    /* 0x85800000001c3c02 */
        ...................


        Function : t3
    .headerflags    @"EF_CUDA_SM35 EF_CUDA_PTX_SM(EF_CUDA_SM35)"
                                                            /* 0x088c8c8c8c11dca0 */
        /*0008*/                   MOV R0, c[0x0][0x140];   /* 0x64c03c00281c0002 */
        /*0010*/                   LD.64 R4, [R0];          /* 0xc5000000001c0010 */
        /*0018*/                   I2I.U32.U16 R2, R4;      /* 0xe6000000021c180a */
        /*0020*/                   MOV R0, c[0x0][0x144];   /* 0x64c03c00289c0002 */
        /*0028*/                   I2I.U32.U16 R1, R4.H1;   /* 0xe6002000021c1806 */
        /*0030*/                   I2I.U16.U16 R4, R5;      /* 0xe6000000029c1412 */
        /*0038*/                   I2I.U32.U16 R3, R5.H1;   /* 0xe6002000029c180e */
                                                            /* 0x08000001b810a090 */
        /*0048*/                   BFI R2, R1, 0x1010, R2;  /* 0xb7800808081c0409 */
        /*0050*/                   BFI R3, R3, 0x1010, R4;  /* 0xb7801008081c0c0d */
        /*0058*/                   ST.64 [R0], R2;          /* 0xe5000000001c0008 */
        /*0060*/                   EXIT;                    /* 0x18000000001c003c */
        /*0068*/                   BRA 0x68;                /* 0x12007ffffc1c003c */
        /*0070*/                   NOP;                     /* 0x85800000001c3c02 */
        /*0078*/                   NOP;                     /* 0x85800000001c3c02 */
        ...................


        Function : t7
    .headerflags    @"EF_CUDA_SM35 EF_CUDA_PTX_SM(EF_CUDA_SM35)"
                                                            /* 0x088c8c8c8c11dca0 */
        /*0008*/                   MOV R0, c[0x0][0x140];   /* 0x64c03c00281c0002 */
        /*0010*/                   LD.64 R4, [R0];          /* 0xc5000000001c0010 */
        /*0018*/                   I2I.U32.U16 R2, R4;      /* 0xe6000000021c180a */
        /*0020*/                   MOV R0, c[0x0][0x144];   /* 0x64c03c00289c0002 */
        /*0028*/                   I2I.U32.U16 R1, R4.H1;   /* 0xe6002000021c1806 */
        /*0030*/                   I2I.U16.U16 R4, R5;      /* 0xe6000000029c1412 */
        /*0038*/                   I2I.U32.U16 R3, R5.H1;   /* 0xe6002000029c180e */
                                                            /* 0x08000001b810a090 */
        /*0048*/                   BFI R2, R1, 0x1010, R2;  /* 0xb7800808081c0409 */
        /*0050*/                   BFI R3, R3, 0x1010, R4;  /* 0xb7801008081c0c0d */
        /*0058*/                   ST.64 [R0], R2;          /* 0xe5000000001c0008 */
        /*0060*/                   EXIT;                    /* 0x18000000001c003c */
        /*0068*/                   BRA 0x68;                /* 0x12007ffffc1c003c */
        /*0070*/                   NOP;                     /* 0x85800000001c3c02 */
        /*0078*/                   NOP;                     /* 0x85800000001c3c02 */
        ...................


        Function : t5
    .headerflags    @"EF_CUDA_SM35 EF_CUDA_PTX_SM(EF_CUDA_SM35)"
                                                           /* 0x080001b811dca010 */
        /*0008*/                   MOV R1, c[0x0][0x140];  /* 0x64c03c00281c0006 */
        /*0010*/                   MOV R0, c[0x0][0x144];  /* 0x64c03c00289c0002 */
        /*0018*/                   LD.64 R2, [R1];         /* 0xc5000000001c0408 */
        /*0020*/                   ST.64 [R0], R2;         /* 0xe5000000001c0008 */
        /*0028*/                   EXIT;                   /* 0x18000000001c003c */
        /*0030*/                   BRA 0x30;               /* 0x12007ffffc1c003c */
        /*0038*/                   NOP;                    /* 0x85800000001c3c02 */
        ...................


        Function : t9
    .headerflags    @"EF_CUDA_SM35 EF_CUDA_PTX_SM(EF_CUDA_SM35)"
                                                           /* 0x080000b810dca010 */
        /*0008*/                   MOV R0, c[0x0][0x140];  /* 0x64c03c00281c0002 */
        /*0010*/                   MOV R1, c[0x0][0x144];  /* 0x64c03c00289c0006 */
        /*0018*/                   LD R0, [R0];            /* 0xc4000000001c0000 */
        /*0020*/                   ST [R1], R0;            /* 0xe4000000001c0400 */
        /*0028*/                   EXIT;                   /* 0x18000000001c003c */
        /*0030*/                   BRA 0x30;               /* 0x12007ffffc1c003c */
        /*0038*/                   NOP;                    /* 0x85800000001c3c02 */
        ...................


        Function : t1
    .headerflags    @"EF_CUDA_SM35 EF_CUDA_PTX_SM(EF_CUDA_SM35)"
                                                            /* 0x08908c8c8c8ddca0 */
        /*0008*/                   MOV R0, c[0x0][0x140];   /* 0x64c03c00281c0002 */
        /*0010*/                   LD.64 R4, [R0];          /* 0xc5000000001c0010 */
        /*0018*/                   I2I.U16.U16 R3, R5;      /* 0xe6000000029c140e */
        /*0020*/                   I2I.U32.U16 R0, R5.H1;   /* 0xe6002000029c1802 */
        /*0028*/                   I2I.U32.U16 R2, R4;      /* 0xe6000000021c180a */
        /*0030*/                   I2I.U32.U16 R1, R4.H1;   /* 0xe6002000021c1806 */
        /*0038*/                   BFI R3, R0, 0x1010, R3;  /* 0xb7800c08081c000d */
                                                            /* 0x080001b810b8a010 */
        /*0048*/                   MOV R0, c[0x0][0x144];   /* 0x64c03c00289c0002 */
        /*0050*/                   BFI R2, R1, 0x1010, R2;  /* 0xb7800808081c0409 */
        /*0058*/                   STS.64 [RZ], R2;         /* 0x7ae80000001ffc0a */
        /*0060*/                   ST.64 [R0], R2;          /* 0xe5000000001c0008 */
        /*0068*/                   EXIT;                    /* 0x18000000001c003c */
        /*0070*/                   BRA 0x70;                /* 0x12007ffffc1c003c */
        /*0078*/                   NOP;                     /* 0x85800000001c3c02 */
        ...................

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment