Skip to content

Instantly share code, notes, and snippets.

@allanmac
Last active December 14, 2015 17:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save allanmac/5125376 to your computer and use it in GitHub Desktop.
Save allanmac/5125376 to your computer and use it in GitHub Desktop.
Inspecting fdimf() output.
// Qualifiers shared by every test kernel in this file:
//   extern "C"  - C linkage, so the kernel symbols are not C++-name-mangled
//                 (presumably to keep the function names readable in the
//                 cuobjdump listing - confirm).
//   __global__  - marks the function as a CUDA kernel entry point.
#define KERNEL_QUALIFIERS extern "C" __global__
// Baseline kernel: each thread stores the library fdimf(x, y) result into
// its own slot of fout, indexed by threadIdx.x.
KERNEL_QUALIFIERS
void fdimfTest(const float x, const float y, float* const fout)
{
  const float positiveDiff = fdimf(x, y);
  fout[threadIdx.x] = positiveDiff;
}
// Hand-written variant: subtract, then keep the difference only when it
// compares >= 0.0f; otherwise store 0.0f. A NaN difference fails the
// comparison, so 0.0f is stored in that case as well.
KERNEL_QUALIFIERS
void fdimfTest2(const float x, const float y, float* const fout)
{
  const float diff = x - y;

  // Start from the fallback and overwrite it only on the ">= 0" path.
  float result = 0.0f;
  if (diff >= 0.0f)
  {
    result = diff;
  }

  fout[threadIdx.x] = result;
}
// Inline-PTX variant: performs the subtraction and the "keep if >= 0, else
// zero" selection with explicit PTX instructions instead of C++ expressions,
// then stores the result into fout[threadIdx.x].
KERNEL_QUALIFIERS
void fdimfTest3(const float x, const float y, float* const fout)
{
// Zero operand handed to slct as the fallback value.
const float rz = 0.0f;
float d;
// d = x - y
asm("sub.f32 %0, %1, %2;" : "=f"(d) : "f"(x), "f"(y));
// slct d, a, b, c  =>  d = (c >= 0) ? a : b.  Here a and c are both d and
// b is rz, so d keeps its value when it is >= 0 and becomes 0.0f otherwise.
// "+f" makes d a read-write operand, which is why %0 appears as both the
// destination and a source.
// NOTE(review): per the PTX ISA a NaN selector picks operand b, which would
// yield 0.0f for a NaN difference - confirm against the PTX documentation.
asm("slct.f32.f32 %0, %0, %1, %0;" : "+f"(d) : "f"(rz));
fout[threadIdx.x] = d;
}
// fmaxf variant: clamps the difference x - y from below at 0.0f using
// fmaxf and stores it into the calling thread's slot of fout.
KERNEL_QUALIFIERS
void fdimfTest4(const float x, const float y, float* const fout)
{
  const float diff = x - y;
  fout[threadIdx.x] = fmaxf(diff, 0.0f);
}
@allanmac
Copy link
Author

allanmac commented Mar 9, 2013

Compiled with: nvcc -m 32 -arch sm_35 -cubin fdimf.cu

Dumped with: cuobjdump.exe -sass fdimf.cubin

    code for sm_35
        Function : fdimfTest
        ..........................
    /*0020*/     /*0x289c081e5de01c00*/     FSETP.GTU.AND P0, PT, R2, c [0x0] [0x144], PT;
    /*0028*/     /*0x289c080a62c10000*/     FADD R2, R2, -c [0x0] [0x144];
    /*0038*/     /*0x7f9c080ae5000000*/     SEL R2, R2, RZ, P0;
        ..........................

        Function : fdimfTest2
        ..........................
    /*0020*/     /*0x289c080a62c10000*/     FADD R2, R2, -c [0x0] [0x144];
    /*0030*/     /*0x011ffc0add480800*/     FCMP.LTU R2, RZ, R2, R2;
        ...........................

        Function : fdimfTest3
        ...........................
    /*0020*/     /*0x289c080a62c10000*/     FADD R2, R2, -c [0x0] [0x144];
    /*0030*/     /*0x7f9c080add300800*/     FCMP.GE R2, R2, RZ, R2;
        ...........................

        Function : fdimfTest4
        ...........................
    /*0020*/     /*0x289c080a62c10000*/     FADD R2, R2, -c [0x0] [0x144];
    /*0030*/     /*0x7f9c080ae3003c00*/     FMNMX R2, R2, RZ, !PT;
        ...........................

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment