allanmac/fdimf.cu

## fdimf.cu
// Qualifiers shared by every kernel in this file:
//   extern "C"  - C language linkage, so the kernel names are not C++-mangled.
//   __global__  - marks the function as a CUDA kernel entry point.
#define KERNEL_QUALIFIERS extern "C" __global__

// Baseline kernel: stores the library result fdimf(x, y).
//
// x, y  - scalar operands, identical for every thread.
// fout  - output buffer; each thread writes element [threadIdx.x].
//
// blockIdx is not used, so threads that share a threadIdx.x in different
// blocks write the same element. There is no bounds check: fout must have
// room for every threadIdx.x value in the launch.
KERNEL_QUALIFIERS
void fdimfTest(const float x, const float y, float* const fout)
{
  const unsigned int slot    = threadIdx.x;
  const float        posDiff = fdimf(x, y);

  fout[slot] = posDiff;
}

// Variant built from plain C++: subtract, then keep the difference only when
// it compares >= 0.0f; otherwise store 0.0f.
//
// x, y  - scalar operands, identical for every thread.
// fout  - output buffer; each thread writes element [threadIdx.x].
//
// A NaN difference fails the >= comparison, so this variant stores 0.0f in
// that case. No bounds check is performed on fout.
KERNEL_QUALIFIERS
void fdimfTest2(const float x, const float y, float* const fout)
{
  const float diff = x - y;

  // Start from the clamped value and overwrite it only for a non-negative
  // difference.
  float result = 0.0f;
  if (diff >= 0.0f)
    result = diff;

  fout[threadIdx.x] = result;
}

// Variant written directly in inline PTX: a sub.f32 followed by a
// slct.f32.f32 that keeps the difference or replaces it with 0.0f.
//
// x, y  - scalar operands, identical for every thread.
// fout  - output buffer; each thread writes element [threadIdx.x].
//
// No bounds check is performed on fout.
KERNEL_QUALIFIERS
void fdimfTest3(const float x, const float y, float* const fout)
{
  const float zero = 0.0f;
  float       diff;
  float       clamped;

  // diff = x - y
  asm("sub.f32       %0, %1, %2;"     : "=f"(diff)    : "f"(x), "f"(y));

  // slct d, a, b, c picks a when c >= 0 and b otherwise (see the PTX ISA);
  // here a and c are both the difference and b is zero.
  asm("slct.f32.f32  %0, %1, %2, %1;" : "=f"(clamped) : "f"(diff), "f"(zero));

  fout[threadIdx.x] = clamped;
}

// Variant built on fmaxf: stores the larger of (x - y) and 0.0f.
//
// x, y  - scalar operands, identical for every thread.
// fout  - output buffer; each thread writes element [threadIdx.x].
//
// No bounds check is performed on fout.
KERNEL_QUALIFIERS
void fdimfTest4(const float x, const float y, float* const fout)
{
  const float diff = x - y;

  fout[threadIdx.x] = fmaxf(diff, 0.0f);
}
	// Qualifiers shared by every kernel in this file:
	//   extern "C"  - C language linkage, so the kernel names are not C++-mangled.
	//   __global__  - marks the function as a CUDA kernel entry point.
	#define KERNEL_QUALIFIERS extern "C" __global__

	// Baseline kernel: stores the library result fdimf(x, y).
	//
	// x, y  - scalar operands, identical for every thread.
	// fout  - output buffer; each thread writes element [threadIdx.x].
	//
	// blockIdx is not used, so threads that share a threadIdx.x in different
	// blocks write the same element. There is no bounds check: fout must have
	// room for every threadIdx.x value in the launch.
	KERNEL_QUALIFIERS
	void fdimfTest(const float x, const float y, float* const fout)
	{
	  const unsigned int slot    = threadIdx.x;
	  const float        posDiff = fdimf(x, y);

	  fout[slot] = posDiff;
	}

	// Variant built from plain C++: subtract, then keep the difference only when
	// it compares >= 0.0f; otherwise store 0.0f.
	//
	// x, y  - scalar operands, identical for every thread.
	// fout  - output buffer; each thread writes element [threadIdx.x].
	//
	// A NaN difference fails the >= comparison, so this variant stores 0.0f in
	// that case. No bounds check is performed on fout.
	KERNEL_QUALIFIERS
	void fdimfTest2(const float x, const float y, float* const fout)
	{
	  const float diff = x - y;

	  // Start from the clamped value and overwrite it only for a non-negative
	  // difference.
	  float result = 0.0f;
	  if (diff >= 0.0f)
	    result = diff;

	  fout[threadIdx.x] = result;
	}

	// Variant written directly in inline PTX: a sub.f32 followed by a
	// slct.f32.f32 that keeps the difference or replaces it with 0.0f.
	//
	// x, y  - scalar operands, identical for every thread.
	// fout  - output buffer; each thread writes element [threadIdx.x].
	//
	// No bounds check is performed on fout.
	KERNEL_QUALIFIERS
	void fdimfTest3(const float x, const float y, float* const fout)
	{
	  const float zero = 0.0f;
	  float diff;
	  float clamped;

	  // diff = x - y
	  asm("sub.f32 %0, %1, %2;" : "=f"(diff) : "f"(x), "f"(y));

	  // slct d, a, b, c picks a when c >= 0 and b otherwise (see the PTX ISA);
	  // here a and c are both the difference and b is zero.
	  asm("slct.f32.f32 %0, %1, %2, %1;" : "=f"(clamped) : "f"(diff), "f"(zero));

	  fout[threadIdx.x] = clamped;
	}

	// Variant built on fmaxf: stores the larger of (x - y) and 0.0f.
	//
	// x, y  - scalar operands, identical for every thread.
	// fout  - output buffer; each thread writes element [threadIdx.x].
	//
	// No bounds check is performed on fout.
	KERNEL_QUALIFIERS
	void fdimfTest4(const float x, const float y, float* const fout)
	{
	  const float diff = x - y;

	  fout[threadIdx.x] = fmaxf(diff, 0.0f);
	}