Last active
July 1, 2016 02:43
-
-
Save whchung/25875271922806e58ac21ad7d707e3cd to your computer and use it in GitHub Desktop.
Sample code for using __fp16 in HCC. __gnu_f2h_ieee() and __gnu_h2f_ieee() are adapted from compiler-rt in LLVM.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <limits.h> | |
#include <stdint.h> | |
// Specialization parameters for the widening conversion in this file.
//
// Source format: values are passed around as their raw 16-bit pattern, so the
// value type (src_t) and its integer representation (src_rep_t) coincide.
typedef uint16_t src_t;
typedef uint16_t src_rep_t;
// Builds an integer constant of the source representation width.
#define SRC_REP_C UINT16_C
// Number of explicitly stored significand bits in the source format.
static const int srcSigBits = 10;
// Leading-zero count used on source representations. __builtin_clz takes an
// unsigned int, so a src_rep_t argument is widened before counting.
#define src_rep_t_clz __builtin_clz

// Destination format: float, with a 32-bit integer representation.
typedef float dst_t;
typedef uint32_t dst_rep_t;
// Builds an integer constant of the destination representation width.
#define DST_REP_C UINT32_C
// Number of explicitly stored significand bits in the destination format.
static const int dstSigBits = 23;
// End of specialization parameters. Two helper routines for conversion to and
// from the representation of floating-point data as integer values follow.
// Returns the integer representation (bit pattern) of a source-format value.
// The bits are reinterpreted through a union, not numerically converted.
// Plain member assignment is used instead of a designated initializer so the
// helper is accepted both as C and as pre-C++20 C++.
static __inline src_rep_t srcToRep(src_t x) {
    union { src_t f; src_rep_t i; } rep;
    rep.f = x;
    return rep.i;
}
// Builds a destination-format value from its integer representation.
// The bits are reinterpreted through a union, not numerically converted.
// Plain member assignment is used instead of a designated initializer so the
// helper is accepted both as C and as pre-C++20 C++.
static __inline dst_t dstFromRep(dst_rep_t x) {
    union { dst_t f; dst_rep_t i; } rep;
    rep.i = x;
    return rep.f;
}
// End helper routines. Conversion implementation follows.
// Widens a source-format floating-point value to the destination format.
//
// The input is split into sign and magnitude, and the magnitude is handled by
// class:
//   - normal:       significand/exponent are shifted into place and the
//                   exponent is rebiased;
//   - NaN/infinity: the result starts as destination infinity, then the quiet
//                   bit and the remaining payload bits are copied in, shifted
//                   to the top of the destination significand;
//   - denormal:     the significand is renormalized and given the matching
//                   destination exponent;
//   - zero:         the result magnitude is zero.
// The sign bit is moved to the destination sign position at the end. No
// rounding step is performed anywhere in this routine.
//
// a: value in the source format.
// Returns the same value expressed in the destination format.
static __inline dst_t __extendXfYf2__(src_t a) {
    // Various constants whose values follow from the type parameters.
    // Any reasonable optimizer will fold and propagate all of these.
    const int srcBits = sizeof(src_t)*CHAR_BIT;
    const int srcExpBits = srcBits - srcSigBits - 1;
    const int srcInfExp = (1 << srcExpBits) - 1;
    const int srcExpBias = srcInfExp >> 1;

    const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigBits;
    const src_rep_t srcInfinity = (src_rep_t)srcInfExp << srcSigBits;
    const src_rep_t srcSignMask = SRC_REP_C(1) << (srcSigBits + srcExpBits);
    const src_rep_t srcAbsMask = srcSignMask - 1;
    const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigBits - 1);
    const src_rep_t srcNaNCode = srcQNaN - 1;

    const int dstBits = sizeof(dst_t)*CHAR_BIT;
    const int dstExpBits = dstBits - dstSigBits - 1;
    const int dstInfExp = (1 << dstExpBits) - 1;
    const int dstExpBias = dstInfExp >> 1;

    const dst_rep_t dstMinNormal = DST_REP_C(1) << dstSigBits;

    // Break a into a sign and representation of the absolute value.
    const src_rep_t aRep = srcToRep(a);
    const src_rep_t aAbs = aRep & srcAbsMask;
    const src_rep_t sign = aRep & srcSignMask;
    dst_rep_t absResult;

    // If sizeof(src_rep_t) < sizeof(int), the subtraction result is promoted
    // to (signed) int. To avoid that, explicitly cast to src_rep_t.
    if ((src_rep_t)(aAbs - srcMinNormal) < srcInfinity - srcMinNormal) {
        // a is a normal number.
        // Extend to the destination type by shifting the significand and
        // exponent into the proper position and rebiasing the exponent.
        absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits);
        absResult += (dst_rep_t)(dstExpBias - srcExpBias) << dstSigBits;
    }
    else if (aAbs >= srcInfinity) {
        // a is NaN or infinity.
        // Conjure the result by beginning with infinity, then setting the qNaN
        // bit (if needed) and copying the rest of the trailing NaN payload
        // field, shifted by the same amount as the quiet bit.
        absResult = (dst_rep_t)dstInfExp << dstSigBits;
        absResult |= (dst_rep_t)(aAbs & srcQNaN) << (dstSigBits - srcSigBits);
        absResult |= (dst_rep_t)(aAbs & srcNaNCode) << (dstSigBits - srcSigBits);
    }
    else if (aAbs) {
        // a is denormal.
        // Renormalize the significand and clear the leading bit, then insert
        // the correct adjusted exponent in the destination type.
        // scale is how many positions the leading set bit of aAbs sits below
        // the position of srcMinNormal's bit; taking the difference of two
        // counts makes the width the count is performed at irrelevant.
        const int scale = src_rep_t_clz(aAbs) - src_rep_t_clz(srcMinNormal);
        absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits + scale);
        // After the shift the leading bit lands on dstMinNormal; XOR clears it.
        absResult ^= dstMinNormal;
        const int resultExponent = dstExpBias - srcExpBias - scale + 1;
        absResult |= (dst_rep_t)resultExponent << dstSigBits;
    }
    else {
        // a is zero.
        absResult = 0;
    }

    // Apply the signbit to (dst_t)abs(a).
    const dst_rep_t result = absResult | (dst_rep_t)sign << (dstBits - srcBits);
    return dstFromRep(result);
}
// Use a forwarding definition and noinline to implement a poor man's alias,
// as there isn't a good cross-platform way of defining one.
//
// a: 16-bit pattern of the source value.
// Returns the widened value as a float.
__attribute__((noinline)) float __extendhfsf2(uint16_t a) {
    return __extendXfYf2__(a);
}
// Entry point exported as __gnu_h2f_ieee; it forwards to __extendhfsf2.
// The linkage specification is only emitted when compiling as C++, so the
// symbol keeps C linkage there and the file also builds as plain C.
// NOTE(review): presumably this is the symbol the compiler calls for
// half-to-float conversion — confirm against the toolchain in use.
#ifdef __cplusplus
extern "C"
#endif
float __gnu_h2f_ieee(uint16_t a) {
    return __extendhfsf2(a);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <limits.h> | |
#include <stdint.h> | |
// Specialization parameters for the narrowing conversion in this file.
//
// Source format: float, with a 32-bit integer representation.
typedef float src_t;
typedef uint32_t src_rep_t;
// Builds an integer constant of the source representation width.
#define SRC_REP_C UINT32_C
// Number of explicitly stored significand bits in the source format.
static const int srcSigBits = 23;

// Destination format: values are returned as their raw 16-bit pattern, so the
// value type (dst_t) and its integer representation (dst_rep_t) coincide.
typedef uint16_t dst_t;
typedef uint16_t dst_rep_t;
// Builds an integer constant of the destination representation width.
#define DST_REP_C UINT16_C
// Number of explicitly stored significand bits in the destination format.
static const int dstSigBits = 10;
// End of specialization parameters. Two helper routines for conversion to and
// from the representation of floating-point data as integer values follow.
// Returns the integer representation (bit pattern) of a source-format value.
// The bits are reinterpreted through a union, not numerically converted.
// Plain member assignment is used instead of a designated initializer so the
// helper is accepted both as C and as pre-C++20 C++.
static __inline src_rep_t srcToRep(src_t x) {
    union { src_t f; src_rep_t i; } rep;
    rep.f = x;
    return rep.i;
}
// Builds a destination-format value from its integer representation.
// The bits are reinterpreted through a union, not numerically converted.
// Plain member assignment is used instead of a designated initializer so the
// helper is accepted both as C and as pre-C++20 C++.
static __inline dst_t dstFromRep(dst_rep_t x) {
    union { dst_t f; dst_rep_t i; } rep;
    rep.i = x;
    return rep.f;
}
// Narrows a source-format floating-point value to the destination format,
// rounding to nearest with ties to even.
//
// The input is split into sign and magnitude, and the magnitude is handled by
// class:
//   - exponent representable as a destination normal: the significand is
//     right-shifted, the exponent rebiased, and the discarded bits rounded
//     (a round-up may carry into the exponent field);
//   - NaN: the result is a destination quiet NaN carrying the truncated
//     payload;
//   - too large: the result is destination infinity;
//   - too small or zero: the significand is denormalized with a sticky bit
//     and rounded, yielding a destination denormal or zero.
// The sign bit is moved to the destination sign position at the end.
//
// a: value in the source format.
// Returns the rounded value expressed in the destination format.
static __inline dst_t __truncXfYf2__(src_t a) {
    // Various constants whose values follow from the type parameters.
    // Any reasonable optimizer will fold and propagate all of these.
    const int srcBits = sizeof(src_t)*CHAR_BIT;
    const int srcExpBits = srcBits - srcSigBits - 1;
    const int srcInfExp = (1 << srcExpBits) - 1;
    const int srcExpBias = srcInfExp >> 1;

    const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigBits;
    const src_rep_t srcSignificandMask = srcMinNormal - 1;
    const src_rep_t srcInfinity = (src_rep_t)srcInfExp << srcSigBits;
    const src_rep_t srcSignMask = SRC_REP_C(1) << (srcSigBits + srcExpBits);
    const src_rep_t srcAbsMask = srcSignMask - 1;
    // Mask of the source significand bits dropped by the conversion, and the
    // pattern of those bits that lies exactly halfway between two results.
    const src_rep_t roundMask = (SRC_REP_C(1) << (srcSigBits - dstSigBits)) - 1;
    const src_rep_t halfway = SRC_REP_C(1) << (srcSigBits - dstSigBits - 1);
    const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigBits - 1);
    const src_rep_t srcNaNCode = srcQNaN - 1;

    const int dstBits = sizeof(dst_t)*CHAR_BIT;
    const int dstExpBits = dstBits - dstSigBits - 1;
    const int dstInfExp = (1 << dstExpBits) - 1;
    const int dstExpBias = dstInfExp >> 1;

    // Source-biased exponents bounding the destination's normal range, and
    // the corresponding source representations with a zero significand.
    const int underflowExponent = srcExpBias + 1 - dstExpBias;
    const int overflowExponent = srcExpBias + dstInfExp - dstExpBias;
    const src_rep_t underflow = (src_rep_t)underflowExponent << srcSigBits;
    const src_rep_t overflow = (src_rep_t)overflowExponent << srcSigBits;

    const dst_rep_t dstQNaN = DST_REP_C(1) << (dstSigBits - 1);
    const dst_rep_t dstNaNCode = dstQNaN - 1;

    // Break a into a sign and representation of the absolute value.
    const src_rep_t aRep = srcToRep(a);
    const src_rep_t aAbs = aRep & srcAbsMask;
    const src_rep_t sign = aRep & srcSignMask;
    dst_rep_t absResult;

    // With unsigned wraparound this single comparison holds exactly when
    // underflow <= aAbs < overflow.
    if (aAbs - underflow < aAbs - overflow) {
        // The exponent of a is within the range of normal numbers in the
        // destination format. We can convert by simply right-shifting with
        // rounding and adjusting the exponent.
        absResult = aAbs >> (srcSigBits - dstSigBits);
        absResult -= (dst_rep_t)(srcExpBias - dstExpBias) << dstSigBits;

        const src_rep_t roundBits = aAbs & roundMask;
        // Round to nearest
        if (roundBits > halfway)
            absResult++;
        // Ties to even
        else if (roundBits == halfway)
            absResult += absResult & 1;
    }
    else if (aAbs > srcInfinity) {
        // a is NaN.
        // Conjure the result by beginning with infinity, setting the qNaN
        // bit and inserting the (truncated) trailing NaN field.
        absResult = (dst_rep_t)dstInfExp << dstSigBits;
        absResult |= dstQNaN;
        absResult |= ((aAbs & srcNaNCode) >> (srcSigBits - dstSigBits)) & dstNaNCode;
    }
    else if (aAbs >= overflow) {
        // a overflows to infinity.
        absResult = (dst_rep_t)dstInfExp << dstSigBits;
    }
    else {
        // a underflows on conversion to the destination type or is an exact
        // zero. The result may be a denormal or zero. Extract the exponent
        // to get the shift amount for the denormalization.
        const int aExp = aAbs >> srcSigBits;
        const int shift = srcExpBias - dstExpBias - aExp + 1;

        const src_rep_t significand = (aRep & srcSignificandMask) | srcMinNormal;

        // Right shift by the denormalization amount with sticky.
        if (shift > srcSigBits) {
            absResult = 0;
        } else {
            // sticky is 1 when any bit shifted out below is set, else 0.
            // The explicit comparison keeps this valid without a bool type.
            const src_rep_t sticky = (significand << (srcBits - shift)) != 0;
            src_rep_t denormalizedSignificand = significand >> shift | sticky;
            absResult = denormalizedSignificand >> (srcSigBits - dstSigBits);
            const src_rep_t roundBits = denormalizedSignificand & roundMask;
            // Round to nearest
            if (roundBits > halfway)
                absResult++;
            // Ties to even
            else if (roundBits == halfway)
                absResult += absResult & 1;
        }
    }

    // Apply the signbit to (dst_t)abs(a).
    const dst_rep_t result = absResult | sign >> (srcBits - dstBits);
    return dstFromRep(result);
}
// Use a forwarding definition and noinline to implement a poor man's alias,
// as there isn't a good cross-platform way of defining one.
//
// a: float value to narrow.
// Returns the 16-bit pattern of the narrowed value.
__attribute__((noinline)) uint16_t __truncsfhf2(float a) {
    return __truncXfYf2__(a);
}
// Entry point exported as __gnu_f2h_ieee; it forwards to __truncsfhf2.
// The linkage specification is only emitted when compiling as C++, so the
// symbol keeps C linkage there and the file also builds as plain C.
// NOTE(review): presumably this is the symbol the compiler calls for
// float-to-half conversion — confirm against the toolchain in use.
#ifdef __cplusplus
extern "C"
#endif
uint16_t __gnu_f2h_ieee(float a) {
    return __truncsfhf2(a);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/lib/Frontend/CompilerInvocation.cpp b/lib/Frontend/CompilerInvocation.cpp | |
index c234cae..4ad9362 100644 | |
--- a/lib/Frontend/CompilerInvocation.cpp | |
+++ b/lib/Frontend/CompilerInvocation.cpp | |
@@ -1470,6 +1470,12 @@ void CompilerInvocation::setLangDefaults(LangOptions &Opts, InputKind IK, | |
Opts.ImplicitInt = Std.hasImplicitInt(); | |
Opts.CPlusPlusAMP = Std.isCPlusPlusAMP(); | |
+ // Set C++AMP-specific defaults | |
+ if (Opts.CPlusPlusAMP) { | |
+ Opts.NativeHalfType = 1; | |
+ Opts.NativeHalfArgsAndReturns = 1; | |
+ } | |
+ | |
// Set OpenCL Version. | |
Opts.OpenCL = LangStd == LangStandard::lang_opencl || IK == IK_OpenCL; | |
if (LangStd == LangStandard::lang_opencl) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Returns the sum of two __fp16 values.
// NOTE(review): the [[hc]][[cpu]] attributes presumably request that the
// function be compiled for both the accelerator and the host — confirm
// against the HCC documentation.
__fp16 foo(__fp16 a, __fp16 b) [[hc]][[cpu]] {
    return a + b;
}
int main() { | |
__fp16 ret = foo(1.0, 2.0); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Build it with:
Requires the top-of-tree (ToT) HCC frontend (git@github.com:RadeonOpenCompute/hcc-clang-upgrade.git, develop branch) with the patch from this gist (hcc-clang-upgrade.diff) applied.