Skip to content

Instantly share code, notes, and snippets.

@StefanKarpinski
Last active December 19, 2015 19:49
Show Gist options
  • Save StefanKarpinski/9092d04bc24c44493d08 to your computer and use it in GitHub Desktop.
Save StefanKarpinski/9092d04bc24c44493d08 to your computer and use it in GitHub Desktop.
diff --git a/base/exports.jl b/base/exports.jl
index ce47f3f..69fc4de 100644
--- a/base/exports.jl
+++ b/base/exports.jl
@@ -330,7 +330,7 @@ export
fld,
flipsign,
float,
- #float16,
+ float16,
float32,
float64,
floor,
diff --git a/base/float.jl b/base/float.jl
index 3b782c4..ab550ab 100644
--- a/base/float.jl
+++ b/base/float.jl
@@ -1,7 +1,10 @@
-#bitstype 16 Float16 <: FloatingPoint
+## non-core floating point types ##
+
+bitstype 16 Float16 <: FloatingPoint
+
## conversions to floating-point ##
-for t1 in (Float32,Float64) #,Float16)
+for t1 in (Float32,Float64,Float16)
for st in (Int8,Int16,Int32,Int64,Int128)
@eval begin
convert(::Type{$t1},x::($st)) = box($t1,sitofp($t1,unbox($st,x)))
@@ -15,12 +18,98 @@ for t1 in (Float32,Float64) #,Float16)
end
end
end
-#convert(::Type{Float16}, x::Union(Float32,Float64)) = box(Float16,fptrunc(x,Float16))
-#convert(::Type{Float32}, x::Float16) = box(Float32,fpext(Float32,x))
convert(::Type{Float32}, x::Float64) = box(Float32,fptrunc(Float32,x))
-# REPLACE when enabling Float16
-#convert(::Type{Float64}, x::Union(Float32,Float16)) = box(Float64,fpext(Float64,x))
+# Widen a Float16 to Float32 by decoding the half-precision bit pattern by hand:
+# 1 sign bit, 5 exponent bits (bias 15) and 10 significand bits are re-encoded
+# as 1 sign bit, 8 exponent bits (bias 127) and 23 significand bits.
+# Returns the Float32 whose bits are the re-encoded pattern.
+function convert(::Type{Float32}, val::Float16)
+    # The raw bits get their own variable so `val` is never rebound to a
+    # value of a different type.
+    ival = uint32(reinterpret(Uint16, val))
+    sign = (ival & 0x8000) >> 15
+    exp = (ival & 0x7c00) >> 10
+    sig = ival & 0x03ff
+    ret::Uint32
+
+    if exp == 0
+        if sig == 0
+            # Signed zero: only the sign bit can be set.
+            ret = sign << 31
+        else
+            # Subnormal: locate the highest set significand bit. It becomes
+            # the implicit leading one of a normal Float32, so it is cleared
+            # from the stored significand and the exponent is lowered by the
+            # number of positions scanned.
+            n_bit = 1
+            bit = 0x0200
+            while (bit & sig) == 0
+                n_bit = n_bit + 1
+                bit = bit >> 1
+            end
+            sign = sign << 31
+            exp = (-14 - n_bit + 127) << 23
+            sig = ((sig & (~bit)) << n_bit) << (23 - 10)
+            ret = sign | exp | sig
+        end
+    elseif exp == 0x1f
+        if sig == 0
+            # Infinity: all-ones exponent, zero significand, sign preserved.
+            ret = (sign << 31) | 0x7f800000
+        else
+            # NaN: keep the sign and the payload bits and set the quiet bit,
+            # rather than collapsing every NaN to one fixed bit pattern.
+            ret = (sign << 31) | 0x7fc00000 | (sig << (23 - 10))
+        end
+    else
+        # Normal number: rebias the exponent and widen the significand.
+        sign = sign << 31
+        exp = (exp - 15 + 127) << 23
+        sig = sig << (23 - 10)
+        ret = sign | exp | sig
+    end
+    return reinterpret(Float32, ret)
+end
+
+# Widen a Float16 to Float64 by decoding the half-precision bit pattern by hand:
+# 1 sign bit, 5 exponent bits (bias 15) and 10 significand bits are re-encoded
+# as 1 sign bit, 11 exponent bits (bias 1023) and 52 significand bits.
+# Returns the Float64 whose bits are the re-encoded pattern.
+function convert(::Type{Float64}, val::Float16)
+    # The raw bits get their own variable so `val` is never rebound to a
+    # value of a different type.
+    ival = uint64(reinterpret(Uint16, val))
+    sign = (ival & 0x8000) >> 15
+    exp = (ival & 0x7c00) >> 10
+    sig = ival & 0x03ff
+    ret::Uint64
+
+    if exp == 0
+        if sig == 0
+            # Signed zero: only the sign bit can be set.
+            ret = sign << 63
+        else
+            # Subnormal: locate the highest set significand bit. It becomes
+            # the implicit leading one of a normal Float64, so it is cleared
+            # from the stored significand and the exponent is lowered by the
+            # number of positions scanned.
+            n_bit = 1
+            bit = 0x0200
+            while (bit & sig) == 0
+                n_bit = n_bit + 1
+                bit = bit >> 1
+            end
+            sign = sign << 63
+            exp = (-14 - n_bit + 1023) << 52
+            sig = ((sig & (~bit)) << n_bit) << (52 - 10)
+            ret = sign | exp | sig
+        end
+    elseif exp == 0x1f
+        if sig == 0
+            # Infinity: all-ones exponent, zero significand, sign preserved.
+            ret = (sign << 63) | 0x7ff0000000000000
+        else
+            # NaN: keep the sign and the payload bits and set the quiet bit,
+            # rather than collapsing every NaN to one fixed bit pattern.
+            ret = (sign << 63) | 0x7ff8000000000000 | (sig << (52 - 10))
+        end
+    else
+        # Normal number: rebias the exponent and widen the significand.
+        sign = sign << 63
+        exp = (exp - 15 + 1023) << 52
+        sig = sig << (52 - 10)
+        ret = sign | exp | sig
+    end
+
+    return reinterpret(Float64, ret)
+end
+
+convert(::Type{Float16}, x::Union(Float32,Float64)) = box(Float16,fptrunc(Float16,x))
+
+# TODO: replace the manual Float16 -> Float32/Float64 conversions above with the
+# fpext-based definitions below once LLVM fixes 16-bit truncation
+#convert(::Type{Float32}, x::Float16) = box(Float32,fpext(Float32,x))
+#convert(::Type{Float64}, x::Float16) = box(Float64,fpext(Float64,x))
convert(::Type{Float64}, x::Float32) = box(Float64,fpext(Float64,x))
convert(::Type{FloatingPoint}, x::Bool) = convert(Float32, x)
@@ -36,7 +125,7 @@ convert(::Type{FloatingPoint}, x::Uint32) = convert(Float64, x)
convert(::Type{FloatingPoint}, x::Uint64) = convert(Float64, x) # LOSSY
convert(::Type{FloatingPoint}, x::Uint128) = convert(Float64, x) # LOSSY
-#float16(x) = convert(Float16, x)
+float16(x) = convert(Float16, x)
float32(x) = convert(Float32, x)
float64(x) = convert(Float64, x)
float(x) = convert(FloatingPoint, x)
@@ -95,10 +184,10 @@ floor(x::Float64) = ccall((:floor, Base.libm_name), Float64, (Float64,), x)
## floating point promotions ##
-#promote_rule(::Type{Float32}, ::Type{Float16}) = Float32
+promote_rule(::Type{Float32}, ::Type{Float16}) = Float32
promote_rule(::Type{Float64}, ::Type{Float32}) = Float64
-#morebits(::Type{Float16}) = Float32
+morebits(::Type{Float16}) = Float32
morebits(::Type{Float32}) = Float64
## floating point arithmetic ##
diff --git a/base/io.jl b/base/io.jl
index 74fc9d0..56f3d68 100644
--- a/base/io.jl
+++ b/base/io.jl
@@ -49,7 +49,7 @@ else
end
write(s::IO, x::Bool) = write(s, uint8(x))
-#write(s::IO, x::Float16) = write(s, reinterpret(Int16,x))
+write(s::IO, x::Float16) = write(s, reinterpret(Int16,x))
write(s::IO, x::Float32) = write(s, reinterpret(Int32,x))
write(s::IO, x::Float64) = write(s, reinterpret(Int64,x))
diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp
index a2e3b2c..5a40814 100644
--- a/src/intrinsics.cpp
+++ b/src/intrinsics.cpp
@@ -44,7 +44,6 @@ namespace JL_I {
using namespace JL_I;
#include "ccall.cpp"
-#define DISABLE_FLOAT16
/*
low-level intrinsics design:
@@ -60,12 +59,9 @@ using namespace JL_I;
static Type *FTnbits(size_t nb)
{
- #ifndef DISABLE_FLOAT16
if(nb == 16)
return Type::getHalfTy(jl_LLVMContext);
- else
- #endif
- if(nb == 32)
+ else if(nb == 32)
return Type::getFloatTy(jl_LLVMContext);
else if(nb == 64)
return Type::getDoubleTy(jl_LLVMContext);
@@ -141,12 +137,9 @@ static Value *emit_unboxed(jl_value_t *e, jl_codectx_t *ctx)
#else
#define LLVM_FP(a,b) APFloat(b,true)
#endif
-#ifndef DISABLE_FLOAT16
if (nb == 2)
return mark_julia_type(ConstantFP::get(jl_LLVMContext,LLVM_FP(APFloat::IEEEhalf,val)),(jl_value_t*)bt);
- else
-#endif
- if (nb == 4)
+ else if (nb == 4)
return mark_julia_type(ConstantFP::get(jl_LLVMContext,LLVM_FP(APFloat::IEEEsingle,val)),(jl_value_t*)bt);
else if (nb == 8)
return mark_julia_type(ConstantFP::get(jl_LLVMContext,LLVM_FP(APFloat::IEEEdouble,val)),(jl_value_t*)bt);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment