StefanKarpinski/float16.diff Secret

## float16.diff
diff --git a/base/exports.jl b/base/exports.jl
index ce47f3f..69fc4de 100644
--- a/base/exports.jl
+++ b/base/exports.jl
@@ -330,7 +330,7 @@ export
     fld,
     flipsign,
     float,
-    #float16,
+    float16,
     float32,
     float64,
     floor,
diff --git a/base/float.jl b/base/float.jl
index 3b782c4..ab550ab 100644
--- a/base/float.jl
+++ b/base/float.jl
@@ -1,7 +1,10 @@
-#bitstype 16 Float16 <: FloatingPoint
+## non-core floating point types ##
+
+bitstype 16 Float16 <: FloatingPoint  # 16-bit bits type declared as a FloatingPoint subtype
+
 ## conversions to floating-point ##

-for t1 in (Float32,Float64) #,Float16)
+for t1 in (Float32,Float64,Float16)
     for st in (Int8,Int16,Int32,Int64,Int128)
         @eval begin
             convert(::Type{$t1},x::($st)) = box($t1,sitofp($t1,unbox($st,x)))
@@ -15,12 +18,98 @@ for t1 in (Float32,Float64) #,Float16)
         end
     end
 end
-#convert(::Type{Float16}, x::Union(Float32,Float64)) = box(Float16,fptrunc(x,Float16))
-#convert(::Type{Float32}, x::Float16) = box(Float32,fpext(Float32,x))
 convert(::Type{Float32}, x::Float64) = box(Float32,fptrunc(Float32,x))

-# REPLACE when enabling Float16
-#convert(::Type{Float64}, x::Union(Float32,Float16)) = box(Float64,fpext(Float64,x))
+function convert(::Type{Float32}, val::Float16)  # widen half -> single by rebuilding the IEEE bit fields
+    val = uint32(reinterpret(Uint16, val))  # raw 16 bits, zero-extended to 32
+    sign = (val & 0x8000) >> 15  # 1-bit sign
+    exp  = (val & 0x7c00) >> 10  # 5-bit biased exponent (bias 15)
+    sig  = (val & 0x3ff) >> 0    # 10-bit significand
+    ret::Uint32
+
+    if exp == 0
+        if sig == 0  # signed zero: only the sign bit survives
+            sign = sign << 31
+            ret = sign | exp | sig
+        else  # subnormal half: renormalize, the value is a normal single
+            n_bit = 1  # position of the leading set bit, counting down from bit 9
+            bit = 0x0200
+            while (bit & sig) == 0
+                n_bit = n_bit + 1
+                bit = bit >> 1
+            end
+            sign = sign << 31
+            exp = (-14 - n_bit + 127) << 23  # unbiased exponent -14 - n_bit, rebiased by 127
+            sig = ((sig & (~bit)) << n_bit) << (23 - 10)  # drop the now-implicit leading bit, then widen
+            ret = sign | exp | sig
+        end
+    elseif exp == 0x1f
+        if sig == 0  # infinity, sign preserved
+            if sign == 0
+                ret = 0x7f800000
+            else
+                ret = 0xff800000
+            end
+        else  # NaN: keep sign and payload, force the quiet bit (was 0xffffffff for every NaN)
+            ret = (sign << 31) | 0x7fc00000 | (sig << (23 - 10))
+        end
+    else  # normal number: rebias the exponent (15 -> 127) and widen the significand
+        sign = sign << 31
+        exp  = (exp - 15 + 127) << 23
+        sig  = sig << (23 - 10)
+        ret = sign | exp | sig
+    end
+    return reinterpret(Float32, ret)
+end
+
+function convert(::Type{Float64}, val::Float16)  # widen half -> double by rebuilding the IEEE bit fields
+    val = uint64(reinterpret(Uint16, val))  # raw 16 bits, zero-extended to 64
+    sign = (val & 0x8000) >> 15  # 1-bit sign
+    exp  = (val & 0x7c00) >> 10  # 5-bit biased exponent (bias 15)
+    sig  = (val & 0x3ff) >> 0    # 10-bit significand
+    ret::Uint64
+
+    if exp == 0
+        if sig == 0  # signed zero: only the sign bit survives
+            sign = sign << 63
+            ret = sign | exp | sig
+        else  # subnormal half: renormalize, the value is a normal double
+            n_bit = 1  # position of the leading set bit, counting down from bit 9
+            bit = 0x0200
+            while (bit & sig) == 0
+                n_bit = n_bit + 1
+                bit = bit >> 1
+            end
+            sign = sign << 63
+            exp = (-14 - n_bit + 1023) << 52  # unbiased exponent -14 - n_bit, rebiased by 1023
+            sig = ((sig & (~bit)) << n_bit) << (52 - 10)  # drop the now-implicit leading bit, then widen
+            ret = sign | exp | sig
+        end
+    elseif exp == 0x1f
+        if sig == 0  # infinity, sign preserved
+            if sign == 0
+                ret = 0x7ff0000000000000
+            else
+                ret = 0xfff0000000000000
+            end
+        else  # NaN: keep sign and payload, force the quiet bit (was all-ones for every NaN)
+            ret = (sign << 63) | 0x7ff8000000000000 | (sig << (52 - 10))
+        end
+    else  # normal number: rebias the exponent (15 -> 1023) and widen the significand
+        sign = sign << 63
+        exp  = (exp - 15 + 1023) << 52
+        sig  = sig << (52 - 10)
+        ret = sign | exp | sig
+    end
+
+    return reinterpret(Float64, ret)
+end
+
+convert(::Type{Float16}, x::Union(Float32,Float64)) = box(Float16,fptrunc(Float16,x))  # narrowing goes through the fptrunc intrinsic
+
+# TODO: replace above manual Float16 conversion once LLVM fixes 16-bit truncation
+#convert(::Type{Float32}, x::Float16) = box(Float32,fpext(Float32,x))
+#convert(::Type{Float64}, x::Float16) = box(Float64,fpext(Float64,x))
 convert(::Type{Float64}, x::Float32) = box(Float64,fpext(Float64,x))

 convert(::Type{FloatingPoint}, x::Bool)    = convert(Float32, x)
@@ -36,7 +125,7 @@ convert(::Type{FloatingPoint}, x::Uint32)  = convert(Float64, x)
 convert(::Type{FloatingPoint}, x::Uint64)  = convert(Float64, x) # LOSSY
 convert(::Type{FloatingPoint}, x::Uint128) = convert(Float64, x) # LOSSY

-#float16(x) = convert(Float16, x)
+float16(x) = convert(Float16, x)  # shorthand for convert(Float16, x)
 float32(x) = convert(Float32, x)
 float64(x) = convert(Float64, x)
 float(x)   = convert(FloatingPoint, x)
@@ -95,10 +184,10 @@ floor(x::Float64) = ccall((:floor, Base.libm_name), Float64, (Float64,), x)

 ## floating point promotions ##

-#promote_rule(::Type{Float32}, ::Type{Float16}) = Float32
+promote_rule(::Type{Float32}, ::Type{Float16}) = Float32  # Float32 paired with Float16 promotes to Float32
 promote_rule(::Type{Float64}, ::Type{Float32}) = Float64

-#morebits(::Type{Float16}) = Float32
+morebits(::Type{Float16}) = Float32  # maps Float16 to Float32
 morebits(::Type{Float32}) = Float64

 ## floating point arithmetic ##
diff --git a/base/io.jl b/base/io.jl
index 74fc9d0..56f3d68 100644
--- a/base/io.jl
+++ b/base/io.jl
@@ -49,7 +49,7 @@ else
 end

 write(s::IO, x::Bool)    = write(s, uint8(x))
-#write(s::IO, x::Float16) = write(s, reinterpret(Int16,x))
+write(s::IO, x::Float16) = write(s, reinterpret(Int16,x))  # reinterprets the bits as Int16 and writes that
 write(s::IO, x::Float32) = write(s, reinterpret(Int32,x))
 write(s::IO, x::Float64) = write(s, reinterpret(Int64,x))

diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp
index a2e3b2c..5a40814 100644
--- a/src/intrinsics.cpp
+++ b/src/intrinsics.cpp
@@ -44,7 +44,6 @@ namespace JL_I {
 using namespace JL_I;

 #include "ccall.cpp"
-#define DISABLE_FLOAT16

 /*
   low-level intrinsics design:
@@ -60,12 +59,9 @@ using namespace JL_I;

 static Type *FTnbits(size_t nb)
 {
-    #ifndef DISABLE_FLOAT16
     if(nb == 16)
         return Type::getHalfTy(jl_LLVMContext);
-    else
-    #endif
-    if(nb == 32)
+    else if(nb == 32)
         return Type::getFloatTy(jl_LLVMContext);
     else if(nb == 64)
         return Type::getDoubleTy(jl_LLVMContext);
@@ -141,12 +137,9 @@ static Value *emit_unboxed(jl_value_t *e, jl_codectx_t *ctx)
 #else
             #define LLVM_FP(a,b) APFloat(b,true)
 #endif
-#ifndef DISABLE_FLOAT16
             if (nb == 2)
                 return mark_julia_type(ConstantFP::get(jl_LLVMContext,LLVM_FP(APFloat::IEEEhalf,val)),(jl_value_t*)bt);
-            else
-#endif
-            if (nb == 4)
+            else if (nb == 4)
                 return mark_julia_type(ConstantFP::get(jl_LLVMContext,LLVM_FP(APFloat::IEEEsingle,val)),(jl_value_t*)bt);
             else if (nb == 8)
                 return mark_julia_type(ConstantFP::get(jl_LLVMContext,LLVM_FP(APFloat::IEEEdouble,val)),(jl_value_t*)bt);
	diff --git a/base/exports.jl b/base/exports.jl
	index ce47f3f..69fc4de 100644
	--- a/base/exports.jl
	+++ b/base/exports.jl
	@@ -330,7 +330,7 @@ export
	fld,
	flipsign,
	float,
	- #float16,
	+ float16,
	float32,
	float64,
	floor,
	diff --git a/base/float.jl b/base/float.jl
	index 3b782c4..ab550ab 100644
	--- a/base/float.jl
	+++ b/base/float.jl
	@@ -1,7 +1,10 @@
	-#bitstype 16 Float16 <: FloatingPoint
	+## non-core floating point types ##
	+
	+bitstype 16 Float16 <: FloatingPoint  # 16-bit bits type declared as a FloatingPoint subtype
	+
	## conversions to floating-point ##

	-for t1 in (Float32,Float64) #,Float16)
	+for t1 in (Float32,Float64,Float16)
	for st in (Int8,Int16,Int32,Int64,Int128)
	@eval begin
	convert(::Type{$t1},x::($st)) = box($t1,sitofp($t1,unbox($st,x)))
	@@ -15,12 +18,98 @@ for t1 in (Float32,Float64) #,Float16)
	end
	end
	end
	-#convert(::Type{Float16}, x::Union(Float32,Float64)) = box(Float16,fptrunc(x,Float16))
	-#convert(::Type{Float32}, x::Float16) = box(Float32,fpext(Float32,x))
	convert(::Type{Float32}, x::Float64) = box(Float32,fptrunc(Float32,x))

	-# REPLACE when enabling Float16
	-#convert(::Type{Float64}, x::Union(Float32,Float16)) = box(Float64,fpext(Float64,x))
	+function convert(::Type{Float32}, val::Float16)  # widen half -> single by rebuilding the IEEE bit fields
	+    val = uint32(reinterpret(Uint16, val))  # raw 16 bits, zero-extended to 32
	+    sign = (val & 0x8000) >> 15  # 1-bit sign
	+    exp  = (val & 0x7c00) >> 10  # 5-bit biased exponent (bias 15)
	+    sig  = (val & 0x3ff) >> 0    # 10-bit significand
	+    ret::Uint32
	+
	+    if exp == 0
	+        if sig == 0  # signed zero: only the sign bit survives
	+            sign = sign << 31
	+            ret = sign | exp | sig
	+        else  # subnormal half: renormalize, the value is a normal single
	+            n_bit = 1  # position of the leading set bit, counting down from bit 9
	+            bit = 0x0200
	+            while (bit & sig) == 0
	+                n_bit = n_bit + 1
	+                bit = bit >> 1
	+            end
	+            sign = sign << 31
	+            exp = (-14 - n_bit + 127) << 23  # unbiased exponent -14 - n_bit, rebiased by 127
	+            sig = ((sig & (~bit)) << n_bit) << (23 - 10)  # drop the now-implicit leading bit, then widen
	+            ret = sign | exp | sig
	+        end
	+    elseif exp == 0x1f
	+        if sig == 0  # infinity, sign preserved
	+            if sign == 0
	+                ret = 0x7f800000
	+            else
	+                ret = 0xff800000
	+            end
	+        else  # NaN: keep sign and payload, force the quiet bit (was 0xffffffff for every NaN)
	+            ret = (sign << 31) | 0x7fc00000 | (sig << (23 - 10))
	+        end
	+    else  # normal number: rebias the exponent (15 -> 127) and widen the significand
	+        sign = sign << 31
	+        exp  = (exp - 15 + 127) << 23
	+        sig  = sig << (23 - 10)
	+        ret = sign | exp | sig
	+    end
	+    return reinterpret(Float32, ret)
	+end
	+
	+function convert(::Type{Float64}, val::Float16)  # widen half -> double by rebuilding the IEEE bit fields
	+    val = uint64(reinterpret(Uint16, val))  # raw 16 bits, zero-extended to 64
	+    sign = (val & 0x8000) >> 15  # 1-bit sign
	+    exp  = (val & 0x7c00) >> 10  # 5-bit biased exponent (bias 15)
	+    sig  = (val & 0x3ff) >> 0    # 10-bit significand
	+    ret::Uint64
	+
	+    if exp == 0
	+        if sig == 0  # signed zero: only the sign bit survives
	+            sign = sign << 63
	+            ret = sign | exp | sig
	+        else  # subnormal half: renormalize, the value is a normal double
	+            n_bit = 1  # position of the leading set bit, counting down from bit 9
	+            bit = 0x0200
	+            while (bit & sig) == 0
	+                n_bit = n_bit + 1
	+                bit = bit >> 1
	+            end
	+            sign = sign << 63
	+            exp = (-14 - n_bit + 1023) << 52  # unbiased exponent -14 - n_bit, rebiased by 1023
	+            sig = ((sig & (~bit)) << n_bit) << (52 - 10)  # drop the now-implicit leading bit, then widen
	+            ret = sign | exp | sig
	+        end
	+    elseif exp == 0x1f
	+        if sig == 0  # infinity, sign preserved
	+            if sign == 0
	+                ret = 0x7ff0000000000000
	+            else
	+                ret = 0xfff0000000000000
	+            end
	+        else  # NaN: keep sign and payload, force the quiet bit (was all-ones for every NaN)
	+            ret = (sign << 63) | 0x7ff8000000000000 | (sig << (52 - 10))
	+        end
	+    else  # normal number: rebias the exponent (15 -> 1023) and widen the significand
	+        sign = sign << 63
	+        exp  = (exp - 15 + 1023) << 52
	+        sig  = sig << (52 - 10)
	+        ret = sign | exp | sig
	+    end
	+
	+    return reinterpret(Float64, ret)
	+end
	+
	+convert(::Type{Float16}, x::Union(Float32,Float64)) = box(Float16,fptrunc(Float16,x))  # narrowing goes through the fptrunc intrinsic
	+
	+# TODO: replace above manual Float16 conversion once LLVM fixes 16-bit truncation
	+#convert(::Type{Float32}, x::Float16) = box(Float32,fpext(Float32,x))
	+#convert(::Type{Float64}, x::Float16) = box(Float64,fpext(Float64,x))
	convert(::Type{Float64}, x::Float32) = box(Float64,fpext(Float64,x))

	convert(::Type{FloatingPoint}, x::Bool) = convert(Float32, x)
	@@ -36,7 +125,7 @@ convert(::Type{FloatingPoint}, x::Uint32) = convert(Float64, x)
	convert(::Type{FloatingPoint}, x::Uint64) = convert(Float64, x) # LOSSY
	convert(::Type{FloatingPoint}, x::Uint128) = convert(Float64, x) # LOSSY

	-#float16(x) = convert(Float16, x)
	+float16(x) = convert(Float16, x)  # shorthand for convert(Float16, x)
	float32(x) = convert(Float32, x)
	float64(x) = convert(Float64, x)
	float(x) = convert(FloatingPoint, x)
	@@ -95,10 +184,10 @@ floor(x::Float64) = ccall((:floor, Base.libm_name), Float64, (Float64,), x)

	## floating point promotions ##

	-#promote_rule(::Type{Float32}, ::Type{Float16}) = Float32
	+promote_rule(::Type{Float32}, ::Type{Float16}) = Float32  # Float32 paired with Float16 promotes to Float32
	promote_rule(::Type{Float64}, ::Type{Float32}) = Float64

	-#morebits(::Type{Float16}) = Float32
	+morebits(::Type{Float16}) = Float32  # maps Float16 to Float32
	morebits(::Type{Float32}) = Float64

	## floating point arithmetic ##
	diff --git a/base/io.jl b/base/io.jl
	index 74fc9d0..56f3d68 100644
	--- a/base/io.jl
	+++ b/base/io.jl
	@@ -49,7 +49,7 @@ else
	end

	write(s::IO, x::Bool) = write(s, uint8(x))
	-#write(s::IO, x::Float16) = write(s, reinterpret(Int16,x))
	+write(s::IO, x::Float16) = write(s, reinterpret(Int16,x))  # reinterprets the bits as Int16 and writes that
	write(s::IO, x::Float32) = write(s, reinterpret(Int32,x))
	write(s::IO, x::Float64) = write(s, reinterpret(Int64,x))

	diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp
	index a2e3b2c..5a40814 100644
	--- a/src/intrinsics.cpp
	+++ b/src/intrinsics.cpp
	@@ -44,7 +44,6 @@ namespace JL_I {
	using namespace JL_I;

	#include "ccall.cpp"
	-#define DISABLE_FLOAT16

	/*
	low-level intrinsics design:
	@@ -60,12 +59,9 @@ using namespace JL_I;

	static Type *FTnbits(size_t nb)
	{
	- #ifndef DISABLE_FLOAT16
	if(nb == 16)
	return Type::getHalfTy(jl_LLVMContext);
	- else
	- #endif
	- if(nb == 32)
	+ else if(nb == 32)
	return Type::getFloatTy(jl_LLVMContext);
	else if(nb == 64)
	return Type::getDoubleTy(jl_LLVMContext);
	@@ -141,12 +137,9 @@ static Value emit_unboxed(jl_value_t e, jl_codectx_t *ctx)
	#else
	#define LLVM_FP(a,b) APFloat(b,true)
	#endif
	-#ifndef DISABLE_FLOAT16
	if (nb == 2)
	return mark_julia_type(ConstantFP::get(jl_LLVMContext,LLVM_FP(APFloat::IEEEhalf,val)),(jl_value_t*)bt);
	- else
	-#endif
	- if (nb == 4)
	+ else if (nb == 4)
	return mark_julia_type(ConstantFP::get(jl_LLVMContext,LLVM_FP(APFloat::IEEEsingle,val)),(jl_value_t*)bt);
	else if (nb == 8)
	return mark_julia_type(ConstantFP::get(jl_LLVMContext,LLVM_FP(APFloat::IEEEdouble,val)),(jl_value_t*)bt);