-
-
Save StefanKarpinski/9092d04bc24c44493d08 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/base/exports.jl b/base/exports.jl | |
index ce47f3f..69fc4de 100644 | |
--- a/base/exports.jl | |
+++ b/base/exports.jl | |
@@ -330,7 +330,7 @@ export | |
fld, | |
flipsign, | |
float, | |
- #float16, | |
+ float16, | |
float32, | |
float64, | |
floor, | |
diff --git a/base/float.jl b/base/float.jl | |
index 3b782c4..ab550ab 100644 | |
--- a/base/float.jl | |
+++ b/base/float.jl | |
@@ -1,7 +1,10 @@ | |
-#bitstype 16 Float16 <: FloatingPoint | |
+## non-core floating point types ## | |
+ | |
+bitstype 16 Float16 <: FloatingPoint | |
+ | |
## conversions to floating-point ## | |
-for t1 in (Float32,Float64) #,Float16) | |
+for t1 in (Float32,Float64,Float16) | |
for st in (Int8,Int16,Int32,Int64,Int128) | |
@eval begin | |
convert(::Type{$t1},x::($st)) = box($t1,sitofp($t1,unbox($st,x))) | |
@@ -15,12 +18,98 @@ for t1 in (Float32,Float64) #,Float16) | |
end | |
end | |
end | |
-#convert(::Type{Float16}, x::Union(Float32,Float64)) = box(Float16,fptrunc(x,Float16)) | |
-#convert(::Type{Float32}, x::Float16) = box(Float32,fpext(Float32,x)) | |
convert(::Type{Float32}, x::Float64) = box(Float32,fptrunc(Float32,x)) | |
-# REPLACE when enabling Float16 | |
-#convert(::Type{Float64}, x::Union(Float32,Float16)) = box(Float64,fpext(Float64,x)) | |
+# Widen a Float16 to Float32 by decoding the half-precision bit pattern by
+# hand: 1 sign bit, 5 exponent bits (bias 15) and 10 significand bits.
+# Zeros, subnormals, infinities and NaNs each get their own branch; every
+# finite Float16 is exactly representable as a Float32.
+function convert(::Type{Float32}, val::Float16)
+    val = uint32(reinterpret(Uint16, val))
+    sign = (val & 0x8000) >> 15
+    exp = (val & 0x7c00) >> 10
+    sig = (val & 0x3ff) >> 0
+    ret::Uint32
+
+    if exp == 0
+        if sig == 0
+            # signed zero: only the sign bit is carried over
+            sign = sign << 31
+            ret = sign | exp | sig
+        else
+            # subnormal: locate the highest set significand bit so the value
+            # can be renormalized; n_bit counts how far it has to be shifted
+            n_bit = 1
+            bit = 0x0200
+            while (bit & sig) == 0
+                n_bit = n_bit + 1
+                bit = bit >> 1
+            end
+            sign = sign << 31
+            # keep the exponent in Uint32 like every other field
+            exp = uint32(-14 - n_bit + 127) << 23
+            # the leading bit becomes the implicit one, so mask it off before
+            # aligning the remaining bits with the 23-bit significand
+            sig = ((sig & (~bit)) << n_bit) << (23 - 10)
+            ret = sign | exp | sig
+        end
+    elseif exp == 0x1f
+        if sig == 0
+            # infinity, sign preserved
+            if sign == 0
+                ret = 0x7f800000
+            else
+                ret = 0xff800000
+            end
+        else
+            # NaN: keep the sign and the payload bits and force the quiet
+            # bit, instead of collapsing every NaN to 0xffffffff
+            ret = (sign << 31) | 0x7fc00000 | (sig << (23 - 10))
+        end
+    else
+        # normal number: rebias the exponent (15 -> 127) and widen the fields
+        sign = sign << 31
+        exp = (exp - 15 + 127) << 23
+        sig = sig << (23 - 10)
+        ret = sign | exp | sig
+    end
+    return reinterpret(Float32, ret)
+end
+ | |
+# Widen a Float16 to Float64 by decoding the half-precision bit pattern by
+# hand: 1 sign bit, 5 exponent bits (bias 15) and 10 significand bits.
+# Zeros, subnormals, infinities and NaNs each get their own branch; every
+# finite Float16 is exactly representable as a Float64.
+function convert(::Type{Float64}, val::Float16)
+    val = uint64(reinterpret(Uint16, val))
+    sign = (val & 0x8000) >> 15
+    exp = (val & 0x7c00) >> 10
+    sig = (val & 0x3ff) >> 0
+    ret::Uint64
+
+    if exp == 0
+        if sig == 0
+            # signed zero: only the sign bit is carried over
+            sign = sign << 63
+            ret = sign | exp | sig
+        else
+            # subnormal: locate the highest set significand bit so the value
+            # can be renormalized; n_bit counts how far it has to be shifted
+            n_bit = 1
+            bit = 0x0200
+            while (bit & sig) == 0
+                n_bit = n_bit + 1
+                bit = bit >> 1
+            end
+            sign = sign << 63
+            # convert to Uint64 before shifting: the literal arithmetic is done
+            # in Int, which is too narrow for a 52-bit shift on 32-bit builds
+            exp = uint64(-14 - n_bit + 1023) << 52
+            # the leading bit becomes the implicit one, so mask it off before
+            # aligning the remaining bits with the 52-bit significand
+            sig = ((sig & (~bit)) << n_bit) << (52 - 10)
+            ret = sign | exp | sig
+        end
+    elseif exp == 0x1f
+        if sig == 0
+            # infinity, sign preserved
+            if sign == 0
+                ret = 0x7ff0000000000000
+            else
+                ret = 0xfff0000000000000
+            end
+        else
+            # NaN: keep the sign and the payload bits and force the quiet
+            # bit, instead of collapsing every NaN to 0xffffffffffffffff
+            ret = (sign << 63) | 0x7ff8000000000000 | (sig << (52 - 10))
+        end
+    else
+        # normal number: rebias the exponent (15 -> 1023) and widen the fields
+        sign = sign << 63
+        exp = (exp - 15 + 1023) << 52
+        sig = sig << (52 - 10)
+        ret = sign | exp | sig
+    end
+
+    return reinterpret(Float64, ret)
+end
+ | |
+convert(::Type{Float16}, x::Union(Float32,Float64)) = box(Float16,fptrunc(Float16,x)) | |
+ | |
+# TODO: replace the manual Float16 -> Float32/Float64 conversions above with the fpext-based definitions below once LLVM fixes 16-bit truncation
+#convert(::Type{Float32}, x::Float16) = box(Float32,fpext(Float32,x)) | |
+#convert(::Type{Float64}, x::Float16) = box(Float64,fpext(Float64,x)) | |
convert(::Type{Float64}, x::Float32) = box(Float64,fpext(Float64,x)) | |
convert(::Type{FloatingPoint}, x::Bool) = convert(Float32, x) | |
@@ -36,7 +125,7 @@ convert(::Type{FloatingPoint}, x::Uint32) = convert(Float64, x) | |
convert(::Type{FloatingPoint}, x::Uint64) = convert(Float64, x) # LOSSY | |
convert(::Type{FloatingPoint}, x::Uint128) = convert(Float64, x) # LOSSY | |
-#float16(x) = convert(Float16, x) | |
+float16(x) = convert(Float16, x) | |
float32(x) = convert(Float32, x) | |
float64(x) = convert(Float64, x) | |
float(x) = convert(FloatingPoint, x) | |
@@ -95,10 +184,10 @@ floor(x::Float64) = ccall((:floor, Base.libm_name), Float64, (Float64,), x) | |
## floating point promotions ## | |
-#promote_rule(::Type{Float32}, ::Type{Float16}) = Float32 | |
+promote_rule(::Type{Float32}, ::Type{Float16}) = Float32 | |
promote_rule(::Type{Float64}, ::Type{Float32}) = Float64 | |
-#morebits(::Type{Float16}) = Float32 | |
+morebits(::Type{Float16}) = Float32 | |
morebits(::Type{Float32}) = Float64 | |
## floating point arithmetic ## | |
diff --git a/base/io.jl b/base/io.jl | |
index 74fc9d0..56f3d68 100644 | |
--- a/base/io.jl | |
+++ b/base/io.jl | |
@@ -49,7 +49,7 @@ else | |
end | |
write(s::IO, x::Bool) = write(s, uint8(x)) | |
-#write(s::IO, x::Float16) = write(s, reinterpret(Int16,x)) | |
+write(s::IO, x::Float16) = write(s, reinterpret(Int16,x)) | |
write(s::IO, x::Float32) = write(s, reinterpret(Int32,x)) | |
write(s::IO, x::Float64) = write(s, reinterpret(Int64,x)) | |
diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp | |
index a2e3b2c..5a40814 100644 | |
--- a/src/intrinsics.cpp | |
+++ b/src/intrinsics.cpp | |
@@ -44,7 +44,6 @@ namespace JL_I { | |
using namespace JL_I; | |
#include "ccall.cpp" | |
-#define DISABLE_FLOAT16 | |
/* | |
low-level intrinsics design: | |
@@ -60,12 +59,9 @@ using namespace JL_I; | |
static Type *FTnbits(size_t nb) | |
{ | |
- #ifndef DISABLE_FLOAT16 | |
if(nb == 16) | |
return Type::getHalfTy(jl_LLVMContext); | |
- else | |
- #endif | |
- if(nb == 32) | |
+ else if(nb == 32) | |
return Type::getFloatTy(jl_LLVMContext); | |
else if(nb == 64) | |
return Type::getDoubleTy(jl_LLVMContext); | |
@@ -141,12 +137,9 @@ static Value *emit_unboxed(jl_value_t *e, jl_codectx_t *ctx) | |
#else | |
#define LLVM_FP(a,b) APFloat(b,true) | |
#endif | |
-#ifndef DISABLE_FLOAT16 | |
if (nb == 2) | |
return mark_julia_type(ConstantFP::get(jl_LLVMContext,LLVM_FP(APFloat::IEEEhalf,val)),(jl_value_t*)bt); | |
- else | |
-#endif | |
- if (nb == 4) | |
+ else if (nb == 4) | |
return mark_julia_type(ConstantFP::get(jl_LLVMContext,LLVM_FP(APFloat::IEEEsingle,val)),(jl_value_t*)bt); | |
else if (nb == 8) | |
return mark_julia_type(ConstantFP::get(jl_LLVMContext,LLVM_FP(APFloat::IEEEdouble,val)),(jl_value_t*)bt); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment