Last active
January 2, 2018 21:57
-
-
Save jasigal/24abd01e68a453e0f4bd91ec21a3421c to your computer and use it in GitHub Desktop.
Accelerate LLVM FP contract bug
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Minimal Cabal package used to reproduce the "Accelerate LLVM FP contract bug".
name: accelerate-llvm-bug | |
version: 0.1.0.0 | |
build-type: Simple | |
cabal-version: >= 1.10 | |
-- Single executable whose entry point is Main.hs in the package root directory.
executable accelerate-llvm-bug | |
hs-source-dirs: . | |
main-is: Main.hs | |
default-language: Haskell2010 | |
-- All three Accelerate packages are constrained to >= 1.1 && < 1.2.
build-depends: base >= 4.7 && < 5 | |
, accelerate >= 1.1 && < 1.2 | |
, accelerate-llvm >= 1.1 && < 1.2 | |
, accelerate-llvm-native >= 1.1 && < 1.2 | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; Taken from running: | |
; stack exec -- accelerate-llvm-bug +ACC -fforce-recomp -ddump-cc -ddebug-cc -ddump-ld -ddump-asm -ddump-exec -ddump-sched -ddump-phases -ddump-gc -dverbose -ACC | |
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
; ModuleID = 'map_1bf1a791bb25369d' | |
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" | |
target triple = "x86_64-pc-linux-gnu" | |
; Function Attrs: nounwind | |
; Element-wise map kernel over the index range [%ix.start, %ix.end).
; For each index i it loads x = %fv0.ad0[i], computes y = 1.0 / x and stores
; y + tanh(-(y * y)) into %out.ad0[i].
define void @map_1bf1a791bb25369d(i64 %ix.start, i64 %ix.end, double* noalias nocapture %out.ad0, double* noalias nocapture readonly %fv0.ad0) local_unnamed_addr #0 { | |
entry: | |
; Skip the loop entirely when the (signed) range is empty.
%0 = icmp slt i64 %ix.start, %ix.end | |
br i1 %0, label %while1.top.preheader, label %while1.exit | |
while1.top.preheader: ; preds = %entry | |
br label %while1.top | |
while1.top: ; preds = %while1.top.preheader, %while1.top | |
; %1 is the current element index: %ix.start on entry, %10 on the back edge.
%1 = phi i64 [ %10, %while1.top ], [ %ix.start, %while1.top.preheader ] | |
%2 = getelementptr double, double* %fv0.ad0, i64 %1 | |
%3 = load double, double* %2, align 8 | |
; %4 = y = 1.0 / x
%4 = fdiv fast double 1.000000e+00, %3 | |
; %5 = y * y, then %6 = -0.0 - %5, i.e. the negated product. These two "fast"
; operations are what the gist's write-up identifies as the apparent origin of
; the fused vfnmsub213sd instruction in the generated assembly.
%5 = fmul fast double %4, %4 | |
%6 = fsub fast double -0.000000e+00, %5 | |
%7 = tail call double @tanh(double %6) #1 | |
; %8 = y + tanh(-(y * y)) is the value stored to the output array.
%8 = fadd fast double %4, %7 | |
%9 = getelementptr double, double* %out.ad0, i64 %1 | |
store double %8, double* %9, align 8 | |
; Advance the index and leave the loop once it equals %ix.end.
%10 = add nsw i64 %1, 1 | |
%exitcond = icmp eq i64 %10, %ix.end | |
br i1 %exitcond, label %while1.exit, label %while1.top | |
while1.exit: ; preds = %while1.top, %entry | |
ret void | |
} | |
; Function Attrs: nounwind readonly | |
declare double @tanh(double) local_unnamed_addr #1 | |
attributes #0 = { nounwind } | |
attributes #1 = { nounwind readonly } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Taken from running same command as above | |
.text | |
.file "" | |
.section .rodata.cst8,"aM",@progbits,8 | |
.p2align 3 | |
.LCPI0_0: | |
.quad 4607182418800017408 | |
.LCPI0_1: | |
.quad 0 | |
.text | |
.globl map_1bf1a791bb25369d | |
.p2align 4, 0x90 | |
.type map_1bf1a791bb25369d,@function | |
# Machine code emitted for the map kernel. Judging by how the registers are
# used below, %rdi/%rsi appear to carry the start/end indices, %rdx the output
# array and %rcx the input array (matching the parameter order of the IR
# function of the same name) -- NOTE(review): confirm against the ABI.
map_1bf1a791bb25369d: | |
# Save the registers this function uses as loop state and reserve 16 bytes of
# stack; the slot at 8(%rsp) is used below as a spill slot.
pushq %r15 | |
pushq %r14 | |
pushq %rbx | |
subq $16, %rsp | |
# Jump straight to the epilogue when %rdi >= %rsi (signed compare).
movq %rsi, %r14 | |
cmpq %r14, %rdi | |
jge .LBB0_3 | |
# %r14 = %rsi - %rdi is the remaining iteration count; %r15 and %rbx are
# pointers to element %rdi (8-byte stride) of the %rdx and %rcx arrays.
subq %rdi, %r14 | |
leaq (%rdx,%rdi,8), %r15 | |
leaq (%rcx,%rdi,8), %rbx | |
.p2align 4, 0x90 | |
.LBB0_2: | |
# xmm0 = constant at .LCPI0_0 divided by the current input element.
vmovsd .LCPI0_0(%rip), %xmm0 | |
vdivsd (%rbx), %xmm0, %xmm0 | |
# Keep the quotient on the stack so it can be added back after the call.
vmovsd %xmm0, 8(%rsp) | |
# Fused negated multiply-subtract: xmm0 = -(xmm0 * xmm0) - [.LCPI0_1].
# Unlike the .LCPI0_0(%rip) load above, the memory operand here has no (%rip)
# base register, i.e. it is an absolute address. The gist's write-up identifies
# this instruction as the one that raises SIGSEGV.
vfnmsub213sd .LCPI0_1, %xmm0, %xmm0 | |
callq tanh | |
# Result = spilled quotient + tanh(...); store it to the current output element.
vaddsd 8(%rsp), %xmm0, %xmm0 | |
vmovsd %xmm0, (%r15) | |
# Advance both element pointers by one double and count the iteration down.
addq $8, %r15 | |
addq $8, %rbx | |
addq $-1, %r14 | |
jne .LBB0_2 | |
.LBB0_3: | |
# Epilogue: release the stack area and restore the saved registers.
addq $16, %rsp | |
popq %rbx | |
popq %r14 | |
popq %r15 | |
retq | |
.Lfunc_end0: | |
.size map_1bf1a791bb25369d, .Lfunc_end0-map_1bf1a791bb25369d | |
.section ".note.GNU-stack","",@progbits |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Created by running "llc-5.0 autogen.ll -mattr=+fma -fp-contract=on -o autogen_from_ll_fp_contract.s" | |
.text | |
.file "autogen.ll" | |
.section .rodata.cst8,"aM",@progbits,8 | |
.p2align 3 # -- Begin function map_1bf1a791bb25369d | |
.LCPI0_0: | |
.quad 4607182418800017408 # double 1 | |
.LCPI0_1: | |
.quad 0 # double 0 | |
.text | |
.globl map_1bf1a791bb25369d | |
.p2align 4, 0x90 | |
.type map_1bf1a791bb25369d,@function | |
# Same kernel as produced by invoking llc directly with -mattr=+fma
# -fp-contract=on; the loop body contains the same absolute-addressed
# vfnmsub213sd instruction.
map_1bf1a791bb25369d: # @map_1bf1a791bb25369d | |
# BB#0: # %entry | |
pushq %r15 | |
pushq %r14 | |
pushq %rbx | |
subq $16, %rsp | |
movq %rsi, %r14 | |
cmpq %r14, %rdi | |
jge .LBB0_3 | |
# BB#1: # %while1.top.preheader | |
# %r14 = iteration count; %r15 / %rbx point at element %rdi of the %rdx / %rcx
# arrays (8-byte stride).
subq %rdi, %r14 | |
leaq (%rdx,%rdi,8), %r15 | |
leaq (%rcx,%rdi,8), %rbx | |
.p2align 4, 0x90 | |
.LBB0_2: # %while1.top | |
# =>This Inner Loop Header: Depth=1 | |
vmovsd .LCPI0_0(%rip), %xmm0 # xmm0 = mem[0],zero | |
vdivsd (%rbx), %xmm0, %xmm0 | |
vmovsd %xmm0, 8(%rsp) # 8-byte Spill | |
# xmm0 = -(xmm0 * xmm0) - [.LCPI0_1]. The memory operand lacks the (%rip) base
# used by the .LCPI0_0 load above, so it is addressed absolutely; the gist's
# write-up identifies this as the offending instruction.
vfnmsub213sd .LCPI0_1, %xmm0, %xmm0 | |
callq tanh | |
vaddsd 8(%rsp), %xmm0, %xmm0 # 8-byte Folded Reload | |
vmovsd %xmm0, (%r15) | |
addq $8, %r15 | |
addq $8, %rbx | |
decq %r14 | |
jne .LBB0_2 | |
.LBB0_3: # %while1.exit | |
addq $16, %rsp | |
popq %rbx | |
popq %r14 | |
popq %r15 | |
retq | |
.Lfunc_end0: | |
.size map_1bf1a791bb25369d, .Lfunc_end0-map_1bf1a791bb25369d | |
# -- End function | |
.section ".note.GNU-stack","",@progbits |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Main where | |
import Data.Array.Accelerate as A | |
import Data.Array.Accelerate.LLVM.Native | |
-- | Applies @f@ to the single element of a scalar array by running an
-- Accelerate 'A.map' through 'runN'.
--
-- For an input element @x@, @f@ computes @y + tanh (-(y * y))@ where
-- @y = recip x@.
g :: Scalar Double -> Scalar Double | |
g = (runN (A.map f :: Acc (Scalar Double) -> Acc (Scalar Double))) | |
where | |
-- y is the reciprocal of the input; b is the negated square of y.
f x = let y = recip x | |
b = (-y) * y | |
in y + tanh b | |
-- | Runs 'g' on a scalar array holding the value 1 and prints the resulting
-- element. According to the gist's write-up the correct output is
-- 0.23840584404423515 (= 1 + tanh(-1)).
main :: IO () | |
main = do | |
-- Build a rank-0 array (shape Z) from the one-element list [1].
let r = g (fromList Z [1]) | |
print (indexArray r Z) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Stack project configuration for the reproduction.
resolver: lts-10.2 | |
# Build only the package in the current directory.
packages: | |
- . | |
# Cabal flags set on dependencies.
# NOTE(review): the accelerate "debug" flag is presumably what makes the
# +ACC -ddump-* / -dverbose runtime options available -- confirm.
flags: | |
accelerate: | |
debug: true | |
unsafe-checks: true | |
internal-checks: true | |
llvm-hs: | |
shared-llvm: true |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The above is a bug in `accelerate-llvm-native`. To exhibit the bug, download `Main.hs`, `accelerate-llvm-bug.cabal`, and `stack.yaml` into one directory and run `stack build`, followed by:

    stack exec -- accelerate-llvm-bug +ACC -fforce-recomp -ddump-cc -ddebug-cc -ddump-ld -ddump-asm -ddump-exec -ddump-sched -ddump-phases -ddump-gc -dverbose -ACC

The output should contain `autogen.ll` and `autogen.s`, and should crash with “stack exec -- accelerate-llvm-b…” terminated by signal SIGSEGV (Address boundary error) or similar. By running with `lldb-5.0`
on the generated executable, we see that the offending instruction is ln. 30 in `autogen.s` (and ln. 33 in `autogen_from_ll_fp_contract.s`). Note that `autogen.s` and `autogen_from_ll_fp_contract.s` are virtually identical due to the `-fp-contract` option being passed to `llc`. Without this option, very different assembly is generated.

The `vfnmsub213sd` instruction is a "fused negative multiply-subtract of scalar double-precision floating-point values" and appears to originate from lines 22 and 23 of `autogen.ll`. The issue appears to be that the instruction is used in absolute addressing mode:

    vfnmsub213sd .LCPI0_1, %xmm0, %xmm0

vs.

    vfnmsub213sd .LCPI0_1(%rip), %xmm0, %xmm0
Making this manual change to `autogen.s`, compiling with `clang-5.0 autogen.s -c -o autogen.o`, inserting the object file into Accelerate's cache (`~/.accelerate/accelerate-llvm-1.1.0.0/accelerate-llvm-native-1.1.0.1/x86_64-pc-linux-gnu/broadwell/rel`, but obviously different on different architectures), and running the executable produces the correct result of `0.23840584404423515 = 1 + tanh(-1)`.