Skip to content

Instantly share code, notes, and snippets.

@simonster
Created May 23, 2014 02:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save simonster/0d397d36f1183dcd5965 to your computer and use it in GitHub Desktop.
Save simonster/0d397d36f1183dcd5965 to your computer and use it in GitHub Desktop.
sum_seq code comparison

With master:

julia> code_native(Base.sum_seq, (Vector{Float64}, Int, Int))
	.text
Filename: reduce.jl
Source line: 226
	# Native code for Base.sum_seq(Vector{Float64}, Int, Int) as built on master.
	# Branch operands in this listing are raw relative displacements, not labels.
	# NOTE(review): the register roles described below assume System V AMD64
	# argument order (RDI = array object, RSI = start index, RDX = end index);
	# confirm for the platform that produced this dump.
	push	RBP
	mov	RBP, RSP
Source line: 226
	# RAX = pointer stored at [RDI + 8]; it is the base of every 8-byte element
	# load below. XMM0 = element at 8*RSI - 8, i.e. 1-based index RSI.
	mov	RAX, QWORD PTR [RDI + 8]
	vmovsd	XMM0, QWORD PTR [RAX + 8*RSI - 8]
Source line: 210
	# If RSI + 6 >= RDX (signed) leave the four-accumulator path; presumably the
	# target is the short-range code at the end of the listing (source line 212).
	lea	RCX, QWORD PTR [RSI + 6]
	cmp	RCX, RDX
	jge	181
	# Seed four partial sums from the first eight elements of the range
	# (e0 is the element already held in XMM0):
	#   XMM1 = e0 + e4, XMM3 = e1 + e5, XMM2 = e2 + e6, XMM0 = e3 + e7
Source line: 229
	vmovsd	XMM2, QWORD PTR [RAX + 8*RSI + 16]
Source line: 226
	vaddsd	XMM1, XMM0, QWORD PTR [RAX + 8*RSI + 24]
Source line: 229
	vaddsd	XMM0, XMM2, QWORD PTR [RAX + 8*RSI + 48]
Source line: 227
	vmovsd	XMM3, QWORD PTR [RAX + 8*RSI]
Source line: 228
	vmovsd	XMM2, QWORD PTR [RAX + 8*RSI + 8]
	vaddsd	XMM2, XMM2, QWORD PTR [RAX + 8*RSI + 40]
Source line: 227
	vaddsd	XMM3, XMM3, QWORD PTR [RAX + 8*RSI + 32]
Source line: 232
	# R8 = RDX - 3 bounds the 4-wide loop; RAX = RSI + 8 is the running index.
	lea	R8, QWORD PTR [RDX - 3]
Source line: 231
	lea	RAX, QWORD PTR [RSI + 8]
	cmp	RAX, R8
	# Skip the 4-wide loop when RAX > R8.
	jg	71
Source line: 234
	mov	R11, QWORD PTR [RDI + 8]
Source line: 238
	# Four element pointers, 8 bytes apart, one per partial sum.
	lea	R9, QWORD PTR [R11 + 8*RSI + 56]
	lea	R10, QWORD PTR [R11 + 8*RSI + 64]
	lea	RCX, QWORD PTR [R11 + 8*RSI + 72]
	lea	RSI, QWORD PTR [R11 + 8*RSI + 80]
	# Loop body: one add per partial sum; every pointer advances 32 bytes
	# (four elements) and RAX advances by 4 until RAX > R8.
Source line: 237
	vaddsd	XMM0, XMM0, QWORD PTR [RSI]
Source line: 236
	vaddsd	XMM2, XMM2, QWORD PTR [RCX]
Source line: 235
	vaddsd	XMM3, XMM3, QWORD PTR [R10]
Source line: 238
	add	R10, 32
Source line: 234
	vaddsd	XMM1, XMM1, QWORD PTR [R9]
Source line: 238
	add	R9, 32
	add	RCX, 32
	add	RSI, 32
	add	RAX, 4
	cmp	RAX, R8
	jle	-47
Source line: 241
	# Remainder: while RAX <= RDX, add one element at a time into XMM1.
	cmp	RAX, RDX
	jg	29
Source line: 242
	mov	RCX, QWORD PTR [RDI + 8]
	lea	RCX, QWORD PTR [RCX + 8*RAX - 8]
	vaddsd	XMM1, XMM1, QWORD PTR [RCX]
Source line: 243
	add	RCX, 8
	inc	RAX
	cmp	RAX, RDX
	jle	-20
Source line: 246
	# Combine the four partial sums into XMM0 and return.
	vaddsd	XMM1, XMM3, XMM1
	vaddsd	XMM1, XMM2, XMM1
	vaddsd	XMM0, XMM0, XMM1
	pop	RBP
	ret
Source line: 212
	# Short-range code: XMM0 (first element) += second element.
	vaddsd	XMM0, XMM0, QWORD PTR [RAX + 8*RSI]
Source line: 213
	# Nothing more to add when RSI + 1 >= RDX.
	lea	RAX, QWORD PTR [RSI + 1]
	cmp	RAX, RDX
	jge	32
Source line: 215
	# RDX becomes the trip count RDX - 1 - RSI; RAX points at the third element.
	dec	RDX
	sub	RDX, RSI
	mov	RAX, QWORD PTR [RDI + 8]
	lea	RAX, QWORD PTR [RAX + 8*RSI + 8]
	vaddsd	XMM0, XMM0, QWORD PTR [RAX]
	add	RAX, 8
	dec	RDX
	jne	-17
Source line: 217
	pop	RBP
	ret

julia> code_llvm(Base.sum_seq, (Vector{Float64}, Int, Int))

; LLVM IR for Base.sum_seq(Vector{Float64}, Int, Int) as built on master.
; %0 is the array object, %1 the start index and %2 the end index. Element
; loads use %1 - 1 as the offset from the data pointer, so indices are 1-based.
; In the comments below a[k] means the element at 1-based index k and i = %1.
define double @julia_sum_seq17922(%jl_value_t*, i64, i64) {
top:
  ; %4 selects the four-accumulator path when i + 6 < %2; %10 = a[i].
  %3 = add i64 %1, 6, !dbg !590
  %4 = icmp slt i64 %3, %2, !dbg !590
  %5 = add i64 %1, -1, !dbg !591
  %6 = getelementptr inbounds %jl_value_t* %0, i64 1, i32 0, !dbg !591
  %7 = load %jl_value_t** %6, align 8, !dbg !591, !tbaa %jtbaa_arrayptr
  %8 = getelementptr %jl_value_t* %7, i64 %5, !dbg !591
  %9 = bitcast %jl_value_t* %8 to double*, !dbg !591
  %10 = load double* %9, align 8, !dbg !591, !tbaa %jtbaa_user
  br i1 %4, label %L5, label %if, !dbg !590

if:                                               ; preds = %top
  ; Short range: %14 = a[i] + a[i+1]; enter the loop only if i + 1 < %2.
  %11 = getelementptr %jl_value_t* %7, i64 %1, !dbg !597
  %12 = bitcast %jl_value_t* %11 to double*, !dbg !597
  %13 = load double* %12, align 8, !dbg !597, !tbaa %jtbaa_user
  %14 = fadd double %10, %13, !dbg !597
  %15 = add i64 %1, 1, !dbg !598
  %16 = icmp slt i64 %15, %2, !dbg !599
  br i1 %16, label %L.preheader, label %L4, !dbg !599

L.preheader:                                      ; preds = %if
  ; Loop setup: pointer to a[i+2] and a trip count of %2 - 1 - i.
  %sunkaddr = ptrtoint %jl_value_t* %0 to i64, !dbg !600
  %sunkaddr53 = add i64 %sunkaddr, 8, !dbg !600
  %sunkaddr54 = inttoptr i64 %sunkaddr53 to %jl_value_t**, !dbg !600
  %17 = load %jl_value_t** %sunkaddr54, align 8, !dbg !600, !tbaa %jtbaa_arrayptr
  %18 = add i64 %1, 1, !dbg !600
  %scevgep48 = getelementptr %jl_value_t* %17, i64 %18
  %19 = add i64 %2, -1, !dbg !600
  %20 = sub i64 %19, %1, !dbg !600
  br label %L, !dbg !600

L:                                                ; preds = %L, %L.preheader
  ; Sequential accumulation: one element per iteration, counting down to zero.
  %lsr.iv52 = phi i64 [ %lsr.iv.next, %L ], [ %20, %L.preheader ], !dbg !600
  %lsr.iv49 = phi %jl_value_t* [ %scevgep50, %L ], [ %scevgep48, %L.preheader ]
  %s.0 = phi double [ %22, %L ], [ %14, %L.preheader ]
  %lsr.iv4951 = bitcast %jl_value_t* %lsr.iv49 to double*
  %21 = load double* %lsr.iv4951, align 8, !dbg !600, !tbaa %jtbaa_user
  %22 = fadd double %s.0, %21, !dbg !600
  %scevgep50 = getelementptr %jl_value_t* %lsr.iv49, i64 1, !dbg !600
  %lsr.iv.next = add i64 %lsr.iv52, -1, !dbg !600
  %exitcond = icmp eq i64 %lsr.iv.next, 0, !dbg !600
  br i1 %exitcond, label %L4, label %L, !dbg !600

L4:                                               ; preds = %L, %if
  ; Return for the short-range path.
  %s.1 = phi double [ %14, %if ], [ %22, %L ]
  ret double %s.1, !dbg !601

L5:                                               ; preds = %top
  ; Long range: seed four partial sums from the first eight elements:
  ;   %27 = a[i]   + a[i+4]
  ;   %35 = a[i+1] + a[i+5]
  ;   %44 = a[i+2] + a[i+6]
  ;   %50 = a[i+3] + a[i+7]
  ; %51 = i + 8 is the next index; %52 = %2 - 3 bounds the 4-wide loop.
  %23 = add i64 %1, 3, !dbg !591
  %24 = getelementptr %jl_value_t* %7, i64 %23, !dbg !591
  %25 = bitcast %jl_value_t* %24 to double*, !dbg !591
  %26 = load double* %25, align 8, !dbg !591, !tbaa %jtbaa_user
  %27 = fadd double %10, %26, !dbg !591
  %28 = getelementptr %jl_value_t* %7, i64 %1, !dbg !602
  %29 = bitcast %jl_value_t* %28 to double*, !dbg !602
  %30 = load double* %29, align 8, !dbg !602, !tbaa %jtbaa_user
  %31 = add i64 %1, 4, !dbg !602
  %32 = getelementptr %jl_value_t* %7, i64 %31, !dbg !602
  %33 = bitcast %jl_value_t* %32 to double*, !dbg !602
  %34 = load double* %33, align 8, !dbg !602, !tbaa %jtbaa_user
  %35 = fadd double %30, %34, !dbg !602
  %36 = add i64 %1, 1, !dbg !603
  %37 = getelementptr %jl_value_t* %7, i64 %36, !dbg !603
  %38 = bitcast %jl_value_t* %37 to double*, !dbg !603
  %39 = load double* %38, align 8, !dbg !603, !tbaa %jtbaa_user
  %40 = add i64 %1, 5, !dbg !603
  %41 = getelementptr %jl_value_t* %7, i64 %40, !dbg !603
  %42 = bitcast %jl_value_t* %41 to double*, !dbg !603
  %43 = load double* %42, align 8, !dbg !603, !tbaa %jtbaa_user
  %44 = fadd double %39, %43, !dbg !603
  %45 = add i64 %1, 2, !dbg !604
  %46 = getelementptr %jl_value_t* %7, i64 %45, !dbg !604
  %47 = bitcast %jl_value_t* %46 to double*, !dbg !604
  %48 = load double* %47, align 8, !dbg !604, !tbaa %jtbaa_user
  %sunkaddr55 = ptrtoint %jl_value_t* %7 to i64, !dbg !604
  %sunkaddr56 = mul i64 %1, 8, !dbg !604
  %sunkaddr57 = add i64 %sunkaddr55, %sunkaddr56, !dbg !604
  %sunkaddr58 = add i64 %sunkaddr57, 48, !dbg !604
  %sunkaddr59 = inttoptr i64 %sunkaddr58 to double*, !dbg !604
  %49 = load double* %sunkaddr59, align 8, !dbg !604, !tbaa %jtbaa_user
  %50 = fadd double %48, %49, !dbg !604
  %51 = add i64 %1, 8, !dbg !605
  %52 = add i64 %2, -3, !dbg !606
  %53 = icmp sgt i64 %51, %52, !dbg !607
  br i1 %53, label %L10, label %L7.preheader, !dbg !607

L7.preheader:                                     ; preds = %L5
  ; One pointer per partial sum: %scevgep44 -> a[i+8] ... %scevgep32 -> a[i+11].
  %sunkaddr60 = ptrtoint %jl_value_t* %0 to i64, !dbg !608
  %sunkaddr61 = add i64 %sunkaddr60, 8, !dbg !608
  %sunkaddr62 = inttoptr i64 %sunkaddr61 to %jl_value_t**, !dbg !608
  %54 = load %jl_value_t** %sunkaddr62, align 8, !dbg !608, !tbaa %jtbaa_arrayptr
  %55 = add i64 %1, 10, !dbg !608
  %scevgep32 = getelementptr %jl_value_t* %54, i64 %55
  %56 = add i64 %1, 9, !dbg !608
  %scevgep36 = getelementptr %jl_value_t* %54, i64 %56, !dbg !609
  %57 = add i64 %1, 8, !dbg !608
  %scevgep40 = getelementptr %jl_value_t* %54, i64 %57, !dbg !609
  %58 = add i64 %1, 7, !dbg !608
  %scevgep44 = getelementptr %jl_value_t* %54, i64 %58, !dbg !609
  br label %L7, !dbg !608

L7:                                               ; preds = %L7.preheader, %L7
  ; 4-way unrolled loop: each partial sum takes one element, the index advances
  ; by 4, and the loop exits once the index exceeds %52.
  %lsr.iv45 = phi %jl_value_t* [ %scevgep44, %L7.preheader ], [ %scevgep46, %L7 ], !dbg !609
  %lsr.iv41 = phi %jl_value_t* [ %scevgep40, %L7.preheader ], [ %scevgep42, %L7 ], !dbg !609
  %lsr.iv37 = phi %jl_value_t* [ %scevgep36, %L7.preheader ], [ %scevgep38, %L7 ], !dbg !609
  %lsr.iv33 = phi %jl_value_t* [ %scevgep32, %L7.preheader ], [ %scevgep34, %L7 ]
  %s3.0 = phi double [ %64, %L7 ], [ %44, %L7.preheader ]
  %s2.0 = phi double [ %62, %L7 ], [ %35, %L7.preheader ]
  %s1.0 = phi double [ %60, %L7 ], [ %27, %L7.preheader ]
  %s4.0 = phi double [ %66, %L7 ], [ %50, %L7.preheader ]
  %i.1 = phi i64 [ %67, %L7 ], [ %51, %L7.preheader ]
  %lsr.iv4547 = bitcast %jl_value_t* %lsr.iv45 to double*
  %lsr.iv4143 = bitcast %jl_value_t* %lsr.iv41 to double*
  %lsr.iv3739 = bitcast %jl_value_t* %lsr.iv37 to double*
  %lsr.iv3335 = bitcast %jl_value_t* %lsr.iv33 to double*
  %59 = load double* %lsr.iv4547, align 8, !dbg !608, !tbaa %jtbaa_user
  %60 = fadd double %s1.0, %59, !dbg !608
  %61 = load double* %lsr.iv4143, align 8, !dbg !610, !tbaa %jtbaa_user
  %62 = fadd double %s2.0, %61, !dbg !610
  %63 = load double* %lsr.iv3739, align 8, !dbg !611, !tbaa %jtbaa_user
  %64 = fadd double %s3.0, %63, !dbg !611
  %65 = load double* %lsr.iv3335, align 8, !dbg !612, !tbaa %jtbaa_user
  %66 = fadd double %s4.0, %65, !dbg !612
  %67 = add i64 %i.1, 4, !dbg !609
  %scevgep34 = getelementptr %jl_value_t* %lsr.iv33, i64 4, !dbg !609
  %scevgep38 = getelementptr %jl_value_t* %lsr.iv37, i64 4, !dbg !609
  %scevgep42 = getelementptr %jl_value_t* %lsr.iv41, i64 4, !dbg !609
  %scevgep46 = getelementptr %jl_value_t* %lsr.iv45, i64 4, !dbg !609
  %68 = icmp sgt i64 %67, %52, !dbg !609
  br i1 %68, label %L10, label %L7, !dbg !609

L10:                                              ; preds = %L7, %L5
  ; Merge point: skip the remainder loop when the index already exceeds %2.
  %s3.1 = phi double [ %44, %L5 ], [ %64, %L7 ]
  %s2.1 = phi double [ %35, %L5 ], [ %62, %L7 ]
  %s1.1 = phi double [ %27, %L5 ], [ %60, %L7 ]
  %s4.1 = phi double [ %50, %L5 ], [ %66, %L7 ]
  %i.2 = phi i64 [ %51, %L5 ], [ %67, %L7 ]
  %69 = icmp sgt i64 %i.2, %2, !dbg !613
  br i1 %69, label %L15, label %L12.preheader, !dbg !613

L12.preheader:                                    ; preds = %L10
  ; Pointer to the element at 1-based index %i.2.
  %sunkaddr63 = ptrtoint %jl_value_t* %0 to i64, !dbg !614
  %sunkaddr64 = add i64 %sunkaddr63, 8, !dbg !614
  %sunkaddr65 = inttoptr i64 %sunkaddr64 to %jl_value_t**, !dbg !614
  %70 = load %jl_value_t** %sunkaddr65, align 8, !dbg !614, !tbaa %jtbaa_arrayptr
  %71 = add i64 %i.2, -1, !dbg !614
  %scevgep = getelementptr %jl_value_t* %70, i64 %71
  br label %L12, !dbg !614

L12:                                              ; preds = %L12.preheader, %L12
  ; Remainder loop: leftover elements are added to s1 one at a time.
  %lsr.iv = phi %jl_value_t* [ %scevgep, %L12.preheader ], [ %scevgep30, %L12 ]
  %s1.2 = phi double [ %73, %L12 ], [ %s1.1, %L12.preheader ]
  %i.3 = phi i64 [ %74, %L12 ], [ %i.2, %L12.preheader ]
  %lsr.iv31 = bitcast %jl_value_t* %lsr.iv to double*
  %72 = load double* %lsr.iv31, align 8, !dbg !614, !tbaa %jtbaa_user
  %73 = fadd double %s1.2, %72, !dbg !614
  %74 = add i64 %i.3, 1, !dbg !615
  %scevgep30 = getelementptr %jl_value_t* %lsr.iv, i64 1, !dbg !615
  %75 = icmp sgt i64 %74, %2, !dbg !615
  br i1 %75, label %L15, label %L12, !dbg !615

L15:                                              ; preds = %L12, %L10
  ; Final combine: s4 + (s3 + (s2 + s1)).
  %s1.3 = phi double [ %s1.1, %L10 ], [ %73, %L12 ]
  %76 = fadd double %s2.1, %s1.3, !dbg !616
  %77 = fadd double %s3.1, %76, !dbg !616
  %78 = fadd double %s4.1, %77, !dbg !616
  ret double %78, !dbg !616
}

With #6928:

julia> code_native(Base.sum_seq, (Vector{Float64}, Int, Int))
	.text
Filename: reduce.jl
Source line: 33
	# Native code for Base.sum_seq(Vector{Float64}, Int, Int) with #6928 applied.
	# Branch operands in this listing are raw relative displacements, not labels.
	# NOTE(review): the register roles described below assume System V AMD64
	# argument order (RDI = array object, RSI = start index, RDX = end index);
	# confirm for the platform that produced this dump.
	push	RBP
	mov	RBP, RSP
Source line: 33
	# Save RBX/R14, align RSP down to 32 bytes and reserve 160 bytes of stack
	# that the code below uses to hold vector constants and one YMM spill.
	push	R14
	push	RBX
	and	RSP, -32
	sub	RSP, 160
	# Trip count: R8 = RSI + 1, RAX = RSI + 2; if RAX > RDX then RDX = R8.
	# R10 = (RDX - RAX) + 1, with both steps overflow-checked via jo.
	# The jo targets are presumably the call sequence at the end of the listing.
	lea	R8, QWORD PTR [RSI + 1]
	lea	RAX, QWORD PTR [RSI + 2]
	cmp	RAX, RDX
	cmovg	RDX, R8
	sub	RDX, RAX
	jo	683
	lea	R10, QWORD PTR [RDX + 1]
	add	RDX, 1
	jo	669
Source line: 210
	# XMM0 = element at 1-based index RSI plus the element after it.
	mov	RAX, QWORD PTR [RDI + 8]
	vmovsd	XMM0, QWORD PTR [RAX + 8*RSI - 8]
	vaddsd	XMM0, XMM0, QWORD PTR [RAX + 8*RSI]
Source line: 33
	# Nothing left to add when the trip count R10 <= 0.
	test	R10, R10
	jle	633
	# YMM13 = <XMM0, 0, 0, 0>: the first vector accumulator, seeded with the
	# partial sum computed above.
	vxorpd	YMM15, YMM15, YMM15
	vmovsd	XMM0, XMM15, XMM0
	vinsertf128	YMM13, YMM15, XMM0, 0
Source line: 212
	# RCX = data pointer. R9 = R10 rounded down to a multiple of 16, the number
	# of elements handled by the vector loop; if it is zero the loop is skipped.
	mov	RCX, QWORD PTR [RDI + 8]
	mov	R9, R10
	and	R9, -16
	je	528
	# YMM11 = R8 (RSI + 1) copied into all four 64-bit lanes.
	vmovq	XMM0, R8
	vmovlhps	XMM0, XMM0, XMM0 # xmm0 = xmm0[0,0]
	vinsertf128	YMM11, YMM0, XMM0, 1
	vxorpd	YMM15, YMM15, YMM15
	# RDI is reused from here on as the vector-loop index, starting at 0.
	xor	EDI, EDI
	# Seven 16-byte constants are read from absolute addresses and copied to the
	# stack. NOTE(review): presumably the per-lane index offsets that appear as
	# <0,1,2,3> ... <12,13,14,15> in the LLVM IR -- confirm.
	movabs	RAX, 139889361538016
	vmovaps	XMM0, XMMWORD PTR [RAX]
	vmovaps	XMMWORD PTR [RSP + 112], XMM0
	movabs	RAX, 139889361538032
	vmovaps	XMM0, XMMWORD PTR [RAX]
	vmovaps	XMMWORD PTR [RSP + 96], XMM0
	movabs	RAX, 139889361538048
	vmovaps	XMM0, XMMWORD PTR [RAX]
	vmovaps	XMMWORD PTR [RSP + 80], XMM0
	movabs	RAX, 139889361538064
	vmovaps	XMM0, XMMWORD PTR [RAX]
	vmovaps	XMMWORD PTR [RSP + 64], XMM0
	movabs	RAX, 139889361538080
	movabs	RBX, 139889361538096
	movabs	RDX, 139889361538112
	vmovaps	XMM0, XMMWORD PTR [RBX]
	vmovaps	XMMWORD PTR [RSP + 48], XMM0
	vmovaps	XMM0, XMMWORD PTR [RAX]
	vmovaps	XMMWORD PTR [RSP + 32], XMM0
	vmovapd	XMM0, XMMWORD PTR [RDX]
	vmovapd	XMMWORD PTR [RSP + 16], XMM0
	mov	R8D, 1
	# YMM14 and YMM6 are two further accumulators, zeroed here.
	vxorpd	YMM14, YMM14, YMM14
	vxorpd	YMM6, YMM6, YMM6
	# Vector loop body (16 elements per iteration). Element indices are built
	# as 64-bit integer vectors (vpaddq), each lane is moved to a general
	# register (vmovq / vpextrq) and used for a scalar 8-byte load
	# (vmovsd / vmovhpd from [RCX + 8*reg]); the loaded values are packed into
	# YMM registers (vinsertf128) and added to the accumulators with vaddpd.
	# No packed load from the array appears in this loop.
	vmovq	XMM0, RDI
	vmovlhps	XMM5, XMM0, XMM0 # xmm5 = xmm0[0,0]
	vpaddq	XMM0, XMM5, XMMWORD PTR [RSP + 112]
	vpaddq	XMM1, XMM5, XMMWORD PTR [RSP + 96]
	vextractf128	XMM7, YMM11, 1
	vpaddq	XMM2, XMM7, XMM1
	vpaddq	XMM10, XMM7, XMM0
	vpaddq	XMM0, XMM5, XMMWORD PTR [RSP + 80]
	vpaddq	XMM0, XMM11, XMM0
	vpextrq	R14, XMM0, 1
	vmovq	R11, XMM0
	vmovq	RAX, XMM10
	vmovq	RBX, XMM2
	vpaddq	XMM0, XMM5, XMMWORD PTR [RSP + 64]
	vpaddq	XMM12, XMM7, XMM0
	vmovq	RDX, XMM12
	vmovsd	XMM9, QWORD PTR [RCX + 8*RDX]
	vmovsd	XMM8, QWORD PTR [RCX + 8*RBX]
	vmovsd	XMM3, QWORD PTR [RCX + 8*RAX]
	vmovsd	XMM0, QWORD PTR [RCX + 8*R11]
	vpextrq	R11, XMM2, 1
	# Advance the loop index by 16 elements.
	add	RDI, 16
	vmovhpd	XMM0, XMM0, QWORD PTR [RCX + 8*R14]
	# Spill to the stack; reloaded into YMM7 further down.
	vmovapd	YMMWORD PTR [RSP + 128], YMM0
	vpextrq	R14, XMM10, 1
	vpextrq	RDX, XMM12, 1
	vpaddq	XMM0, XMM5, XMMWORD PTR [RSP + 48]
	vpaddq	XMM1, XMM5, XMMWORD PTR [RSP + 32]
	vpaddq	XMM1, XMM11, XMM1
	vpextrq	RBX, XMM1, 1
	vmovq	RAX, XMM1
	vmovsd	XMM1, QWORD PTR [RCX + 8*RAX]
	vmovhpd	XMM10, XMM1, QWORD PTR [RCX + 8*RBX]
	vpaddq	XMM4, XMM11, XMM0
	vmovhpd	XMM2, XMM9, QWORD PTR [RCX + 8*RDX]
	vmovhpd	XMM0, XMM8, QWORD PTR [RCX + 8*R11]
	vmovhpd	XMM1, XMM3, QWORD PTR [RCX + 8*R14]
	vpaddq	XMM3, XMM5, XMMWORD PTR [RSP + 16]
	vpaddq	XMM3, XMM7, XMM3
	vpextrq	RAX, XMM4, 1
	vmovq	RDX, XMM4
	vmovsd	XMM4, QWORD PTR [RCX + 8*RDX]
	vmovhpd	XMM7, XMM4, QWORD PTR [RCX + 8*RAX]
	vmovq	XMM4, R8
	vinsertf128	YMM1, YMM7, XMM1, 1
	vinsertf128	YMM0, YMM10, XMM0, 1
	vmovapd	YMM7, YMMWORD PTR [RSP + 128]
	vinsertf128	YMM2, YMM7, XMM2, 1
	vpextrq	RAX, XMM3, 1
	vmovq	RDX, XMM3
	vpslldq	XMM3, XMM4, 8
	vpaddq	XMM3, XMM5, XMM3
	vmovsd	XMM4, QWORD PTR [RCX + 8*RDX]
	# Loop-exit comparison; the jne that consumes these flags comes after the
	# remaining vector instructions of this iteration.
	cmp	R9, RDI
	vaddpd	YMM6, YMM6, YMM2
	vaddpd	YMM14, YMM14, YMM0
	vaddpd	YMM15, YMM15, YMM1
	vmovhpd	XMM0, XMM4, QWORD PTR [RCX + 8*RAX]
	vpaddq	XMM1, XMM11, XMM3
	vpextrq	RAX, XMM1, 1
	vmovq	RDX, XMM1
	vmovsd	XMM1, QWORD PTR [RCX + 8*RDX]
	vmovhpd	XMM1, XMM1, QWORD PTR [RCX + 8*RAX]
	vinsertf128	YMM0, YMM1, XMM0, 1
	vaddpd	YMM13, YMM13, YMM0
	# Repeat while RDI != R9.
	jne	-346
	# NOTE(review): presumably jumps over the three setup instructions below,
	# which look like the entry used when the vector loop is skipped -- confirm.
	jmpq	12
	xor	R9D, R9D
	vxorpd	YMM14, YMM14, YMM14
	vxorpd	YMM6, YMM6, YMM6
	# Reduce the four accumulators (YMM13, YMM15, YMM14, YMM6) to one scalar:
	# add them together, fold the upper 128 bits onto the lower, then vhaddpd.
	vaddpd	YMM0, YMM15, YMM13
	vaddpd	YMM0, YMM14, YMM0
	vaddpd	YMM0, YMM6, YMM0
	vextractf128	XMM1, YMM0, 1
	vaddpd	YMM0, YMM0, YMM1
	vhaddpd	YMM0, YMM0, YMM0
	# Scalar remainder: R10 - R9 elements remain; skip the loop if that is zero.
	sub	R10, R9
	je	25
	add	R9, RSI
	lea	RCX, QWORD PTR [RCX + 8*R9 + 8]
	vaddsd	XMM0, XMM0, QWORD PTR [RCX]
Source line: 41
	add	RCX, 8
	dec	R10
	jne	-17
Source line: 214
	# Epilogue: RSP is restored from RBP because it was realigned on entry;
	# vzeroupper is issued before returning.
	lea	RSP, QWORD PTR [RBP - 16]
	pop	RBX
	pop	R14
	pop	RBP
	vzeroupper
	ret
Source line: 33
	# Out-of-line tail: loads a pointer from an absolute address into RDI, sets
	# ESI = 33 and calls through RAX. NOTE(review): presumably the
	# jl_throw_with_superfluous_argument(jl_overflow_exception, 33) call seen in
	# the LLVM IR for this function -- confirm.
	movabs	RAX, 139889360410464
	mov	RDI, QWORD PTR [RAX]
	movabs	RAX, 139889345816880
	mov	ESI, 33
	call	RAX

julia> code_llvm(Base.sum_seq, (Vector{Float64}, Int, Int))

; LLVM IR for Base.sum_seq(Vector{Float64}, Int, Int) with #6928 applied.
; %0 is the array object, %1 the start index and %2 the end index. Element
; loads use %1 - 1 as the offset from the data pointer, so indices are 1-based.
; The summation loop appears in two forms: a vector body that consumes 16
; elements per iteration using four <4 x double> accumulators, and a scalar
; remainder loop.
define double @julia_sum_seq17654(%jl_value_t*, i64, i64) {
top:
  ; %6 = %2, unless %1 + 2 > %2, in which case %1 + 1.
  ; %7 = %6 - (%1 + 2), computed with signed-overflow detection.
  %3 = add i64 %1, 2, !dbg !575
  %4 = icmp sgt i64 %3, %2, !dbg !575
  %5 = add i64 %1, 1, !dbg !575
  %6 = select i1 %4, i64 %5, i64 %2, !dbg !575
  %7 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %6, i64 %3), !dbg !575
  %8 = extractvalue { i64, i1 } %7, 1, !dbg !575
  br i1 %8, label %fail, label %pass, !dbg !575

fail:                                             ; preds = %top
  ; Subtraction overflowed: throw the value loaded from @jl_overflow_exception.
  %9 = load %jl_value_t** @jl_overflow_exception, align 8, !dbg !575
  call void @jl_throw_with_superfluous_argument(%jl_value_t* %9, i32 33), !dbg !575
  unreachable, !dbg !575

pass:                                             ; preds = %top
  ; %11 = %10 + 1 with signed-overflow detection; its value (%24 below) is the
  ; loop trip count.
  %10 = extractvalue { i64, i1 } %7, 0, !dbg !575
  %11 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %10, i64 1), !dbg !575
  %12 = extractvalue { i64, i1 } %11, 1, !dbg !575
  br i1 %12, label %fail1, label %pass2, !dbg !575

fail1:                                            ; preds = %pass
  ; Addition overflowed: same throw as in %fail.
  %13 = load %jl_value_t** @jl_overflow_exception, align 8, !dbg !575
  call void @jl_throw_with_superfluous_argument(%jl_value_t* %13, i32 33), !dbg !575
  unreachable, !dbg !575

pass2:                                            ; preds = %pass
  ; %23 = a[%1] + a[%1 + 1] (1-based); go straight to the return if the trip
  ; count %24 is less than 1.
  %14 = add i64 %1, -1, !dbg !576
  %15 = getelementptr inbounds %jl_value_t* %0, i64 1, i32 0, !dbg !576
  %16 = load %jl_value_t** %15, align 8, !dbg !576, !tbaa %jtbaa_arrayptr
  %17 = getelementptr %jl_value_t* %16, i64 %14, !dbg !576
  %18 = bitcast %jl_value_t* %17 to double*, !dbg !576
  %19 = load double* %18, align 8, !dbg !576, !tbaa %jtbaa_user
  %20 = getelementptr %jl_value_t* %16, i64 %1, !dbg !576
  %21 = bitcast %jl_value_t* %20 to double*, !dbg !576
  %22 = load double* %21, align 8, !dbg !576, !tbaa %jtbaa_user
  %23 = fadd double %19, %22, !dbg !576
  %24 = extractvalue { i64, i1 } %11, 0, !dbg !575
  %25 = icmp slt i64 %24, 1, !dbg !582
  br i1 %25, label %L7, label %L.preheader, !dbg !582

L.preheader:                                      ; preds = %pass2
  ; %n.vec = trip count rounded down to a multiple of 16 (elements handled by
  ; the vector body). %27 = <%23, 0, 0, 0> seeds the first accumulator.
  %sunkaddr = ptrtoint %jl_value_t* %0 to i64, !dbg !583
  %sunkaddr30 = add i64 %sunkaddr, 8, !dbg !583
  %sunkaddr31 = inttoptr i64 %sunkaddr30 to %jl_value_t**, !dbg !583
  %26 = load %jl_value_t** %sunkaddr31, align 8, !dbg !583, !tbaa %jtbaa_arrayptr, !llvm.mem.parallel_loop_access !584
  %n.vec = and i64 %24, -16
  %cmp.zero = icmp eq i64 %n.vec, 0
  %27 = insertelement <4 x double> <double undef, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00>, double %23, i32 0
  br i1 %cmp.zero, label %middle.block, label %vector.ph

vector.ph:                                        ; preds = %L.preheader
  ; Splat of %5 (= %1 + 1) across four i64 lanes.
  %broadcast.splatinsert14 = insertelement <4 x i64> undef, i64 %5, i32 0
  %broadcast.splat15 = shufflevector <4 x i64> %broadcast.splatinsert14, <4 x i64> undef, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  ; Each iteration builds four <4 x i64> index vectors, extracts every lane,
  ; performs 16 scalar double loads, packs them with insertelement and adds the
  ; results into the four accumulators. No vector load instruction appears in
  ; this block.
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %vec.phi = phi <4 x double> [ %27, %vector.ph ], [ %112, %vector.body ]
  %vec.phi8 = phi <4 x double> [ zeroinitializer, %vector.ph ], [ %113, %vector.body ]
  %vec.phi9 = phi <4 x double> [ zeroinitializer, %vector.ph ], [ %114, %vector.body ]
  %vec.phi10 = phi <4 x double> [ zeroinitializer, %vector.ph ], [ %115, %vector.body ]
  %broadcast.splatinsert = insertelement <4 x i64> undef, i64 %index, i32 0
  %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> undef, <4 x i32> zeroinitializer
  %induction = add <4 x i64> %broadcast.splat, <i64 0, i64 1, i64 2, i64 3>
  %induction11 = add <4 x i64> %broadcast.splat, <i64 4, i64 5, i64 6, i64 7>
  %induction12 = add <4 x i64> %broadcast.splat, <i64 8, i64 9, i64 10, i64 11>
  %induction13 = add <4 x i64> %broadcast.splat, <i64 12, i64 13, i64 14, i64 15>
  %28 = add <4 x i64> %broadcast.splat15, %induction
  %29 = add <4 x i64> %broadcast.splat15, %induction11
  %30 = add <4 x i64> %broadcast.splat15, %induction12
  %31 = add <4 x i64> %broadcast.splat15, %induction13
  ; One element pointer per index lane (16 in total).
  %32 = extractelement <4 x i64> %28, i32 0
  %33 = getelementptr %jl_value_t* %26, i64 %32, !dbg !583
  %34 = extractelement <4 x i64> %28, i32 1
  %35 = getelementptr %jl_value_t* %26, i64 %34, !dbg !583
  %36 = extractelement <4 x i64> %28, i32 2
  %37 = getelementptr %jl_value_t* %26, i64 %36, !dbg !583
  %38 = extractelement <4 x i64> %28, i32 3
  %39 = getelementptr %jl_value_t* %26, i64 %38, !dbg !583
  %40 = extractelement <4 x i64> %29, i32 0
  %41 = getelementptr %jl_value_t* %26, i64 %40, !dbg !583
  %42 = extractelement <4 x i64> %29, i32 1
  %43 = getelementptr %jl_value_t* %26, i64 %42, !dbg !583
  %44 = extractelement <4 x i64> %29, i32 2
  %45 = getelementptr %jl_value_t* %26, i64 %44, !dbg !583
  %46 = extractelement <4 x i64> %29, i32 3
  %47 = getelementptr %jl_value_t* %26, i64 %46, !dbg !583
  %48 = extractelement <4 x i64> %30, i32 0
  %49 = getelementptr %jl_value_t* %26, i64 %48, !dbg !583
  %50 = extractelement <4 x i64> %30, i32 1
  %51 = getelementptr %jl_value_t* %26, i64 %50, !dbg !583
  %52 = extractelement <4 x i64> %30, i32 2
  %53 = getelementptr %jl_value_t* %26, i64 %52, !dbg !583
  %54 = extractelement <4 x i64> %30, i32 3
  %55 = getelementptr %jl_value_t* %26, i64 %54, !dbg !583
  %56 = extractelement <4 x i64> %31, i32 0
  %57 = getelementptr %jl_value_t* %26, i64 %56, !dbg !583
  %58 = extractelement <4 x i64> %31, i32 1
  %59 = getelementptr %jl_value_t* %26, i64 %58, !dbg !583
  %60 = extractelement <4 x i64> %31, i32 2
  %61 = getelementptr %jl_value_t* %26, i64 %60, !dbg !583
  %62 = extractelement <4 x i64> %31, i32 3
  %63 = getelementptr %jl_value_t* %26, i64 %62, !dbg !583
  ; Scalar loads packed lane by lane into %75, %87, %99 and %111.
  %64 = bitcast %jl_value_t* %33 to double*
  %65 = load double* %64, align 8, !dbg !583, !tbaa %jtbaa_user, !llvm.mem.parallel_loop_access !584
  %66 = insertelement <4 x double> undef, double %65, i32 0
  %67 = bitcast %jl_value_t* %35 to double*
  %68 = load double* %67, align 8, !dbg !583, !tbaa %jtbaa_user, !llvm.mem.parallel_loop_access !584
  %69 = insertelement <4 x double> %66, double %68, i32 1
  %70 = bitcast %jl_value_t* %37 to double*
  %71 = load double* %70, align 8, !dbg !583, !tbaa %jtbaa_user, !llvm.mem.parallel_loop_access !584
  %72 = insertelement <4 x double> %69, double %71, i32 2
  %73 = bitcast %jl_value_t* %39 to double*
  %74 = load double* %73, align 8, !dbg !583, !tbaa %jtbaa_user, !llvm.mem.parallel_loop_access !584
  %75 = insertelement <4 x double> %72, double %74, i32 3
  %76 = bitcast %jl_value_t* %41 to double*
  %77 = load double* %76, align 8, !dbg !583, !tbaa %jtbaa_user, !llvm.mem.parallel_loop_access !584
  %78 = insertelement <4 x double> undef, double %77, i32 0
  %79 = bitcast %jl_value_t* %43 to double*
  %80 = load double* %79, align 8, !dbg !583, !tbaa %jtbaa_user, !llvm.mem.parallel_loop_access !584
  %81 = insertelement <4 x double> %78, double %80, i32 1
  %82 = bitcast %jl_value_t* %45 to double*
  %83 = load double* %82, align 8, !dbg !583, !tbaa %jtbaa_user, !llvm.mem.parallel_loop_access !584
  %84 = insertelement <4 x double> %81, double %83, i32 2
  %85 = bitcast %jl_value_t* %47 to double*
  %86 = load double* %85, align 8, !dbg !583, !tbaa %jtbaa_user, !llvm.mem.parallel_loop_access !584
  %87 = insertelement <4 x double> %84, double %86, i32 3
  %88 = bitcast %jl_value_t* %49 to double*
  %89 = load double* %88, align 8, !dbg !583, !tbaa %jtbaa_user, !llvm.mem.parallel_loop_access !584
  %90 = insertelement <4 x double> undef, double %89, i32 0
  %91 = bitcast %jl_value_t* %51 to double*
  %92 = load double* %91, align 8, !dbg !583, !tbaa %jtbaa_user, !llvm.mem.parallel_loop_access !584
  %93 = insertelement <4 x double> %90, double %92, i32 1
  %94 = bitcast %jl_value_t* %53 to double*
  %95 = load double* %94, align 8, !dbg !583, !tbaa %jtbaa_user, !llvm.mem.parallel_loop_access !584
  %96 = insertelement <4 x double> %93, double %95, i32 2
  %97 = bitcast %jl_value_t* %55 to double*
  %98 = load double* %97, align 8, !dbg !583, !tbaa %jtbaa_user, !llvm.mem.parallel_loop_access !584
  %99 = insertelement <4 x double> %96, double %98, i32 3
  %100 = bitcast %jl_value_t* %57 to double*
  %101 = load double* %100, align 8, !dbg !583, !tbaa %jtbaa_user, !llvm.mem.parallel_loop_access !584
  %102 = insertelement <4 x double> undef, double %101, i32 0
  %103 = bitcast %jl_value_t* %59 to double*
  %104 = load double* %103, align 8, !dbg !583, !tbaa %jtbaa_user, !llvm.mem.parallel_loop_access !584
  %105 = insertelement <4 x double> %102, double %104, i32 1
  %106 = bitcast %jl_value_t* %61 to double*
  %107 = load double* %106, align 8, !dbg !583, !tbaa %jtbaa_user, !llvm.mem.parallel_loop_access !584
  %108 = insertelement <4 x double> %105, double %107, i32 2
  %109 = bitcast %jl_value_t* %63 to double*
  %110 = load double* %109, align 8, !dbg !583, !tbaa %jtbaa_user, !llvm.mem.parallel_loop_access !584
  %111 = insertelement <4 x double> %108, double %110, i32 3
  ; Accumulate, advance the index by 16 and loop until it equals %n.vec.
  %112 = fadd <4 x double> %vec.phi, %75
  %113 = fadd <4 x double> %vec.phi8, %87
  %114 = fadd <4 x double> %vec.phi9, %99
  %115 = fadd <4 x double> %vec.phi10, %111
  %index.next = add i64 %index, 16
  %116 = icmp eq i64 %n.vec, %index.next
  br i1 %116, label %middle.block, label %vector.body

middle.block:                                     ; preds = %vector.body, %L.preheader
  ; Horizontal reduction of the four accumulators to the scalar %117.
  ; %cmp.n is true when no remainder elements are left.
  %resume.val = phi i64 [ 0, %L.preheader ], [ %n.vec, %vector.body ]
  %rdx.vec.exit.phi = phi <4 x double> [ %27, %L.preheader ], [ %112, %vector.body ]
  %rdx.vec.exit.phi18 = phi <4 x double> [ zeroinitializer, %L.preheader ], [ %113, %vector.body ]
  %rdx.vec.exit.phi19 = phi <4 x double> [ zeroinitializer, %L.preheader ], [ %114, %vector.body ]
  %rdx.vec.exit.phi20 = phi <4 x double> [ zeroinitializer, %L.preheader ], [ %115, %vector.body ]
  %bin.rdx = fadd <4 x double> %rdx.vec.exit.phi18, %rdx.vec.exit.phi
  %bin.rdx21 = fadd <4 x double> %rdx.vec.exit.phi19, %bin.rdx
  %bin.rdx22 = fadd <4 x double> %rdx.vec.exit.phi20, %bin.rdx21
  %rdx.shuf = shufflevector <4 x double> %bin.rdx22, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %bin.rdx23 = fadd <4 x double> %bin.rdx22, %rdx.shuf
  %rdx.shuf24 = shufflevector <4 x double> %bin.rdx23, <4 x double> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %bin.rdx25 = fadd <4 x double> %bin.rdx23, %rdx.shuf24
  %117 = extractelement <4 x double> %bin.rdx25, i32 0
  %cmp.n = icmp eq i64 %24, %resume.val
  br i1 %cmp.n, label %L7, label %L.preheader26

L.preheader26:                                    ; preds = %middle.block
  ; Remainder setup: pointer at offset %resume.val + %1 + 1 and a count of
  ; %24 - %resume.val.
  %118 = add i64 %resume.val, %1, !dbg !583
  %119 = add i64 %118, 1, !dbg !583
  %scevgep = getelementptr %jl_value_t* %26, i64 %119
  %120 = sub i64 %24, %resume.val, !dbg !583
  br label %L, !dbg !583

L:                                                ; preds = %L.preheader26, %L
  ; Scalar remainder loop. This fadd carries the `fast` flag, while the fadd
  ; instructions in %vector.body and %middle.block are printed without it.
  %lsr.iv29 = phi i64 [ %120, %L.preheader26 ], [ %lsr.iv.next, %L ], !dbg !586
  %lsr.iv = phi %jl_value_t* [ %scevgep, %L.preheader26 ], [ %scevgep27, %L ]
  %s.0 = phi double [ %122, %L ], [ %117, %L.preheader26 ]
  %lsr.iv28 = bitcast %jl_value_t* %lsr.iv to double*
  %121 = load double* %lsr.iv28, align 8, !dbg !583, !tbaa %jtbaa_user, !llvm.mem.parallel_loop_access !584
  %122 = fadd fast double %s.0, %121, !dbg !583
  %scevgep27 = getelementptr %jl_value_t* %lsr.iv, i64 1, !dbg !586
  %lsr.iv.next = add i64 %lsr.iv29, -1, !dbg !586
  %exitcond = icmp eq i64 %lsr.iv.next, 0, !dbg !586
  br i1 %exitcond, label %L7, label %L, !dbg !586, !llvm.loop.parallel !585, !llvm.vectorizer.already_vectorized !585

L7:                                               ; preds = %L, %middle.block, %pass2
  ; Result: %23 (loop skipped), %117 (vector part only) or %122 (after the
  ; remainder loop).
  %s.2 = phi double [ %23, %pass2 ], [ %117, %middle.block ], [ %122, %L ]
  ret double %s.2, !dbg !587
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment