Skip to content

Instantly share code, notes, and snippets.

@briansp2020
Created June 26, 2016 05:12
Show Gist options
  • Save briansp2020/31e440743dafdaccb8afe92282583e14 to your computer and use it in GitHub Desktop.
Save briansp2020/31e440743dafdaccb8afe92282583e14 to your computer and use it in GitHub Desktop.
; ModuleID = '<stdin>'
; Module-level target description. Note that the layout/triple name an x86_64
; Linux host even though the kernels further down use the spir_kernel calling
; convention and addrspace(1)/addrspace(3) pointers.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Eight queues, addressed by a queue number 0..7:
;   field 0: one i32 per queue
;   field 1: 400 i32 slots per queue
;   field 2: one i32 per queue
; In the BFS_kernel trampoline, field 0 is the per-queue element counter
; (bumped with atomic_add_int_local), field 1 receives the queued values and
; field 2 is the per-queue step used when the queues are copied out.
%struct.LocalQueues = type { [8 x i32], [8 x [400 x i32]], [8 x i32] }
; Pair of two i32 values; the BFS kernel reads both members from its
; %"class.hc::short_vector::int_2.0" array arguments.
%"class.hc::short_vector::int_2.0" = type { i32, i32 }
; Launch descriptor: four gl_dim3 triples, one i32, then two pointers.
; The BFS kernel treats it as a 72-byte object (see its lifetime/memcpy sizes).
%struct.grid_launch_parm = type { %struct.gl_dim3, %struct.gl_dim3, %struct.gl_dim3, %struct.gl_dim3, i32, %"class.hc::accelerator_view"*, %"class.hc::completion_future"* }
; Three i32 components.
%struct.gl_dim3 = type { i32, i32, i32 }
; Everything from here to the end of the type list is pulled in transitively by
; the two pointer members of %struct.grid_launch_parm (accelerator_view and
; completion_future). None of the kernel bodies visible in this part of the
; file reads or writes a field of these types.
%"class.hc::accelerator_view" = type { %"class.std::__1::shared_ptr" }
%"class.std::__1::shared_ptr" = type { %"class.Kalmar::KalmarQueue"*, %"class.std::__1::__shared_weak_count"* }
%"class.Kalmar::KalmarQueue" = type { i32 (...)**, %"class.Kalmar::KalmarDevice"*, i32, i32 }
%"class.Kalmar::KalmarDevice" = type { i32 (...)**, i32, %"class.std::__1::map", %"class.std::__1::mutex" }
%"class.std::__1::map" = type { %"class.std::__1::__tree.13" }
%"class.std::__1::__tree.13" = type { %"class.std::__1::__tree_node.14"*, %"class.std::__1::__compressed_pair", %"class.std::__1::__compressed_pair.20" }
%"class.std::__1::__tree_node.14" = type { %"class.std::__1::__tree_node_base.base", %"union.std::__1::__value_type" }
; Packed (<{ ... }>) variant of __tree_node_base, i.e. without tail padding.
%"class.std::__1::__tree_node_base.base" = type <{ %"class.std::__1::__tree_end_node", %"class.std::__1::__tree_node_base"*, %"class.std::__1::__tree_node_base"*, i8 }>
%"class.std::__1::__tree_end_node" = type { %"class.std::__1::__tree_node_base"* }
%"class.std::__1::__tree_node_base" = type { %"class.std::__1::__tree_end_node", %"class.std::__1::__tree_node_base"*, %"class.std::__1::__tree_node_base"*, i8 }
%"union.std::__1::__value_type" = type { %"struct.std::__1::pair" }
%"struct.std::__1::pair" = type { %"class.std::__1::__thread_id", %"class.std::__1::shared_ptr" }
%"class.std::__1::__thread_id" = type { i64 }
%"class.std::__1::__compressed_pair" = type { %"class.std::__1::__libcpp_compressed_pair_imp" }
%"class.std::__1::__libcpp_compressed_pair_imp" = type { %"class.std::__1::__tree_end_node" }
%"class.std::__1::__compressed_pair.20" = type { %"class.std::__1::__libcpp_compressed_pair_imp.21" }
%"class.std::__1::__libcpp_compressed_pair_imp.21" = type { i64 }
%"class.std::__1::mutex" = type { %union.pthread_mutex_t }
%union.pthread_mutex_t = type { %"struct.(anonymous union)::__pthread_mutex_s" }
%"struct.(anonymous union)::__pthread_mutex_s" = type { i32, i32, i32, i32, i32, i16, i16, %struct.__pthread_internal_list }
%struct.__pthread_internal_list = type { %struct.__pthread_internal_list*, %struct.__pthread_internal_list* }
%"class.std::__1::__shared_weak_count" = type { %"class.std::__1::__shared_count", i64 }
%"class.std::__1::__shared_count" = type { i32 (...)**, i64 }
%"class.hc::completion_future" = type { %"class.std::__1::shared_future", %"class.std::__1::thread"*, %"class.std::__1::shared_ptr.24" }
%"class.std::__1::shared_future" = type { %"class.std::__1::__assoc_sub_state"* }
%"class.std::__1::__assoc_sub_state" = type { %"class.std::__1::__shared_count", %"class.std::exception_ptr", %"class.std::__1::mutex", %"class.std::__1::condition_variable", i32 }
%"class.std::exception_ptr" = type { i8* }
%"class.std::__1::condition_variable" = type { %union.pthread_cond_t }
%union.pthread_cond_t = type { %struct.anon }
%struct.anon = type { i32, i32, i64, i64, i64, i8*, i32, i32 }
%"class.std::__1::thread" = type { i64 }
%"class.std::__1::shared_ptr.24" = type { %"class.Kalmar::KalmarAsyncOp"*, %"class.std::__1::__shared_weak_count"* }
%"class.Kalmar::KalmarAsyncOp" = type { i32 (...)** }
; Zero-initialised i32 globals in addrspace(1). They are not referenced by any
; kernel body visible in this part of the file.
; NOTE(review): presumably used by the BFS_kernel_multi_blk_inGPU kernel whose
; body lies further down - confirm.
@count24 = addrspace(1) global i32 0, align 4
@no_of_nodes_vol27 = addrspace(1) global i32 0, align 4
@stay_vol29 = addrspace(1) global i32 0, align 4
; Function-local statics of the GPU kernels. All of them live in addrspace(3),
; are placed in section "clamp_opencl_local" and have an undef initialiser, so
; a kernel must write them before reading them.
;
; Statics whose mangled name refers to BFS_in_GPU_kernel (that kernel's body is
; not part of the code visible here): local_q, prefix_q, next_wf, tot_sum.
@ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q245 = internal addrspace(3) global %struct.LocalQueues undef, section "clamp_opencl_local", align 4
@ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E8prefix_q346 = internal addrspace(3) global [8 x i32] undef, section "clamp_opencl_local", align 16
@ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7next_wf447 = internal addrspace(3) global [512 x i32] undef, section "clamp_opencl_local", align 16
@ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7tot_sum548 = internal unnamed_addr addrspace(3) global i32 undef, section "clamp_opencl_local", align 4
; Statics whose mangled name refers to BFS_kernel_multi_blk_inGPU:
; local_q, prefix_q, shift, no_of_nodes_sm, odd_time.
@ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E7local_q631 = internal addrspace(3) global %struct.LocalQueues undef, section "clamp_opencl_local", align 4
@ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E8prefix_q732 = internal addrspace(3) global [8 x i32] undef, section "clamp_opencl_local", align 16
@ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E5shift833 = internal unnamed_addr addrspace(3) global i32 undef, section "clamp_opencl_local", align 4
@ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E14no_of_nodes_sm934 = internal unnamed_addr addrspace(3) global i32 undef, section "clamp_opencl_local", align 4
@ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E8odd_time1035 = internal unnamed_addr addrspace(3) global i32 undef, section "clamp_opencl_local", align 4
; Statics of BFS_kernel, all three of which are used by the BFS_kernel
; trampoline defined in this file:
;   local_q  - the eight per-group queues (%struct.LocalQueues)
;   prefix_q - eight running sums of the queue counters
;   shift    - value returned by the kernel's atomic_add_int_global call
@ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q1116 = internal addrspace(3) global %struct.LocalQueues undef, section "clamp_opencl_local", align 4
@ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E8prefix_q1217 = internal addrspace(3) global [8 x i32] undef, section "clamp_opencl_local", align 16
@ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E5shift1318 = internal unnamed_addr addrspace(3) global i32 undef, section "clamp_opencl_local", align 4
; External declarations used by the kernels below. Each takes an i32 selector
; (the kernels pass 0, 1 or 2) and the index queries return i64. The #N
; suffixes name attribute groups that are defined elsewhere in the module.
;
; Function Attrs: nounwind readnone
declare i64 @amp_get_global_id(i32) #0
; LLVM intrinsics: memcpy (dest, src, byte count, alignment, volatile flag) and
; the lifetime markers (byte count, pointer) that bracket stack-slot usage.
; Function Attrs: nounwind
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1
; Function Attrs: nounwind
declare void @llvm.lifetime.start(i64, i8* nocapture) #1
; Function Attrs: nounwind
declare void @llvm.lifetime.end(i64, i8* nocapture) #1
; Function Attrs: nounwind readnone
declare i64 @amp_get_local_size(i32) #0
; Function Attrs: nounwind readnone
declare i64 @hc_get_num_groups(i32) #0
; Barrier primitive; marked noduplicate so the optimiser may not clone calls.
; Function Attrs: noduplicate
declare void @hc_barrier(i32) #2
; Function Attrs: nounwind readnone
declare i64 @amp_get_local_id(i32) #3
; NOTE(review): both hc_get_group_id and amp_get_group_id are declared and used;
; presumably they return the same quantity - confirm against the runtime.
; Function Attrs: nounwind readnone
declare i64 @hc_get_group_id(i32) #3
; Function Attrs: nounwind readnone
declare i64 @amp_get_group_id(i32) #0
; Function Attrs: nounwind uwtable
; Kernel that fills an i8 buffer with one byte value.
; NOTE(review): the mangled name suggests this is the trampoline of the lambda
; inside ihipMemsetKernel<char> - not verifiable from this file alone.
;   %0 (i64)               exclusive upper bound for the element index
;                          (all comparisons against it are unsigned)
;   %1 (i8 addrspace(1)*)  destination buffer
;   %2 (i8 signext)        value stored into every visited element
; Each work-item starts at its global id in dimension 0 and steps by
; amp_get_local_size(0) * hc_get_num_groups(0) until the index reaches %0.
define spir_kernel void @ZZ16ihipMemsetKernelIcEN2hc17completion_futureEP12ihipStream_tPT_S4_mENUlNS0_11tiled_indexILi1EEEE_19__cxxamp_trampolineEmPcc(i64, i8 addrspace(1)*, i8 signext) #4 align 2 {
%4 = tail call spir_func i64 @amp_get_global_id(i32 0) #7
; shl 32 / ashr 32 keeps the low 32 bits of the global id and sign-extends
; them back to i64 (%5 is the starting index).
%sext.i = shl i64 %4, 32
%5 = ashr exact i64 %sext.i, 32
; Skip the loop entirely when the starting index is already out of range.
%6 = icmp ult i64 %5, %0
br i1 %6, label %.lr.ph.i, label %_ZZ16ihipMemsetKernelIcEN2hc17completion_futureEP12ihipStream_tPT_S4_mENKUlNS0_11tiled_indexILi1EEEE_clES7_.exit
.lr.ph.i: ; preds = %3
; Loop pre-header: compute the stride %10 = low 32 bits of
; (local size * number of groups), sign-extended to i64.
%7 = tail call spir_func i64 @amp_get_local_size(i32 0) #7
%8 = tail call spir_func i64 @hc_get_num_groups(i32 0) #7
%9 = shl i64 %7, 32
%sext3.i = mul i64 %9, %8
%10 = ashr exact i64 %sext3.i, 32
br label %11
; <label>:11 ; preds = %11, %.lr.ph.i
; Loop body: dest[index] = value; index += stride; repeat while index < %0.
%indvars.iv.i = phi i64 [ %5, %.lr.ph.i ], [ %indvars.iv.next.i, %11 ]
%12 = getelementptr inbounds i8 addrspace(1)* %1, i64 %indvars.iv.i
store i8 %2, i8 addrspace(1)* %12, align 1, !tbaa !23
%indvars.iv.next.i = add nsw i64 %indvars.iv.i, %10
%13 = icmp ult i64 %indvars.iv.next.i, %0
br i1 %13, label %11, label %_ZZ16ihipMemsetKernelIcEN2hc17completion_futureEP12ihipStream_tPT_S4_mENKUlNS0_11tiled_indexILi1EEEE_clES7_.exit.loopexit
_ZZ16ihipMemsetKernelIcEN2hc17completion_futureEP12ihipStream_tPT_S4_mENKUlNS0_11tiled_indexILi1EEEE_clES7_.exit.loopexit: ; preds = %11
br label %_ZZ16ihipMemsetKernelIcEN2hc17completion_futureEP12ihipStream_tPT_S4_mENKUlNS0_11tiled_indexILi1EEEE_clES7_.exit
_ZZ16ihipMemsetKernelIcEN2hc17completion_futureEP12ihipStream_tPT_S4_mENKUlNS0_11tiled_indexILi1EEEE_clES7_.exit: ; preds = %_ZZ16ihipMemsetKernelIcEN2hc17completion_futureEP12ihipStream_tPT_S4_mENKUlNS0_11tiled_indexILi1EEEE_clES7_.exit.loopexit, %3
ret void
}
; Function Attrs: nounwind uwtable
; Kernel that fills an i32 buffer with one 32-bit value. The control flow is
; the same as in the i8 fill kernel; only the element type differs.
; NOTE(review): the mangled name suggests this is the trampoline of the lambda
; inside ihipMemsetKernel<unsigned int> - not verifiable from this file alone.
;   %0 (i64)                exclusive upper bound for the element index
;                           (counted in i32 elements, compared unsigned)
;   %1 (i32 addrspace(1)*)  destination buffer
;   %2 (i32)                value stored into every visited element
; Each work-item starts at its global id in dimension 0 and steps by
; amp_get_local_size(0) * hc_get_num_groups(0) until the index reaches %0.
define spir_kernel void @ZZ16ihipMemsetKernelIjEN2hc17completion_futureEP12ihipStream_tPT_S4_mENUlNS0_11tiled_indexILi1EEEE_19__cxxamp_trampolineEmPjj(i64, i32 addrspace(1)*, i32) #4 align 2 {
%4 = tail call spir_func i64 @amp_get_global_id(i32 0) #7
; Keep the low 32 bits of the global id, sign-extended to i64 (start index).
%sext.i = shl i64 %4, 32
%5 = ashr exact i64 %sext.i, 32
; Nothing to do for this work-item if the start index is out of range.
%6 = icmp ult i64 %5, %0
br i1 %6, label %.lr.ph.i, label %_ZZ16ihipMemsetKernelIjEN2hc17completion_futureEP12ihipStream_tPT_S4_mENKUlNS0_11tiled_indexILi1EEEE_clES7_.exit
.lr.ph.i: ; preds = %3
; Stride %10 = low 32 bits of (local size * number of groups), sign-extended.
%7 = tail call spir_func i64 @amp_get_local_size(i32 0) #7
%8 = tail call spir_func i64 @hc_get_num_groups(i32 0) #7
%9 = shl i64 %7, 32
%sext3.i = mul i64 %9, %8
%10 = ashr exact i64 %sext3.i, 32
br label %11
; <label>:11 ; preds = %11, %.lr.ph.i
; Loop body: dest[index] = value; index += stride; repeat while index < %0.
%indvars.iv.i = phi i64 [ %5, %.lr.ph.i ], [ %indvars.iv.next.i, %11 ]
%12 = getelementptr inbounds i32 addrspace(1)* %1, i64 %indvars.iv.i
store i32 %2, i32 addrspace(1)* %12, align 4, !tbaa !26
%indvars.iv.next.i = add nsw i64 %indvars.iv.i, %10
%13 = icmp ult i64 %indvars.iv.next.i, %0
br i1 %13, label %11, label %_ZZ16ihipMemsetKernelIjEN2hc17completion_futureEP12ihipStream_tPT_S4_mENKUlNS0_11tiled_indexILi1EEEE_clES7_.exit.loopexit
_ZZ16ihipMemsetKernelIjEN2hc17completion_futureEP12ihipStream_tPT_S4_mENKUlNS0_11tiled_indexILi1EEEE_clES7_.exit.loopexit: ; preds = %11
br label %_ZZ16ihipMemsetKernelIjEN2hc17completion_futureEP12ihipStream_tPT_S4_mENKUlNS0_11tiled_indexILi1EEEE_clES7_.exit
_ZZ16ihipMemsetKernelIjEN2hc17completion_futureEP12ihipStream_tPT_S4_mENKUlNS0_11tiled_indexILi1EEEE_clES7_.exit: ; preds = %_ZZ16ihipMemsetKernelIjEN2hc17completion_futureEP12ihipStream_tPT_S4_mENKUlNS0_11tiled_indexILi1EEEE_clES7_.exit.loopexit, %3
ret void
}
; Function Attrs: uwtable
; Kernel trampoline for BFS_kernel (with the kernel body inlined).
;
; Arguments (all unnamed, numbered %0..%23):
;   %0..%5   i32   stored into the first two gl_dim3 members of a stack copy
;                  of grid_launch_parm
;   %6..%11  i32   not referenced anywhere in this body
;   %12      i32   stored into the i32 member (field 4) of that stack copy
;   %13      i32 addrspace(1)*    input array; element [flat index] is the item
;                                 this work-item processes ("node" below)
;   %14      i32 addrspace(1)*    output array that receives the queued values
;   %15      int_2 addrspace(1)*  per-node pair: member 0 = first edge index,
;                                 member 1 = number of edges
;   %16      int_2 addrspace(1)*  per-edge pair: member 0 = neighbour index,
;                                 member 1 = amount added to the node's value
;   %17      i32 addrspace(1)*    per-node array: gets 16677221 for the current
;                                 node, %21 is exchanged in for neighbours
;   %18      i32 addrspace(1)*    per-node array: read for the current node,
;                                 atomically min-updated for neighbours
;   %19      i32   exclusive bound on the flat work-item index
;   %20      i32 addrspace(1)*    global counter advanced by the number of
;                                 values this group queued
;   %21      i32   value exchanged into %17 for improved neighbours
;   %22      i32   not referenced anywhere in this body
;   %23      i32 addrspace(1)*    flag set to 1 when a local queue is full
; NOTE(review): %17/%18 are presumably the colour and cost arrays of a BFS and
; 16677221 a colour constant - confirm against the C++ source.
;
; Phases, separated by hc_barrier(1) calls:
;   1. local ids 0..7 reset one queue counter each and set the queue's step
;   2. each in-range work-item relaxes the edges of its node and queues
;      neighbours into local queue (local id & 7)
;   3. local id 0 builds prefix sums of the counters and reserves output space
;   4. all work-items copy the local queues to %14
define spir_kernel void @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 addrspace(1)*, i32 addrspace(1)*, %"class.hc::short_vector::int_2.0" addrspace(1)*, %"class.hc::short_vector::int_2.0" addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32, i32 addrspace(1)*, i32, i32, i32 addrspace(1)*) #5 align 2 {
; --- Prologue: rebuild a grid_launch_parm on the stack -----------------------
; %26 is filled in below and then copied to %25; neither copy is read again
; before the lifetime.end calls at the end of the function.
%25 = alloca %struct.grid_launch_parm, align 8
%26 = alloca %struct.grid_launch_parm, align 16
; Scratch slot that is never written in this function (see the memcpy below).
%.sroa.23 = alloca [20 x i8], align 4
; Vector <%0, %1, %2, %3>: destined for bytes 0..15 of the struct.
%27 = insertelement <4 x i32> undef, i32 %0, i32 0
%28 = insertelement <4 x i32> %27, i32 %1, i32 1
%29 = insertelement <4 x i32> %28, i32 %2, i32 2
%30 = insertelement <4 x i32> %29, i32 %3, i32 3
%31 = insertelement <4 x i32> undef, i32 %4, i32 0
%32 = insertelement <4 x i32> %31, i32 %5, i32 1
; Local ids (dims 2, 1, 0) and group ids (dims 2, 1, 0), truncated to i32.
%33 = tail call spir_func i64 @amp_get_local_id(i32 2) #7
%34 = tail call spir_func i64 @amp_get_local_id(i32 1) #7
%35 = tail call spir_func i64 @amp_get_local_id(i32 0) #7
%36 = trunc i64 %33 to i32
%37 = trunc i64 %34 to i32
%38 = trunc i64 %35 to i32
%39 = tail call spir_func i64 @amp_get_group_id(i32 2) #7
%40 = tail call spir_func i64 @amp_get_group_id(i32 1) #7
%41 = tail call spir_func i64 @amp_get_group_id(i32 0) #7
%42 = trunc i64 %39 to i32
%43 = trunc i64 %40 to i32
%44 = trunc i64 %41 to i32
%45 = bitcast %struct.grid_launch_parm* %26 to i8*
call spir_func void @llvm.lifetime.start(i64 72, i8* %45)
; Vector <%4, %5, group_id(0), group_id(1)>: bytes 16..31 of the struct.
%46 = insertelement <4 x i32> %32, i32 %44, i32 2
%47 = insertelement <4 x i32> %46, i32 %43, i32 3
; Vector <group_id(2), local_id(0), local_id(1), local_id(2)>: bytes 32..47.
%48 = insertelement <4 x i32> undef, i32 %42, i32 0
%49 = insertelement <4 x i32> %48, i32 %38, i32 1
%50 = insertelement <4 x i32> %49, i32 %37, i32 2
%51 = insertelement <4 x i32> %50, i32 %36, i32 3
; The three vector stores below together fill the four gl_dim3 members:
;   member 0 = (%0, %1, %2)      member 1 = (%3, %4, %5)
;   member 2 = group id (0,1,2)  member 3 = local id (0,1,2)
%52 = bitcast %struct.grid_launch_parm* %26 to <4 x i32>*
store <4 x i32> %30, <4 x i32>* %52, align 16
%53 = getelementptr inbounds %struct.grid_launch_parm* %26, i64 0, i32 1, i32 1
%54 = bitcast i32* %53 to <4 x i32>*
store <4 x i32> %47, <4 x i32>* %54, align 8
%55 = getelementptr inbounds %struct.grid_launch_parm* %26, i64 0, i32 2, i32 2
%56 = bitcast i32* %55 to <4 x i32>*
store <4 x i32> %51, <4 x i32>* %56, align 8
; i32 member (field 4) = %12.
%57 = getelementptr inbounds %struct.grid_launch_parm* %26, i64 0, i32 4
store i32 %12, i32* %57, align 16
; Bytes 52..71 (padding plus the two pointer members) are copied from
; %.sroa.23, which holds no defined value, so those bytes stay undefined.
%58 = getelementptr inbounds i8* %45, i64 52
%59 = getelementptr inbounds [20 x i8]* %.sroa.23, i64 0, i64 0
call spir_func void @llvm.memcpy.p0i8.p0i8.i64(i8* %58, i8* %59, i64 20, i32 4, i1 false)
%60 = bitcast %struct.grid_launch_parm* %25 to i8*
call spir_func void @llvm.lifetime.start(i64 72, i8* %60)
; Whole-struct copy %26 -> %25 (72 bytes).
%tmp = bitcast %struct.grid_launch_parm* %25 to i8*
%tmp1 = bitcast %struct.grid_launch_parm* %26 to i8*
call spir_func void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %tmp1, i64 72, i32 1, i1 false)
; --- Phase 1: queue reset (only local ids below 8) ---------------------------
; %61 = local id in dimension 0; it is reused by every later phase.
%61 = call spir_func i64 @amp_get_local_id(i32 0) #7
%62 = icmp slt i64 %61, 8
br i1 %62, label %63, label %74
; <label>:63 ; preds = %24
; %64 = low 32 bits of the local id, sign-extended; used as the queue number.
%sext2.i = shl i64 %61, 32
%64 = ashr exact i64 %sext2.i, 32
; local_q.field0[queue] = 0  (element counter)
%65 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q1116, i64 0, i32 0, i64 %64
store i32 0, i32 addrspace(3)* %65, align 4, !tbaa !26
; local_q.field2[queue] = local_size / 8, plus 1 when queue < local_size % 8.
; This equals the number of local ids whose low three bits select this queue,
; and is the step used by the copy-out loop in phase 4.
%66 = call spir_func i64 @amp_get_local_size(i32 0) #7
%67 = lshr i64 %66, 3
%68 = and i64 %66, 7
%69 = icmp slt i64 %61, %68
%70 = zext i1 %69 to i64
%71 = add nuw nsw i64 %70, %67
%72 = trunc i64 %71 to i32
%73 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q1116, i64 0, i32 2, i64 %64
store i32 %72, i32 addrspace(3)* %73, align 4, !tbaa !26
br label %74
; <label>:74 ; preds = %63, %24
; Barrier: the reset above must be complete before any work-item queues data.
call spir_func void @hc_barrier(i32 1) #8
; --- Phase 2: process one node per in-range work-item ------------------------
; Flat index %77 = group_id(0) * 512 + local_id(0). The factor 512 (shl 9) is
; hard-coded here rather than taken from amp_get_local_size.
%75 = call spir_func i64 @hc_get_group_id(i32 0) #7
%76 = shl nsw i64 %75, 9
%77 = add nsw i64 %76, %61
%78 = trunc i64 %77 to i32
; Work-items whose flat index is >= %19 skip straight to the next barrier.
%79 = icmp slt i32 %78, %19
br i1 %79, label %80, label %_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.i
; <label>:80 ; preds = %74
%sext.i = shl i64 %77, 32
%81 = ashr exact i64 %sext.i, 32
; %83 = node = %13[flat index]
%82 = getelementptr inbounds i32 addrspace(1)* %13, i64 %81
%83 = load i32 addrspace(1)* %82, align 4, !tbaa !26
%84 = sext i32 %83 to i64
; %17[node] = 16677221
%85 = getelementptr inbounds i32 addrspace(1)* %17, i64 %84
store i32 16677221, i32 addrspace(1)* %85, align 4, !tbaa !26
; %87 = %18[node]  (the node's current value)
%86 = getelementptr inbounds i32 addrspace(1)* %18, i64 %84
%87 = load i32 addrspace(1)* %86, align 4, !tbaa !26
; %89 = first edge index, %91 = edge count, %92 = one past the last edge.
%88 = getelementptr inbounds %"class.hc::short_vector::int_2.0" addrspace(1)* %15, i64 %84, i32 0
%89 = load i32 addrspace(1)* %88, align 4, !tbaa !28
%90 = getelementptr inbounds %"class.hc::short_vector::int_2.0" addrspace(1)* %15, i64 %84, i32 1
%91 = load i32 addrspace(1)* %90, align 4, !tbaa !30
%92 = add nsw i32 %91, %89
; No edges (count <= 0): nothing to relax.
%93 = icmp sgt i32 %91, 0
br i1 %93, label %.lr.ph.i.i, label %_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.i
.lr.ph.i.i: ; preds = %80
; This work-item always appends to queue number (local id & 7); %95 is the
; address of that queue's counter.
%94 = and i64 %61, 7
%95 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q1116, i64 0, i32 0, i64 %94
%96 = sext i32 %89 to i64
br label %97
; <label>:97 ; preds = %_ZN11LocalQueues6appendEiPii.exit.i.i, %.lr.ph.i.i
; Edge loop: %indvars.iv.i.i runs from the first edge index up to %92.
%indvars.iv.i.i = phi i64 [ %96, %.lr.ph.i.i ], [ %indvars.iv.next.i.i, %_ZN11LocalQueues6appendEiPii.exit.i.i ]
; %99 = neighbour index, %101 = per-edge increment.
%98 = getelementptr inbounds %"class.hc::short_vector::int_2.0" addrspace(1)* %16, i64 %indvars.iv.i.i, i32 0
%99 = load i32 addrspace(1)* %98, align 4, !tbaa !28
%100 = getelementptr inbounds %"class.hc::short_vector::int_2.0" addrspace(1)* %16, i64 %indvars.iv.i.i, i32 1
%101 = load i32 addrspace(1)* %100, align 4, !tbaa !30
; %102 = candidate value for the neighbour = node value + increment.
%102 = add nsw i32 %101, %87
%103 = sext i32 %99 to i64
%104 = getelementptr inbounds i32 addrspace(1)* %18, i64 %103
; NOTE(review): presumably returns the value stored before the min - confirm.
%105 = call spir_func i32 @atomic_min_int_global(i32 addrspace(1)* %104, i32 %102) #9
; Continue only if the returned value was greater than the candidate.
%106 = icmp sgt i32 %105, %102
br i1 %106, label %107, label %_ZN11LocalQueues6appendEiPii.exit.i.i
; <label>:107 ; preds = %97
; Exchange %21 into %17[neighbour]; if the call returns %21 the neighbour is
; skipped, otherwise it is appended to the local queue.
%108 = getelementptr inbounds i32 addrspace(1)* %17, i64 %103
%109 = call spir_func i32 @atomic_exchange_int_global(i32 addrspace(1)* %108, i32 %21) #9
%110 = icmp eq i32 %109, %21
br i1 %110, label %_ZN11LocalQueues6appendEiPii.exit.i.i, label %111
; <label>:111 ; preds = %107
; Reserve a slot: %112 is the value returned by the atomic add of 1 on the
; queue counter and is used as the slot index.
%112 = call spir_func i32 @atomic_add_int_local(i32 addrspace(3)* %95, i32 1) #9
; Each queue has 400 slots, so indices above 399 do not fit.
%113 = icmp sgt i32 %112, 399
br i1 %113, label %114, label %115
; <label>:114 ; preds = %111
; Queue full: report it by writing 1 to *%23; the neighbour is not stored.
; NOTE(review): the counter has already been incremented at this point and is
; not rolled back, so it can exceed 400; phases 3 and 4 use it unclamped.
store i32 1, i32 addrspace(1)* %23, align 4, !tbaa !26
br label %_ZN11LocalQueues6appendEiPii.exit.i.i
; <label>:115 ; preds = %111
; local_q.field1[queue][slot] = neighbour index
%116 = sext i32 %112 to i64
%117 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q1116, i64 0, i32 1, i64 %94, i64 %116
store i32 %99, i32 addrspace(3)* %117, align 4, !tbaa !26
br label %_ZN11LocalQueues6appendEiPii.exit.i.i
_ZN11LocalQueues6appendEiPii.exit.i.i: ; preds = %115, %114, %107, %97
; Loop latch: next edge; the comparison is done on the 32-bit truncation.
%indvars.iv.next.i.i = add nsw i64 %indvars.iv.i.i, 1
%118 = trunc i64 %indvars.iv.next.i.i to i32
%119 = icmp slt i32 %118, %92
br i1 %119, label %97, label %_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.loopexit.i
_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.loopexit.i: ; preds = %_ZN11LocalQueues6appendEiPii.exit.i.i
br label %_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.i
_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.i: ; preds = %_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.loopexit.i, %80, %74
; Barrier: all appends are finished before the counters are summed.
call spir_func void @hc_barrier(i32 1) #8
; --- Phase 3: prefix sums and output reservation (local id 0 only) -----------
%120 = icmp eq i64 %61, 0
br i1 %120, label %121, label %154
; <label>:121 ; preds = %_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.i
; Fully unrolled exclusive prefix sum over the eight queue counters:
;   prefix_q[0] = 0
;   prefix_q[k] = prefix_q[k-1] + local_q.field0[k-1]   for k = 1..7
%122 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E8prefix_q1217, i64 0, i64 0
store i32 0, i32 addrspace(3)* %122, align 16, !tbaa !26
%123 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q1116, i64 0, i32 0, i64 0
%124 = load i32 addrspace(3)* %123, align 4, !tbaa !26
%125 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E8prefix_q1217, i64 0, i64 1
store i32 %124, i32 addrspace(3)* %125, align 4, !tbaa !26
%126 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q1116, i64 0, i32 0, i64 1
%127 = load i32 addrspace(3)* %126, align 4, !tbaa !26
%128 = add nsw i32 %127, %124
%129 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E8prefix_q1217, i64 0, i64 2
store i32 %128, i32 addrspace(3)* %129, align 8, !tbaa !26
%130 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q1116, i64 0, i32 0, i64 2
%131 = load i32 addrspace(3)* %130, align 4, !tbaa !26
%132 = add nsw i32 %131, %128
%133 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E8prefix_q1217, i64 0, i64 3
store i32 %132, i32 addrspace(3)* %133, align 4, !tbaa !26
%134 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q1116, i64 0, i32 0, i64 3
%135 = load i32 addrspace(3)* %134, align 4, !tbaa !26
%136 = add nsw i32 %135, %132
%137 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E8prefix_q1217, i64 0, i64 4
store i32 %136, i32 addrspace(3)* %137, align 16, !tbaa !26
%138 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q1116, i64 0, i32 0, i64 4
%139 = load i32 addrspace(3)* %138, align 4, !tbaa !26
%140 = add nsw i32 %139, %136
%141 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E8prefix_q1217, i64 0, i64 5
store i32 %140, i32 addrspace(3)* %141, align 4, !tbaa !26
%142 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q1116, i64 0, i32 0, i64 5
%143 = load i32 addrspace(3)* %142, align 4, !tbaa !26
%144 = add nsw i32 %143, %140
%145 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E8prefix_q1217, i64 0, i64 6
store i32 %144, i32 addrspace(3)* %145, align 8, !tbaa !26
%146 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q1116, i64 0, i32 0, i64 6
%147 = load i32 addrspace(3)* %146, align 4, !tbaa !26
%148 = add nsw i32 %147, %144
%149 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E8prefix_q1217, i64 0, i64 7
store i32 %148, i32 addrspace(3)* %149, align 4, !tbaa !26
%150 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q1116, i64 0, i32 0, i64 7
%151 = load i32 addrspace(3)* %150, align 4, !tbaa !26
; %152 = sum of all eight counters = number of values this group wants to emit.
%152 = add nsw i32 %151, %148
; Advance the global counter *%20 by that total and publish the returned value
; in the group-shared 'shift' variable.
; NOTE(review): presumably the return value is the counter before the add, i.e.
; this group's base offset in %14 - confirm.
%153 = call spir_func i32 @atomic_add_int_global(i32 addrspace(1)* %20, i32 %152) #9
store i32 %153, i32 addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E5shift1318, align 4, !tbaa !26
br label %154
; <label>:154 ; preds = %121, %_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.i
; Barrier: prefix_q and shift are now visible to the whole group.
call spir_func void @hc_barrier(i32 1) #8
; --- Phase 4: copy the local queues to the output array %14 ------------------
; %156 = shift; %157 = queue number (local id & 7); %159 = first slot handled
; by this work-item (local id >> 3).
%155 = load i32 addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E5shift1318, align 4, !tbaa !26
%156 = sext i32 %155 to i64
%157 = and i64 %61, 7
%158 = lshr i64 %61, 3
%159 = trunc i64 %158 to i32
; Skip the copy loop when the first slot is already past the queue's counter.
%160 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q1116, i64 0, i32 0, i64 %157
%161 = load i32 addrspace(3)* %160, align 4, !tbaa !26
%162 = icmp slt i32 %159, %161
br i1 %162, label %.lr.ph.i1.i, label %_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_.exit
.lr.ph.i1.i: ; preds = %154
; %163 = &prefix_q[queue], %164 = &local_q.field2[queue] (the step).
%163 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E8prefix_q1217, i64 0, i64 %157
%164 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q1116, i64 0, i32 2, i64 %157
br label %165
; <label>:165 ; preds = %165, %.lr.ph.i1.i
; Copy loop over slots: slot, slot + step, slot + 2*step, ...
;   %14[shift + prefix_q[queue] + slot] = local_q.field1[queue][slot]
; prefix_q, the step and the counter are reloaded on every iteration.
; NOTE(review): the slot index is bounded only by the queue counter, which can
; exceed the 400-entry row after an overflow in phase 2 - confirm the caller
; discards the output when *%23 is set.
%local_shift.01.i.i = phi i32 [ %159, %.lr.ph.i1.i ], [ %174, %165 ]
%166 = sext i32 %local_shift.01.i.i to i64
%167 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q1116, i64 0, i32 1, i64 %157, i64 %166
%168 = load i32 addrspace(3)* %167, align 4, !tbaa !26
%169 = load i32 addrspace(3)* %163, align 4, !tbaa !26
%170 = add nsw i32 %169, %local_shift.01.i.i
%171 = sext i32 %170 to i64
%.sum.i = add nsw i64 %171, %156
%172 = getelementptr inbounds i32 addrspace(1)* %14, i64 %.sum.i
store i32 %168, i32 addrspace(1)* %172, align 4, !tbaa !26
; Advance by the queue's step and continue while below the queue's counter.
%173 = load i32 addrspace(3)* %164, align 4, !tbaa !26
%174 = add nsw i32 %173, %local_shift.01.i.i
%175 = load i32 addrspace(3)* %160, align 4, !tbaa !26
%176 = icmp slt i32 %174, %175
br i1 %176, label %165, label %_ZN11LocalQueues11concatenateEPiRA8_i.exit.loopexit.i
_ZN11LocalQueues11concatenateEPiRA8_i.exit.loopexit.i: ; preds = %165
br label %_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_.exit
_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_.exit: ; preds = %_ZN11LocalQueues11concatenateEPiRA8_i.exit.loopexit.i, %154
; Epilogue: end the lifetimes of both grid_launch_parm stack copies.
%177 = bitcast %struct.grid_launch_parm* %25 to i8*
call spir_func void @llvm.lifetime.end(i64 72, i8* %177)
call spir_func void @llvm.lifetime.end(i64 72, i8* %45)
ret void
}
; External atomic helpers: declared here, defined outside this module.
; Each takes a pointer to an i32 plus an i32 operand and returns an i32.
; The *_global variants take addrspace(1) pointers; the *_local variant takes
; an addrspace(3) pointer.
; NOTE(review): callers in this file compare the returned value with the
; operand as if it were the value held before the update - confirm against the
; runtime library that implements these helpers.
declare i32 @atomic_min_int_global(i32 addrspace(1)*, i32) #6
declare i32 @atomic_exchange_int_global(i32 addrspace(1)*, i32) #6
declare i32 @atomic_add_int_local(i32 addrspace(3)*, i32) #6
declare i32 @atomic_add_int_global(i32 addrspace(1)*, i32) #6
; Function Attrs: uwtable
;
; spir_kernel entry point. Per the mangled name this appears to be the
; compiler-generated "__cxxamp_trampoline" for BFS_kernel_multi_blk_inGPU, with
; the callee inlined (block labels carry the inlined callees' names).
;
; Unnamed arguments, in order:
;   %0..%12   i32                       - %0..%5 and %12 are packed into a
;                                         grid_launch_parm; %6..%11 are not
;                                         referenced in the body
;   %13, %14  i32 addrspace(1)*         - two index buffers; which one is read
;                                         and which one is written is chosen by
;                                         the odd_time flag
;   %15       int_2 addrspace(1)*       - indexed by the value read from the
;                                         input buffer; fields 0 and 1 are
;                                         loaded and used as (start, count) of
;                                         a range into %16
;   %16       int_2 addrspace(1)*       - indexed over that range; field 0 is
;                                         used as an index into %17/%18, field
;                                         1 is added to a value loaded from %18
;   %17       i32 addrspace(1)*         - receives 16677221 and the current
;                                         %.0.i value (via atomic exchange)
;   %18       i32 addrspace(1)*         - updated with atomic_min_int_global
;   %19       i32 addrspace(1)*         - element count, read on entry and
;                                         written back on exit
;   %20       i32 addrspace(1)*         - counter advanced by each group's
;                                         total appended element count
;   %21       i32                       - initial value of %.0.i
;   %22       i32                       - not referenced in the body
;   %23       i32 addrspace(1)*         - receives (odd_time + 1) srem 2 on exit
;   %24       i32 addrspace(1)*         - not referenced in the body
;   %25       i32 addrspace(1)*         - barrier round counter: read on entry,
;                                         written back on exit
;   %26       i32 addrspace(1)*         - set to 1 when a local queue slot
;                                         index exceeds 399
;
; NOTE(review): the memory-space meaning of addrspace(1)/addrspace(3) and the
; exact semantics of hc_barrier / amp_get_* / hc_get_* are not defined in this
; chunk; comments below describe only what the IR itself does with them.
define spir_kernel void @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 addrspace(1)*, i32 addrspace(1)*, %"class.hc::short_vector::int_2.0" addrspace(1)*, %"class.hc::short_vector::int_2.0" addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32, i32, i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*) #5 align 2 {
; --- Entry block (%27): build a grid_launch_parm on the stack. ---
; %29 is filled in, then copied wholesale into %28. Neither copy is read again
; in this function apart from the lifetime markers.
%28 = alloca %struct.grid_launch_parm, align 8
%29 = alloca %struct.grid_launch_parm, align 16
; %.sroa.23 is never written in this function; it is only the source of the
; 20-byte memcpy below.
%.sroa.23 = alloca [20 x i8], align 4
; Vector #1 = { %0, %1, %2, %3 } -> bytes 0..15 of %29.
%30 = insertelement <4 x i32> undef, i32 %0, i32 0
%31 = insertelement <4 x i32> %30, i32 %1, i32 1
%32 = insertelement <4 x i32> %31, i32 %2, i32 2
%33 = insertelement <4 x i32> %32, i32 %3, i32 3
; Vector #2 starts with { %4, %5 }; the group ids are appended further down.
%34 = insertelement <4 x i32> undef, i32 %4, i32 0
%35 = insertelement <4 x i32> %34, i32 %5, i32 1
; Query local ids for dimensions 2, 1, 0 and truncate them to i32.
%36 = tail call spir_func i64 @amp_get_local_id(i32 2) #7
%37 = tail call spir_func i64 @amp_get_local_id(i32 1) #7
%38 = tail call spir_func i64 @amp_get_local_id(i32 0) #7
%39 = trunc i64 %36 to i32
%40 = trunc i64 %37 to i32
%41 = trunc i64 %38 to i32
; Query group ids for dimensions 2, 1, 0 and truncate them to i32.
%42 = tail call spir_func i64 @amp_get_group_id(i32 2) #7
%43 = tail call spir_func i64 @amp_get_group_id(i32 1) #7
%44 = tail call spir_func i64 @amp_get_group_id(i32 0) #7
%45 = trunc i64 %42 to i32
%46 = trunc i64 %43 to i32
%47 = trunc i64 %44 to i32
%48 = bitcast %struct.grid_launch_parm* %29 to i8*
call spir_func void @llvm.lifetime.start(i64 72, i8* %48)
; Vector #2 = { %4, %5, group_id(0), group_id(1) }.
%49 = insertelement <4 x i32> %35, i32 %47, i32 2
%50 = insertelement <4 x i32> %49, i32 %46, i32 3
; Vector #3 = { group_id(2), local_id(0), local_id(1), local_id(2) }.
%51 = insertelement <4 x i32> undef, i32 %45, i32 0
%52 = insertelement <4 x i32> %51, i32 %41, i32 1
%53 = insertelement <4 x i32> %52, i32 %40, i32 2
%54 = insertelement <4 x i32> %53, i32 %39, i32 3
; The three 16-byte stores straddle the gl_dim3 members: they start at the
; struct base, at member 1 / element 1, and at member 2 / element 2.
%55 = bitcast %struct.grid_launch_parm* %29 to <4 x i32>*
store <4 x i32> %33, <4 x i32>* %55, align 16
%56 = getelementptr inbounds %struct.grid_launch_parm* %29, i64 0, i32 1, i32 1
%57 = bitcast i32* %56 to <4 x i32>*
store <4 x i32> %50, <4 x i32>* %57, align 8
%58 = getelementptr inbounds %struct.grid_launch_parm* %29, i64 0, i32 2, i32 2
%59 = bitcast i32* %58 to <4 x i32>*
store <4 x i32> %54, <4 x i32>* %59, align 8
; Member 4 (the lone i32 after the four gl_dim3 members) receives %12.
%60 = getelementptr inbounds %struct.grid_launch_parm* %29, i64 0, i32 4
store i32 %12, i32* %60, align 16
; Copy 20 bytes from %.sroa.23 to byte offset 52 of %29.
%61 = getelementptr inbounds i8* %48, i64 52
%62 = getelementptr inbounds [20 x i8]* %.sroa.23, i64 0, i64 0
call spir_func void @llvm.memcpy.p0i8.p0i8.i64(i8* %61, i8* %62, i64 20, i32 4, i1 false)
; Duplicate the whole 72-byte struct from %29 into %28.
%63 = bitcast %struct.grid_launch_parm* %28 to i8*
call spir_func void @llvm.lifetime.start(i64 72, i8* %63)
%tmp = bitcast %struct.grid_launch_parm* %28 to i8*
%tmp1 = bitcast %struct.grid_launch_parm* %29 to i8*
call spir_func void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %tmp1, i64 72, i32 1, i1 false)
; %64 = local id in dimension 0; %65 = "local id is 0". %65 gates every
; single-work-item section below.
%64 = call spir_func i64 @amp_get_local_id(i32 0) #7
%65 = icmp eq i64 %64, 0
br i1 %65, label %66, label %71
; <label>:66 ; preds = %27
; Local id 0 only: set the addrspace(3) odd_time flag to 1.
store i32 1, i32 addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E8odd_time1035, align 4, !tbaa !26
%67 = call spir_func i64 @hc_get_group_id(i32 0) #7
%68 = icmp eq i64 %67, 0
br i1 %68, label %69, label %71
; <label>:69 ; preds = %66
; Group id 0 and local id 0 only: publish *%19 into @no_of_nodes_vol27.
%70 = load i32 addrspace(1)* %19, align 4, !tbaa !26
store volatile i32 %70, i32 addrspace(1)* @no_of_nodes_vol27, align 4, !tbaa !26
br label %71
; <label>:71 ; preds = %69, %66, %27
; OR-ing with 0 leaves *%25 unchanged; the call's result seeds the round
; counter %kt.0.i (presumably an atomic read - confirm helper semantics).
%72 = call spir_func i32 @atomic_or_int_global(i32 addrspace(1)* %25, i32 0) #9
; %73 = local id < 8.
%73 = icmp slt i64 %64, 8
; %74 = low 32 bits of the local id, sign-extended back to i64.
%sext7.i = shl i64 %64, 32
%74 = ashr exact i64 %sext7.i, 32
; %75 / %76 = &local_q.member0[%74] / &local_q.member2[%74].
%75 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E7local_q631, i64 0, i32 0, i64 %74
%76 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E7local_q631, i64 0, i32 2, i64 %74
; %77 = local id & 7: selects which of the 8 queues this work-item uses.
%77 = and i64 %64, 7
; %78 = &local_q.member0[%77]. It is used as the append counter and as the
; copy-out loop bound.
%78 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E7local_q631, i64 0, i32 0, i64 %77
; %80 = local id >> 3: this work-item's starting slot in the copy-out loop.
%79 = lshr i64 %64, 3
%80 = trunc i64 %79 to i32
; %81 = &prefix_q[%77]; %82 = &local_q.member2[%77] (copy-out loop stride).
%81 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E8prefix_q732, i64 0, i64 %77
%82 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E7local_q631, i64 0, i32 2, i64 %77
br label %83
; <label>:83 ; preds = %_Z20start_global_barrieri.exit2.i, %71
; --- Outer loop header: one iteration per pass; repeats while @stay_vol29 is
; non-zero at the bottom. ---
; %kt.0.i: round counter, advanced by 2 per iteration (%215).
; %.0.i  : value exchanged into %17 for newly claimed entries; starts as %21.
%kt.0.i = phi i32 [ %72, %71 ], [ %215, %_Z20start_global_barrieri.exit2.i ]
%.0.i = phi i32 [ %21, %71 ], [ %.1.i, %_Z20start_global_barrieri.exit2.i ]
br i1 %73, label %84, label %92
; <label>:84 ; preds = %83
; Local ids 0..7 reset their queue: member0[id] = 0 and
; member2[id] = (local_size >> 3) + (id < (local_size & 7) ? 1 : 0).
store i32 0, i32 addrspace(3)* %75, align 4, !tbaa !26
%85 = call spir_func i64 @amp_get_local_size(i32 0) #7
%86 = lshr i64 %85, 3
%87 = and i64 %85, 7
%88 = icmp slt i64 %64, %87
%89 = zext i1 %88 to i64
%90 = add nuw nsw i64 %89, %86
%91 = trunc i64 %90 to i32
store i32 %91, i32 addrspace(3)* %76, align 4, !tbaa !26
br label %92
; <label>:92 ; preds = %84, %83
br i1 %65, label %93, label %95
; <label>:93 ; preds = %92
; Local id 0: copy @no_of_nodes_vol27 into the addrspace(3) no_of_nodes_sm.
%94 = load volatile i32 addrspace(1)* @no_of_nodes_vol27, align 4, !tbaa !26
store i32 %94, i32 addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E14no_of_nodes_sm934, align 4, !tbaa !26
br label %95
; <label>:95 ; preds = %93, %92
call spir_func void @hc_barrier(i32 1) #8
; %98 = group_id(0) * 512 + local id; %99 is its i32 truncation.
%96 = call spir_func i64 @hc_get_group_id(i32 0) #7
%97 = shl nsw i64 %96, 9
%98 = add nsw i64 %97, %64
%99 = trunc i64 %98 to i32
; Only work-items with %99 < no_of_nodes_sm process an element this pass.
%100 = load i32 addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E14no_of_nodes_sm934, align 4, !tbaa !26
%101 = icmp slt i32 %99, %100
br i1 %101, label %102, label %_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.i
; <label>:102 ; preds = %95
; Input buffer for this pass: %13 when odd_time != 0, otherwise %14.
%103 = load i32 addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E8odd_time1035, align 4, !tbaa !26
%104 = icmp ne i32 %103, 0
%105 = select i1 %104, i32 addrspace(1)* %13, i32 addrspace(1)* %14
%sext.i = shl i64 %98, 32
%106 = ashr exact i64 %sext.i, 32
%107 = getelementptr inbounds i32 addrspace(1)* %105, i64 %106
; OR with 0 again: the returned value %108 is the index of the element this
; work-item processes.
%108 = call spir_func i32 @atomic_or_int_global(i32 addrspace(1)* %107, i32 0) #9
%109 = sext i32 %108 to i64
; Write the constant 16677221 to %17[%108].
; NOTE(review): the constant's meaning is not visible in this chunk.
%110 = getelementptr inbounds i32 addrspace(1)* %17, i64 %109
store i32 16677221, i32 addrspace(1)* %110, align 4, !tbaa !26
; %112 = %18[%108]; it is added to every candidate value in the loop below.
%111 = getelementptr inbounds i32 addrspace(1)* %18, i64 %109
%112 = load i32 addrspace(1)* %111, align 4, !tbaa !26
; %114 / %116 = fields 0 / 1 of %15[%108]; the loop covers indices
; [%114, %114 + %116) of %16 and is skipped when %116 <= 0.
%113 = getelementptr inbounds %"class.hc::short_vector::int_2.0" addrspace(1)* %15, i64 %109, i32 0
%114 = load i32 addrspace(1)* %113, align 4, !tbaa !28
%115 = getelementptr inbounds %"class.hc::short_vector::int_2.0" addrspace(1)* %15, i64 %109, i32 1
%116 = load i32 addrspace(1)* %115, align 4, !tbaa !30
%117 = add nsw i32 %116, %114
%118 = icmp sgt i32 %116, 0
br i1 %118, label %.lr.ph.i.i, label %_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.i
.lr.ph.i.i: ; preds = %102
%119 = sext i32 %114 to i64
br label %120
; <label>:120 ; preds = %_ZN11LocalQueues6appendEiPii.exit.i.i, %.lr.ph.i.i
; --- Inner loop over %16[%indvars.iv.i.i]. ---
%indvars.iv.i.i = phi i64 [ %119, %.lr.ph.i.i ], [ %indvars.iv.next.i.i, %_ZN11LocalQueues6appendEiPii.exit.i.i ]
; %122 = target index (field 0); %124 = field 1; %125 = %124 + %112.
%121 = getelementptr inbounds %"class.hc::short_vector::int_2.0" addrspace(1)* %16, i64 %indvars.iv.i.i, i32 0
%122 = load i32 addrspace(1)* %121, align 4, !tbaa !28
%123 = getelementptr inbounds %"class.hc::short_vector::int_2.0" addrspace(1)* %16, i64 %indvars.iv.i.i, i32 1
%124 = load i32 addrspace(1)* %123, align 4, !tbaa !30
%125 = add nsw i32 %124, %112
%126 = sext i32 %122 to i64
%127 = getelementptr inbounds i32 addrspace(1)* %18, i64 %126
; Continue with this target only if the value returned by the atomic min on
; %18[%122] is greater than the candidate %125.
%128 = call spir_func i32 @atomic_min_int_global(i32 addrspace(1)* %127, i32 %125) #9
%129 = icmp sgt i32 %128, %125
br i1 %129, label %130, label %_ZN11LocalQueues6appendEiPii.exit.i.i
; <label>:130 ; preds = %120
; Exchange %.0.i into %17[%122]; if the returned value already equals %.0.i
; the target is not appended again.
%131 = getelementptr inbounds i32 addrspace(1)* %17, i64 %126
%132 = call spir_func i32 @atomic_exchange_int_global(i32 addrspace(1)* %131, i32 %.0.i) #9
%133 = icmp eq i32 %132, %.0.i
br i1 %133, label %_ZN11LocalQueues6appendEiPii.exit.i.i, label %134
; <label>:134 ; preds = %130
; Reserve a slot in queue %77 by atomically adding 1 to its counter; %135 is
; used as the slot index.
%135 = call spir_func i32 @atomic_add_int_local(i32 addrspace(3)* %78, i32 1) #9
%136 = icmp sgt i32 %135, 399
br i1 %136, label %137, label %138
; <label>:137 ; preds = %134
; Slot index above 399 (each queue row holds 400 i32): store 1 to *%26 and
; drop the element. The counter stays incremented.
store i32 1, i32 addrspace(1)* %26, align 4, !tbaa !26
br label %_ZN11LocalQueues6appendEiPii.exit.i.i
; <label>:138 ; preds = %134
; Store the target index into local_q.member1[%77][%135].
%139 = sext i32 %135 to i64
%140 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E7local_q631, i64 0, i32 1, i64 %77, i64 %139
store i32 %122, i32 addrspace(3)* %140, align 4, !tbaa !26
br label %_ZN11LocalQueues6appendEiPii.exit.i.i
_ZN11LocalQueues6appendEiPii.exit.i.i: ; preds = %138, %137, %130, %120
; Advance the induction variable; loop while its i32 truncation < %117.
%indvars.iv.next.i.i = add nsw i64 %indvars.iv.i.i, 1
%141 = trunc i64 %indvars.iv.next.i.i to i32
%142 = icmp slt i32 %141, %117
br i1 %142, label %120, label %_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.loopexit.i
_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.loopexit.i: ; preds = %_ZN11LocalQueues6appendEiPii.exit.i.i
br label %_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.i
_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.i: ; preds = %_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.loopexit.i, %102, %95
call spir_func void @hc_barrier(i32 1) #8
br i1 %65, label %143, label %176
; <label>:143 ; preds = %_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.i
; --- Local id 0 only: fully unrolled exclusive prefix sum of the 8 queue
; counters. prefix_q[0] = 0 and prefix_q[k] = member0[0] + ... + member0[k-1].
; The counters are not clamped to 400 here. ---
%144 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E8prefix_q732, i64 0, i64 0
store i32 0, i32 addrspace(3)* %144, align 16, !tbaa !26
%145 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E7local_q631, i64 0, i32 0, i64 0
%146 = load i32 addrspace(3)* %145, align 4, !tbaa !26
%147 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E8prefix_q732, i64 0, i64 1
store i32 %146, i32 addrspace(3)* %147, align 4, !tbaa !26
%148 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E7local_q631, i64 0, i32 0, i64 1
%149 = load i32 addrspace(3)* %148, align 4, !tbaa !26
%150 = add nsw i32 %149, %146
%151 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E8prefix_q732, i64 0, i64 2
store i32 %150, i32 addrspace(3)* %151, align 8, !tbaa !26
%152 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E7local_q631, i64 0, i32 0, i64 2
%153 = load i32 addrspace(3)* %152, align 4, !tbaa !26
%154 = add nsw i32 %153, %150
%155 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E8prefix_q732, i64 0, i64 3
store i32 %154, i32 addrspace(3)* %155, align 4, !tbaa !26
%156 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E7local_q631, i64 0, i32 0, i64 3
%157 = load i32 addrspace(3)* %156, align 4, !tbaa !26
%158 = add nsw i32 %157, %154
%159 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E8prefix_q732, i64 0, i64 4
store i32 %158, i32 addrspace(3)* %159, align 16, !tbaa !26
%160 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E7local_q631, i64 0, i32 0, i64 4
%161 = load i32 addrspace(3)* %160, align 4, !tbaa !26
%162 = add nsw i32 %161, %158
%163 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E8prefix_q732, i64 0, i64 5
store i32 %162, i32 addrspace(3)* %163, align 4, !tbaa !26
%164 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E7local_q631, i64 0, i32 0, i64 5
%165 = load i32 addrspace(3)* %164, align 4, !tbaa !26
%166 = add nsw i32 %165, %162
%167 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E8prefix_q732, i64 0, i64 6
store i32 %166, i32 addrspace(3)* %167, align 8, !tbaa !26
%168 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E7local_q631, i64 0, i32 0, i64 6
%169 = load i32 addrspace(3)* %168, align 4, !tbaa !26
%170 = add nsw i32 %169, %166
%171 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E8prefix_q732, i64 0, i64 7
store i32 %170, i32 addrspace(3)* %171, align 4, !tbaa !26
%172 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E7local_q631, i64 0, i32 0, i64 7
%173 = load i32 addrspace(3)* %172, align 4, !tbaa !26
; %174 = sum of all 8 counters. It is added atomically to *%20, and the value
; returned by the atomic is stored in `shift` as this group's base offset in
; the output buffer.
%174 = add nsw i32 %173, %170
%175 = call spir_func i32 @atomic_add_int_global(i32 addrspace(1)* %20, i32 %174) #9
store i32 %175, i32 addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E5shift833, align 4, !tbaa !26
br label %176
; <label>:176 ; preds = %143, %_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.i
call spir_func void @hc_barrier(i32 1) #8
; Output buffer for this pass: %14 when odd_time != 0, otherwise %13. This is
; the opposite of the input selection at %105.
%177 = load i32 addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E8odd_time1035, align 4, !tbaa !26
%178 = icmp ne i32 %177, 0
%179 = select i1 %178, i32 addrspace(1)* %14, i32 addrspace(1)* %13
%180 = load i32 addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E5shift833, align 4, !tbaa !26
%181 = sext i32 %180 to i64
; Enter the copy-out loop only if the starting slot %80 is below the counter
; of queue %77.
%182 = load i32 addrspace(3)* %78, align 4, !tbaa !26
%183 = icmp slt i32 %80, %182
br i1 %183, label %.lr.ph.i1.preheader.i, label %_ZN11LocalQueues11concatenateEPiRA8_i.exit.i
.lr.ph.i1.preheader.i: ; preds = %176
br label %.lr.ph.i1.i
.lr.ph.i1.i: ; preds = %.lr.ph.i1.i, %.lr.ph.i1.preheader.i
; --- Copy-out loop: out[shift + prefix_q[%77] + s] = member1[%77][s], with s
; starting at %80 and stepping by member2[%77] while s < member0[%77]. ---
; NOTE(review): the loop bound is the raw counter, which the append path can
; push past 399 (see block 137), while each member1 row has 400 entries; in
; that case this load would index beyond the row. Confirm intended handling.
%local_shift.01.i.i = phi i32 [ %192, %.lr.ph.i1.i ], [ %80, %.lr.ph.i1.preheader.i ]
%184 = sext i32 %local_shift.01.i.i to i64
%185 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E7local_q631, i64 0, i32 1, i64 %77, i64 %184
%186 = load i32 addrspace(3)* %185, align 4, !tbaa !26
%187 = load i32 addrspace(3)* %81, align 4, !tbaa !26
%188 = add nsw i32 %187, %local_shift.01.i.i
%189 = sext i32 %188 to i64
%.sum.i = add nsw i64 %189, %181
%190 = getelementptr inbounds i32 addrspace(1)* %179, i64 %.sum.i
store i32 %186, i32 addrspace(1)* %190, align 4, !tbaa !26
; Stride and bound are reloaded from addrspace(3) on every iteration.
%191 = load i32 addrspace(3)* %82, align 4, !tbaa !26
%192 = add nsw i32 %191, %local_shift.01.i.i
%193 = load i32 addrspace(3)* %78, align 4, !tbaa !26
%194 = icmp slt i32 %192, %193
br i1 %194, label %.lr.ph.i1.i, label %_ZN11LocalQueues11concatenateEPiRA8_i.exit.loopexit.i
_ZN11LocalQueues11concatenateEPiRA8_i.exit.loopexit.i: ; preds = %.lr.ph.i1.i
br label %_ZN11LocalQueues11concatenateEPiRA8_i.exit.i
_ZN11LocalQueues11concatenateEPiRA8_i.exit.i: ; preds = %_ZN11LocalQueues11concatenateEPiRA8_i.exit.loopexit.i, %176
br i1 %65, label %195, label %200
; <label>:195 ; preds = %_ZN11LocalQueues11concatenateEPiRA8_i.exit.i
; Local id 0 only: odd_time = (odd_time + 1) srem 2, and pick the next %.0.i
; value: 16677220 if it is currently 16677219, otherwise 16677219.
%196 = load i32 addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E8odd_time1035, align 4, !tbaa !26
%197 = add nsw i32 %196, 1
%198 = srem i32 %197, 2
store i32 %198, i32 addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E8odd_time1035, align 4, !tbaa !26
%199 = icmp eq i32 %.0.i, 16677219
%..i = select i1 %199, i32 16677220, i32 16677219
br label %200
; <label>:200 ; preds = %195, %_ZN11LocalQueues11concatenateEPiRA8_i.exit.i
; %.1.i is an SSA value, not memory: only the path through block 195 (local id
; 0) yields the toggled value; every other work-item carries %.0.i forward
; unchanged.
; NOTE(review): confirm that per-work-item divergence of this value is
; intended by the original source.
%.1.i = phi i32 [ %.0.i, %_ZN11LocalQueues11concatenateEPiRA8_i.exit.i ], [ %..i, %195 ]
call spir_func void @hc_barrier(i32 1) #8
br i1 %65, label %201, label %_Z20start_global_barrieri.exit.i
; <label>:201 ; preds = %200
; --- First spin barrier on @count24 (local id 0 of each group): add 1, then
; spin until count24 >= kt * 14 + 14. ---
; NOTE(review): 14 is hard-coded; presumably the number of groups launched -
; confirm against the host-side launch configuration.
%202 = call spir_func i32 @atomic_add_int_global(i32 addrspace(1)* @count24, i32 1) #9
%203 = mul i32 %kt.0.i, 14
%204 = add i32 %203, 14
br label %205
; <label>:205 ; preds = %205, %201
; Spin: volatile re-load of @count24 until it reaches %204.
%206 = load volatile i32 addrspace(1)* @count24, align 4, !tbaa !26
%207 = icmp slt i32 %206, %204
br i1 %207, label %205, label %_Z20start_global_barrieri.exit.loopexit.i
_Z20start_global_barrieri.exit.loopexit.i: ; preds = %205
br label %_Z20start_global_barrieri.exit.i
_Z20start_global_barrieri.exit.i: ; preds = %_Z20start_global_barrieri.exit.loopexit.i, %200
call spir_func void @hc_barrier(i32 1) #8
; %brmerge.i is true only when both the group id (%96) and the local id (%64)
; are 0.
%208 = or i64 %96, %64
%brmerge.i = icmp eq i64 %208, 0
br i1 %brmerge.i, label %209, label %214
; <label>:209 ; preds = %_Z20start_global_barrieri.exit.i
; Single work-item: clear @stay_vol29, then test *%20. The add/ult pair is an
; unsigned range check, true exactly when 513 <= *%20 <= 7167.
store volatile i32 0, i32 addrspace(1)* @stay_vol29, align 4, !tbaa !26
%210 = load i32 addrspace(1)* %20, align 4, !tbaa !26
%.off.i = add i32 %210, -513
%211 = icmp ult i32 %.off.i, 6655
br i1 %211, label %212, label %214
; <label>:212 ; preds = %209
; In range: request another pass (stay_vol = 1), publish *%20 as the new
; @no_of_nodes_vol27, and reset *%20 to 0.
store volatile i32 1, i32 addrspace(1)* @stay_vol29, align 4, !tbaa !26
%213 = load i32 addrspace(1)* %20, align 4, !tbaa !26
store volatile i32 %213, i32 addrspace(1)* @no_of_nodes_vol27, align 4, !tbaa !26
store i32 0, i32 addrspace(1)* %20, align 4, !tbaa !26
br label %214
; <label>:214 ; preds = %212, %209, %_Z20start_global_barrieri.exit.i
; Round counter for the next iteration: two spin barriers are used per pass.
%215 = add nsw i32 %kt.0.i, 2
call spir_func void @hc_barrier(i32 1) #8
br i1 %65, label %216, label %_Z20start_global_barrieri.exit2.i
; <label>:216 ; preds = %214
; --- Second spin barrier on @count24: add 1, then spin until
; count24 >= (kt + 2) * 14. ---
%217 = call spir_func i32 @atomic_add_int_global(i32 addrspace(1)* @count24, i32 1) #9
%218 = mul nsw i32 %215, 14
br label %219
; <label>:219 ; preds = %219, %216
%220 = load volatile i32 addrspace(1)* @count24, align 4, !tbaa !26
%221 = icmp slt i32 %220, %218
br i1 %221, label %219, label %_Z20start_global_barrieri.exit2.loopexit.i
_Z20start_global_barrieri.exit2.loopexit.i: ; preds = %219
br label %_Z20start_global_barrieri.exit2.i
_Z20start_global_barrieri.exit2.i: ; preds = %_Z20start_global_barrieri.exit2.loopexit.i, %214
call spir_func void @hc_barrier(i32 1) #8
; Run another pass (back to block 83) unless @stay_vol29 is 0.
%222 = load volatile i32 addrspace(1)* @stay_vol29, align 4, !tbaa !26
%223 = icmp eq i32 %222, 0
br i1 %223, label %224, label %83
; <label>:224 ; preds = %_Z20start_global_barrieri.exit2.i
; Skip the write-back unless this is group id 0 and local id 0.
%.not4.i = icmp ne i64 %96, 0
%.not5.i = xor i1 %65, true
%brmerge6.i = or i1 %.not4.i, %.not5.i
br i1 %brmerge6.i, label %_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_.exit, label %225
; <label>:225 ; preds = %224
; Single work-item write-back: *%25 = round counter, *%23 = (odd_time + 1)
; srem 2, *%19 = @no_of_nodes_vol27.
store i32 %215, i32 addrspace(1)* %25, align 4, !tbaa !26
%226 = load i32 addrspace(3)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1.ZZ26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_E8odd_time1035, align 4, !tbaa !26
%227 = add nsw i32 %226, 1
%228 = srem i32 %227, 2
store i32 %228, i32 addrspace(1)* %23, align 4, !tbaa !26
%229 = load volatile i32 addrspace(1)* @no_of_nodes_vol27, align 4, !tbaa !26
store i32 %229, i32 addrspace(1)* %19, align 4, !tbaa !26
br label %_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_.exit
_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0_.exit: ; preds = %225, %224
; End the lifetimes of both stack grid_launch_parm copies and return.
%230 = bitcast %struct.grid_launch_parm* %28 to i8*
call spir_func void @llvm.lifetime.end(i64 72, i8* %230)
call spir_func void @llvm.lifetime.end(i64 72, i8* %48)
ret void
}
; External helper: declared here, defined outside this module. Takes an
; addrspace(1) i32 pointer plus an i32 operand and returns an i32.
; The kernel above calls it with operand 0 and uses only the returned value.
; NOTE(review): presumably this returns the value held before the OR, which
; would make OR-with-0 an atomic read - confirm against the runtime library.
declare i32 @atomic_or_int_global(i32 addrspace(1)*, i32) #6
; Kernel trampoline for BFS_in_GPU_kernel (name taken from the mangled symbol).
;
; Arguments are unnamed and therefore numbered %0..%23; the entry block is %24.
;   %0-%5   i32  written into the local %struct.grid_launch_parm copy.
;   %6-%11  i32  not referenced by the body.
;   %12     i32  stored into field 4 of the grid_launch_parm copy.
;   %13     i32 addrspace(1)*  read at index (group_id*512 + local_id) while
;                tot_sum is 0.
;   %14     i32 addrspace(1)*  receives the queued ids when tot_sum >= 513.
;   %15     int_2 addrspace(1)*  indexed by the id being visited; field 0 is
;                used as a start index into %16, field 1 as a count.
;   %16     int_2 addrspace(1)*  indexed by that running index; field 0 is a
;                neighbour id, field 1 is added to the visitor's %18 value.
;   %17     i32 addrspace(1)*  per-id marker: 16677221 is stored for the visited
;                id, and the current %.01.ph.i value is atomically exchanged in
;                for neighbours.
;   %18     i32 addrspace(1)*  per-id value lowered with atomic_min_int_global.
;   %19     i32  initial bound compared against the per-work-item index.
;   %20     i32 addrspace(1)*  receives the sum of the eight queue counters.
;   %21     i32  initial value of the marker exchanged into %17.
;   %22     i32  not referenced by the body.
;   %23     i32 addrspace(1)*  set to 1 when a queue counter exceeds 399.
; NOTE(review): 16677219 / 16677220 / 16677221 look like colour/shade constants
; of the original BFS source -- confirm against that source.
;
; Fix relative to the original dump: the block that fetches the id to visit
; used a single phi typed `i32 addrspace(3)*` to merge a pointer into %13
; (addrspace(1)) with a pointer into next_wf (addrspace(3)). That is ill-typed
; IR and would perform the %13 read through the wrong address space. Each
; predecessor now performs its own load in the correct address space and the
; phi merges the loaded i32 values instead.
; Function Attrs: uwtable
define spir_kernel void @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 addrspace(1)*, i32 addrspace(1)*, %"class.hc::short_vector::int_2.0" addrspace(1)*, %"class.hc::short_vector::int_2.0" addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32, i32 addrspace(1)*, i32, i32, i32 addrspace(1)*) #5 align 2 {
%25 = alloca %struct.grid_launch_parm, align 8
%26 = alloca %struct.grid_launch_parm, align 16
%.sroa.23 = alloca [20 x i8], align 4
; Pack %0..%3 and %4..%5 into vectors so the struct can be filled with three
; 16-byte stores below.
%27 = insertelement <4 x i32> undef, i32 %0, i32 0
%28 = insertelement <4 x i32> %27, i32 %1, i32 1
%29 = insertelement <4 x i32> %28, i32 %2, i32 2
%30 = insertelement <4 x i32> %29, i32 %3, i32 3
%31 = insertelement <4 x i32> undef, i32 %4, i32 0
%32 = insertelement <4 x i32> %31, i32 %5, i32 1
; Query local and group ids for dimensions 2, 1, 0 and narrow them to i32.
%33 = tail call spir_func i64 @amp_get_local_id(i32 2) #7
%34 = tail call spir_func i64 @amp_get_local_id(i32 1) #7
%35 = tail call spir_func i64 @amp_get_local_id(i32 0) #7
%36 = trunc i64 %33 to i32
%37 = trunc i64 %34 to i32
%38 = trunc i64 %35 to i32
%39 = tail call spir_func i64 @amp_get_group_id(i32 2) #7
%40 = tail call spir_func i64 @amp_get_group_id(i32 1) #7
%41 = tail call spir_func i64 @amp_get_group_id(i32 0) #7
%42 = trunc i64 %39 to i32
%43 = trunc i64 %40 to i32
%44 = trunc i64 %41 to i32
%45 = bitcast %struct.grid_launch_parm* %26 to i8*
call spir_func void @llvm.lifetime.start(i64 72, i8* %45)
; Second vector: { %4, %5, group_id(0), group_id(1) }.
%46 = insertelement <4 x i32> %32, i32 %44, i32 2
%47 = insertelement <4 x i32> %46, i32 %43, i32 3
; Third vector: { group_id(2), local_id(0), local_id(1), local_id(2) }.
%48 = insertelement <4 x i32> undef, i32 %42, i32 0
%49 = insertelement <4 x i32> %48, i32 %38, i32 1
%50 = insertelement <4 x i32> %49, i32 %37, i32 2
%51 = insertelement <4 x i32> %50, i32 %36, i32 3
; The three vector stores cover the twelve i32s of the four gl_dim3 members.
%52 = bitcast %struct.grid_launch_parm* %26 to <4 x i32>*
store <4 x i32> %30, <4 x i32>* %52, align 16
%53 = getelementptr inbounds %struct.grid_launch_parm* %26, i64 0, i32 1, i32 1
%54 = bitcast i32* %53 to <4 x i32>*
store <4 x i32> %47, <4 x i32>* %54, align 8
%55 = getelementptr inbounds %struct.grid_launch_parm* %26, i64 0, i32 2, i32 2
%56 = bitcast i32* %55 to <4 x i32>*
store <4 x i32> %51, <4 x i32>* %56, align 8
%57 = getelementptr inbounds %struct.grid_launch_parm* %26, i64 0, i32 4
store i32 %12, i32* %57, align 16
; Bytes 52..71 of the struct are copied from %.sroa.23, which is never written
; in this function.
%58 = getelementptr inbounds i8* %45, i64 52
%59 = getelementptr inbounds [20 x i8]* %.sroa.23, i64 0, i64 0
call spir_func void @llvm.memcpy.p0i8.p0i8.i64(i8* %58, i8* %59, i64 20, i32 4, i1 false)
; Duplicate the assembled struct from %26 into %25.
%60 = bitcast %struct.grid_launch_parm* %25 to i8*
call spir_func void @llvm.lifetime.start(i64 72, i8* %60)
%tmp = bitcast %struct.grid_launch_parm* %25 to i8*
%tmp1 = bitcast %struct.grid_launch_parm* %26 to i8*
call spir_func void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %tmp1, i64 72, i32 1, i1 false)
; %61 = local id in dimension 0; %62 is true only for local id 0.
%61 = call spir_func i64 @amp_get_local_id(i32 0) #7
%62 = icmp eq i64 %61, 0
br i1 %62, label %63, label %.preheader.i
; <label>:63 ; preds = %24
; Only local id 0 zeroes tot_sum before the main loop.
store i32 0, i32 addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7tot_sum548, align 4, !tbaa !26
br label %.preheader.i
.preheader.i: ; preds = %63, %24
; Loop-invariant values derived from the local id:
;   %64  local id < 8
;   %65  local id sign-extended from its low 32 bits
;   %66  &local_q.field0[%65]      %67  &local_q.field2[%65]
;   %68  local id & 7 (queue index)
;   %69  &local_q.field0[%68]      %73  &local_q.field2[%68]
;   %70/%71/%74  local id >> 3 (as i64 / i32 / sign-extended i64)
;   %72  &prefix_q[%68]
%64 = icmp slt i64 %61, 8
%sext6.i = shl i64 %61, 32
%65 = ashr exact i64 %sext6.i, 32
%66 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q245, i64 0, i32 0, i64 %65
%67 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q245, i64 0, i32 2, i64 %65
%68 = and i64 %61, 7
%69 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q245, i64 0, i32 0, i64 %68
%70 = lshr i64 %61, 3
%71 = trunc i64 %70 to i32
%72 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E8prefix_q346, i64 0, i64 %68
%73 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q245, i64 0, i32 2, i64 %68
%sext18.i = shl i64 %70, 32
%74 = ashr exact i64 %sext18.i, 32
br label %.outer.i
.outer.i: ; preds = %185, %.preheader.i
; %.01.ph.i: marker value exchanged into %17 (starts as %21, later toggled in
; block %185). %.0.ph.i: index bound (starts as %19, later tot_sum).
%.01.ph.i = phi i32 [ %21, %.preheader.i ], [ %..i, %185 ]
%.0.ph.i = phi i32 [ %19, %.preheader.i ], [ %184, %185 ]
br label %75
; <label>:75 ; preds = %_ZN11LocalQueues11concatenateEPiRA8_i.exit.i, %.outer.i
%.0.i = phi i32 [ %184, %_ZN11LocalQueues11concatenateEPiRA8_i.exit.i ], [ %.0.ph.i, %.outer.i ]
br i1 %64, label %76, label %84
; <label>:76 ; preds = %75
; Local ids 0..7 reset their queue counter and store
; (local_size >> 3) + (local id < (local_size & 7) ? 1 : 0) into field 2.
store i32 0, i32 addrspace(3)* %66, align 4, !tbaa !26
%77 = call spir_func i64 @amp_get_local_size(i32 0) #7
%78 = lshr i64 %77, 3
%79 = and i64 %77, 7
%80 = icmp slt i64 %61, %79
%81 = zext i1 %80 to i64
%82 = add nuw nsw i64 %81, %78
%83 = trunc i64 %82 to i32
store i32 %83, i32 addrspace(3)* %67, align 4, !tbaa !26
br label %84
; <label>:84 ; preds = %76, %75
call spir_func void @hc_barrier(i32 1) #8
; Per-work-item index = group_id(0) * 512 + local id; only indices below the
; current bound %.0.i visit an id.
%85 = call spir_func i64 @hc_get_group_id(i32 0) #7
%86 = shl nsw i64 %85, 9
%87 = add nsw i64 %86, %61
%88 = trunc i64 %87 to i32
%89 = icmp slt i32 %88, %.0.i
br i1 %89, label %90, label %_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.i
; <label>:90 ; preds = %84
; tot_sum == 0 selects %13 as the source of the id, otherwise next_wf.
%91 = load i32 addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7tot_sum548, align 4, !tbaa !26
%92 = icmp eq i32 %91, 0
%sext.i = shl i64 %87, 32
%93 = ashr exact i64 %sext.i, 32
br i1 %92, label %94, label %96
; <label>:94 ; preds = %90
; Read the id from %13 through an addrspace(1) load.
%95 = getelementptr inbounds i32 addrspace(1)* %13, i64 %93
%pid.0.global.i = load i32 addrspace(1)* %95, align 4
br label %98
; <label>:96 ; preds = %90
; Read the id from next_wf through an addrspace(3) load.
%97 = getelementptr inbounds [512 x i32] addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7next_wf447, i64 0, i64 %93
%pid.0.local.i = load i32 addrspace(3)* %97, align 4
br label %98
; <label>:98 ; preds = %96, %94
; Merge the loaded values (not the pointers): the two sources live in
; different address spaces, so a pointer phi would be ill-typed.
%pid.0.i = phi i32 [ %pid.0.global.i, %94 ], [ %pid.0.local.i, %96 ]
%99 = sext i32 %pid.0.i to i64
%100 = getelementptr inbounds i32 addrspace(1)* %17, i64 %99
store i32 16677221, i32 addrspace(1)* %100, align 4, !tbaa !26
%101 = getelementptr inbounds i32 addrspace(1)* %18, i64 %99
%102 = load i32 addrspace(1)* %101, align 4, !tbaa !26
%103 = getelementptr inbounds %"class.hc::short_vector::int_2.0" addrspace(1)* %15, i64 %99, i32 0
%104 = load i32 addrspace(1)* %103, align 4, !tbaa !28
%105 = getelementptr inbounds %"class.hc::short_vector::int_2.0" addrspace(1)* %15, i64 %99, i32 1
%106 = load i32 addrspace(1)* %105, align 4, !tbaa !30
; %107 = start + count is the exclusive end of the %16 range to walk.
%107 = add nsw i32 %106, %104
%108 = icmp sgt i32 %106, 0
br i1 %108, label %.lr.ph.i.i, label %_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.i
.lr.ph.i.i: ; preds = %98
%109 = sext i32 %104 to i64
br label %110
; <label>:110 ; preds = %_ZN11LocalQueues6appendEiPii.exit.i.i, %.lr.ph.i.i
%indvars.iv.i.i = phi i64 [ %109, %.lr.ph.i.i ], [ %indvars.iv.next.i.i, %_ZN11LocalQueues6appendEiPii.exit.i.i ]
%111 = getelementptr inbounds %"class.hc::short_vector::int_2.0" addrspace(1)* %16, i64 %indvars.iv.i.i, i32 0
%112 = load i32 addrspace(1)* %111, align 4, !tbaa !28
%113 = getelementptr inbounds %"class.hc::short_vector::int_2.0" addrspace(1)* %16, i64 %indvars.iv.i.i, i32 1
%114 = load i32 addrspace(1)* %113, align 4, !tbaa !30
; Candidate value = field 1 of the %16 entry + the visitor's %18 value.
%115 = add nsw i32 %114, %102
%116 = sext i32 %112 to i64
%117 = getelementptr inbounds i32 addrspace(1)* %18, i64 %116
%118 = call spir_func i32 @atomic_min_int_global(i32 addrspace(1)* %117, i32 %115) #9
; Continue only if the value returned by atomic_min was greater than the
; candidate.
%119 = icmp sgt i32 %118, %115
br i1 %119, label %120, label %_ZN11LocalQueues6appendEiPii.exit.i.i
; <label>:120 ; preds = %110
%121 = getelementptr inbounds i32 addrspace(1)* %17, i64 %116
%122 = call spir_func i32 @atomic_exchange_int_global(i32 addrspace(1)* %121, i32 %.01.ph.i) #9
; Skip the append when the exchanged-out marker already equals the current one.
%123 = icmp eq i32 %122, %.01.ph.i
br i1 %123, label %_ZN11LocalQueues6appendEiPii.exit.i.i, label %124
; <label>:124 ; preds = %120
; Reserve a slot in queue (local id & 7); values above 399 are out of range
; for the [400 x i32] rows of local_q field 1.
%125 = call spir_func i32 @atomic_add_int_local(i32 addrspace(3)* %69, i32 1) #9
%126 = icmp sgt i32 %125, 399
br i1 %126, label %127, label %128
; <label>:127 ; preds = %124
store i32 1, i32 addrspace(1)* %23, align 4, !tbaa !26
br label %_ZN11LocalQueues6appendEiPii.exit.i.i
; <label>:128 ; preds = %124
%129 = sext i32 %125 to i64
%130 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q245, i64 0, i32 1, i64 %68, i64 %129
store i32 %112, i32 addrspace(3)* %130, align 4, !tbaa !26
br label %_ZN11LocalQueues6appendEiPii.exit.i.i
_ZN11LocalQueues6appendEiPii.exit.i.i: ; preds = %128, %127, %120, %110
%indvars.iv.next.i.i = add nsw i64 %indvars.iv.i.i, 1
%131 = trunc i64 %indvars.iv.next.i.i to i32
%132 = icmp slt i32 %131, %107
br i1 %132, label %110, label %_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.loopexit.i
_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.loopexit.i: ; preds = %_ZN11LocalQueues6appendEiPii.exit.i.i
br label %_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.i
_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.i: ; preds = %_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.loopexit.i, %98, %84
call spir_func void @hc_barrier(i32 1) #8
br i1 %62, label %133, label %165
; <label>:133 ; preds = %_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.i
; Local id 0 only: write the running (exclusive) sum of the eight queue
; counters into prefix_q[0..7]; the grand total goes to tot_sum and to *%20.
%134 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E8prefix_q346, i64 0, i64 0
store i32 0, i32 addrspace(3)* %134, align 16, !tbaa !26
%135 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q245, i64 0, i32 0, i64 0
%136 = load i32 addrspace(3)* %135, align 4, !tbaa !26
%137 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E8prefix_q346, i64 0, i64 1
store i32 %136, i32 addrspace(3)* %137, align 4, !tbaa !26
%138 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q245, i64 0, i32 0, i64 1
%139 = load i32 addrspace(3)* %138, align 4, !tbaa !26
%140 = add nsw i32 %139, %136
%141 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E8prefix_q346, i64 0, i64 2
store i32 %140, i32 addrspace(3)* %141, align 8, !tbaa !26
%142 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q245, i64 0, i32 0, i64 2
%143 = load i32 addrspace(3)* %142, align 4, !tbaa !26
%144 = add nsw i32 %143, %140
%145 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E8prefix_q346, i64 0, i64 3
store i32 %144, i32 addrspace(3)* %145, align 4, !tbaa !26
%146 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q245, i64 0, i32 0, i64 3
%147 = load i32 addrspace(3)* %146, align 4, !tbaa !26
%148 = add nsw i32 %147, %144
%149 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E8prefix_q346, i64 0, i64 4
store i32 %148, i32 addrspace(3)* %149, align 16, !tbaa !26
%150 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q245, i64 0, i32 0, i64 4
%151 = load i32 addrspace(3)* %150, align 4, !tbaa !26
%152 = add nsw i32 %151, %148
%153 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E8prefix_q346, i64 0, i64 5
store i32 %152, i32 addrspace(3)* %153, align 4, !tbaa !26
%154 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q245, i64 0, i32 0, i64 5
%155 = load i32 addrspace(3)* %154, align 4, !tbaa !26
%156 = add nsw i32 %155, %152
%157 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E8prefix_q346, i64 0, i64 6
store i32 %156, i32 addrspace(3)* %157, align 8, !tbaa !26
%158 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q245, i64 0, i32 0, i64 6
%159 = load i32 addrspace(3)* %158, align 4, !tbaa !26
%160 = add nsw i32 %159, %156
%161 = getelementptr inbounds [8 x i32] addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E8prefix_q346, i64 0, i64 7
store i32 %160, i32 addrspace(3)* %161, align 4, !tbaa !26
%162 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q245, i64 0, i32 0, i64 7
%163 = load i32 addrspace(3)* %162, align 4, !tbaa !26
%164 = add nsw i32 %163, %160
store i32 %164, i32 addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7tot_sum548, align 4, !tbaa !26
store i32 %164, i32 addrspace(1)* %20, align 4, !tbaa !26
br label %165
; <label>:165 ; preds = %133, %_Z10visit_nodeiiPN2hc12short_vector5int_2ES2_R11LocalQueuesPiS5_S5_i.exit.i
call spir_func void @hc_barrier(i32 1) #8
; tot_sum == 0: nothing was queued, leave the kernel.
%166 = load i32 addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7tot_sum548, align 4, !tbaa !26
%167 = icmp eq i32 %166, 0
br i1 %167, label %_ZN11LocalQueues11concatenateEPiRA8_i.exit5.loopexit22.i, label %168
; <label>:168 ; preds = %165
; %169: total fits in the 512-entry next_wf buffer (tot_sum < 513).
; %171: this work item's starting offset (local id >> 3) is below its queue's
; element count.
%169 = icmp slt i32 %166, 513
%170 = load i32 addrspace(3)* %69, align 4, !tbaa !26
%171 = icmp slt i32 %71, %170
br i1 %169, label %172, label %187
; <label>:172 ; preds = %168
br i1 %171, label %.lr.ph.i2.i, label %_ZN11LocalQueues11concatenateEPiRA8_i.exit.i
.lr.ph.i2.i: ; preds = %172
%173 = load i32 addrspace(3)* %72, align 4, !tbaa !26
%174 = load i32 addrspace(3)* %73, align 4, !tbaa !26
%175 = sext i32 %174 to i64
%176 = sext i32 %173 to i64
br label %177
; <label>:177 ; preds = %177, %.lr.ph.i2.i
; Copy local_q[queue][i] to next_wf[prefix_q[queue] + i], with i starting at
; (local id >> 3) and advancing by the queue's field-2 stride.
%indvars.iv.i = phi i64 [ %indvars.iv.next.i, %177 ], [ %74, %.lr.ph.i2.i ]
%178 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q245, i64 0, i32 1, i64 %68, i64 %indvars.iv.i
%179 = load i32 addrspace(3)* %178, align 4, !tbaa !26
%180 = add nsw i64 %176, %indvars.iv.i
%181 = getelementptr inbounds [512 x i32] addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7next_wf447, i64 0, i64 %180
store i32 %179, i32 addrspace(3)* %181, align 4, !tbaa !26
%indvars.iv.next.i = add nsw i64 %indvars.iv.i, %175
%182 = trunc i64 %indvars.iv.next.i to i32
%183 = icmp slt i32 %182, %170
br i1 %183, label %177, label %_ZN11LocalQueues11concatenateEPiRA8_i.exit.loopexit.i
_ZN11LocalQueues11concatenateEPiRA8_i.exit.loopexit.i: ; preds = %177
br label %_ZN11LocalQueues11concatenateEPiRA8_i.exit.i
_ZN11LocalQueues11concatenateEPiRA8_i.exit.i: ; preds = %_ZN11LocalQueues11concatenateEPiRA8_i.exit.loopexit.i, %172
call spir_func void @hc_barrier(i32 1) #8
; tot_sum becomes the index bound for the next iteration. Only local id 0
; passes through %185 (which toggles the marker); every other work item
; branches straight back to %75 and keeps its current %.01.ph.i.
%184 = load i32 addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7tot_sum548, align 4, !tbaa !26
br i1 %62, label %185, label %75
; <label>:185 ; preds = %_ZN11LocalQueues11concatenateEPiRA8_i.exit.i
; Next marker: 16677220 if the current one is 16677219, otherwise 16677219.
%186 = icmp eq i32 %.01.ph.i, 16677219
%..i = select i1 %186, i32 16677220, i32 16677219
br label %.outer.i
; <label>:187 ; preds = %168
; tot_sum >= 513: write the queued ids out to %14 instead of next_wf, then exit.
br i1 %171, label %.lr.ph.i3.preheader.i, label %_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_.exit
.lr.ph.i3.preheader.i: ; preds = %187
br label %.lr.ph.i3.i
.lr.ph.i3.i: ; preds = %.lr.ph.i3.i, %.lr.ph.i3.preheader.i
; Same strided copy as above, but the destination is %14 and the prefix,
; stride and count are reloaded on every iteration.
%local_shift.01.i4.i = phi i32 [ %196, %.lr.ph.i3.i ], [ %71, %.lr.ph.i3.preheader.i ]
%188 = sext i32 %local_shift.01.i4.i to i64
%189 = getelementptr inbounds %struct.LocalQueues addrspace(3)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_.ZZ17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_E7local_q245, i64 0, i32 1, i64 %68, i64 %188
%190 = load i32 addrspace(3)* %189, align 4, !tbaa !26
%191 = load i32 addrspace(3)* %72, align 4, !tbaa !26
%192 = add nsw i32 %191, %local_shift.01.i4.i
%193 = sext i32 %192 to i64
%194 = getelementptr inbounds i32 addrspace(1)* %14, i64 %193
store i32 %190, i32 addrspace(1)* %194, align 4, !tbaa !26
%195 = load i32 addrspace(3)* %73, align 4, !tbaa !26
%196 = add nsw i32 %195, %local_shift.01.i4.i
%197 = load i32 addrspace(3)* %69, align 4, !tbaa !26
%198 = icmp slt i32 %196, %197
br i1 %198, label %.lr.ph.i3.i, label %_ZN11LocalQueues11concatenateEPiRA8_i.exit5.loopexit.i
_ZN11LocalQueues11concatenateEPiRA8_i.exit5.loopexit.i: ; preds = %.lr.ph.i3.i
br label %_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_.exit
_ZN11LocalQueues11concatenateEPiRA8_i.exit5.loopexit22.i: ; preds = %165
br label %_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_.exit
_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0_.exit: ; preds = %_ZN11LocalQueues11concatenateEPiRA8_i.exit5.loopexit22.i, %_ZN11LocalQueues11concatenateEPiRA8_i.exit5.loopexit.i, %187
; Common exit: end the lifetimes of both grid_launch_parm copies and return.
%199 = bitcast %struct.grid_launch_parm* %25 to i8*
call spir_func void @llvm.lifetime.end(i64 72, i8* %199)
call spir_func void @llvm.lifetime.end(i64 72, i8* %45)
ret void
}
; Attribute groups referenced by `#N` suffixes on functions and call sites.
; Uses visible in this part of the file:
;   #5  the BFS_in_GPU_kernel trampoline definition (uwtable)
;   #6  the atomic_or_int_global declaration
;   #7  amp_get_local_id / amp_get_group_id / amp_get_local_size /
;       hc_get_group_id call sites (readnone)
;   #8  hc_barrier call sites (noduplicate)
;   #9  atomic_*_int_* call sites
; Groups #0-#4 are not referenced in this part of the file.
attributes #0 = { nounwind readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind }
attributes #2 = { noduplicate "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
attributes #3 = { nounwind readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
attributes #4 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #5 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #6 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #7 = { nobuiltin nounwind readnone }
attributes #8 = { nobuiltin noduplicate }
attributes #9 = { nobuiltin }
!llvm.ident = !{!0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !1, !1, !1, !1}
!opencl.kernels = !{!2, !8, !10, !16, !22}
!0 = metadata !{metadata !"HCC clang version 3.5.0 (based on HCC 0.10.16186-d14f969-7461349 LLVM 3.5.0svn)"}
!1 = metadata !{metadata !"HCC clang version 3.5.0 (based on HCC 0.10.16256-042a253-061e735 LLVM 3.5.0svn)"}
!2 = metadata !{void (i64, i8 addrspace(1)*, i8)* @ZZ16ihipMemsetKernelIcEN2hc17completion_futureEP12ihipStream_tPT_S4_mENUlNS0_11tiled_indexILi1EEEE_19__cxxamp_trampolineEmPcc, metadata !3, metadata !4, metadata !5, metadata !6, metadata !7}
!3 = metadata !{metadata !"kernel_arg_addr_space", i32 0, i32 0, i32 0}
!4 = metadata !{metadata !"kernel_arg_access_qual", metadata !"none", metadata !"none", metadata !"none"}
!5 = metadata !{metadata !"kernel_arg_type", metadata !"size_t", metadata !"char*", metadata !"char"}
!6 = metadata !{metadata !"kernel_arg_type_qual", metadata !"", metadata !"", metadata !""}
!7 = metadata !{metadata !"kernel_arg_name", metadata !"", metadata !"", metadata !""}
!8 = metadata !{void (i64, i32 addrspace(1)*, i32)* @ZZ16ihipMemsetKernelIjEN2hc17completion_futureEP12ihipStream_tPT_S4_mENUlNS0_11tiled_indexILi1EEEE_19__cxxamp_trampolineEmPjj, metadata !3, metadata !4, metadata !9, metadata !6, metadata !7}
!9 = metadata !{metadata !"kernel_arg_type", metadata !"size_t", metadata !"uint*", metadata !"uint"}
!10 = metadata !{void (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 addrspace(1)*, i32 addrspace(1)*, %"class.hc::short_vector::int_2.0" addrspace(1)*, %"class.hc::short_vector::int_2.0" addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32, i32 addrspace(1)*, i32, i32, i32 addrspace(1)*)* @ZN12_GLOBAL__N_189_Z10BFS_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_, metadata !11, metadata !12, metadata !13, metadata !14, metadata !15}
!11 = metadata !{metadata !"kernel_arg_addr_space", i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0}
!12 = metadata !{metadata !"kernel_arg_access_qual", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none"}
!13 = metadata !{metadata !"kernel_arg_type", metadata !"int", metadata !"int", metadata !"int", metadata !"int", metadata !"int", metadata !"int", metadata !"int", metadata !"int", metadata !"int", metadata !"int", metadata !"int", metadata !"int", metadata !"uint", metadata !"int*", metadata !"int*", metadata !"class_hc__short_vector__int_2_gl*", metadata !"class_hc__short_vector__int_2_gl*", metadata !"int*", metadata !"int*", metadata !"int", metadata !"int*", metadata !"int", metadata !"int", metadata !"int*"}
!14 = metadata !{metadata !"kernel_arg_type_qual", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"const", metadata !"", metadata !"const", metadata !"const", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !""}
!15 = metadata !{metadata !"kernel_arg_name", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !""}
!16 = metadata !{void (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 addrspace(1)*, i32 addrspace(1)*, %"class.hc::short_vector::int_2.0" addrspace(1)*, %"class.hc::short_vector::int_2.0" addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32, i32, i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*)* @ZN12_GLOBAL__N_1116_Z26BFS_kernel_multi_blk_inGPU16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_S0_S0_iiS0_S0_S0_S0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPiS1_PK32class_hc__short_vector__int_2_glS4_S1_S1_S1_S1_iiS1_PKiS1_S1, metadata !17, metadata !18, metadata !19, metadata !20, metadata !21}
!17 = metadata !{metadata !"kernel_arg_addr_space", i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0}
!18 = metadata !{metadata !"kernel_arg_access_qual", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none", metadata !"none"}
!19 = metadata !{metadata !"kernel_arg_type", metadata !"int", metadata !"int", metadata !"int", metadata !"int", metadata !"int", metadata !"int", metadata !"int", metadata !"int", metadata !"int", metadata !"int", metadata !"int", metadata !"int", metadata !"uint", metadata !"int*", metadata !"int*", metadata !"class_hc__short_vector__int_2_gl*", metadata !"class_hc__short_vector__int_2_gl*", metadata !"int*", metadata !"int*", metadata !"int*", metadata !"int*", metadata !"int", metadata !"int", metadata !"int*", metadata !"int*", metadata !"int*", metadata !"int*"}
!20 = metadata !{metadata !"kernel_arg_type_qual", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"const", metadata !"const", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"const", metadata !"", metadata !""}
!21 = metadata !{metadata !"kernel_arg_name", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !"", metadata !""}
!22 = metadata !{void (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 addrspace(1)*, i32 addrspace(1)*, %"class.hc::short_vector::int_2.0" addrspace(1)*, %"class.hc::short_vector::int_2.0" addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32, i32 addrspace(1)*, i32, i32, i32 addrspace(1)*)* @ZN12_GLOBAL__N_196_Z17BFS_in_GPU_kernel16grid_launch_parmPiS0_PN2hc12short_vector5int_2ES4_S0_S0_iS0_iiS0__functor19__cxxamp_trampolineEiiiiiiiiiiiijPKiPiPK32class_hc__short_vector__int_2_glS6_S3_S3_iS3_iiS3_, metadata !11, metadata !12, metadata !13, metadata !14, metadata !15}
; TBAA (type-based alias analysis) nodes referenced by `!tbaa` on loads/stores.
;   !25  root of the type tree ("Simple C/C++ TBAA")
;   !24  "omnipotent char", child of the root
;   !27  scalar type "int", child of !24
;   !29  struct type _ZTSN2hc12short_vector5int_2E with two "int" members at
;        byte offsets 0 and 4
;   !23 / !26  access tags for plain char / int accesses
;   !28 / !30  access tags for the int member of !29 at offset 0 / offset 4
!23 = metadata !{metadata !24, metadata !24, i64 0}
!24 = metadata !{metadata !"omnipotent char", metadata !25, i64 0}
!25 = metadata !{metadata !"Simple C/C++ TBAA"}
!26 = metadata !{metadata !27, metadata !27, i64 0}
!27 = metadata !{metadata !"int", metadata !24, i64 0}
!28 = metadata !{metadata !29, metadata !27, i64 0}
!29 = metadata !{metadata !"_ZTSN2hc12short_vector5int_2E", metadata !27, i64 0, metadata !27, i64 4}
!30 = metadata !{metadata !29, metadata !27, i64 4}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment