Skip to content

Instantly share code, notes, and snippets.

@Flakebi
Created January 19, 2022 19:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Flakebi/fd1d91a806b60ec330e9f61e19fe62ac to your computer and use it in GitHub Desktop.
Save Flakebi/fd1d91a806b60ec330e9f61e19fe62ac to your computer and use it in GitHub Desktop.
MachineSink failure
--- |
; Compile with: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -verify-machineinstrs -start-before=machine-sink -stop-after=machine-sink PipelineVsFs_0xDD57C231E25DA514.mir -o PipelineVsFs_0xDD57C231E25DA514-after.mir
; ModuleID = 'lgcPipeline'
source_filename = "lgcPipeline"
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn--amdpal"
%llpc.array.element = type <{ float, [12 x i8] }>
; Merged NGG primitive-shader (hardware GS) entry for gfx1010 / wave64.
; This function is a compiler-bug reproducer (MachineSink failure; see the
; llc command line in the module header comment). NOTE(review): instruction
; text must stay byte-identical or the reproducer may stop triggering.
; Control flow is already structured for the AMDGPU backend: the
; llvm.amdgcn.if/if.break/loop/end.cf intrinsics thread i64 exec masks
; between blocks instead of plain branches.
define dllexport amdgpu_gs void @_amdgpu_gs_main(i32 inreg %userDataAddrLow, i32 inreg %userDataAddrHigh, i32 inreg %mergedGroupInfo, i32 inreg %mergedWaveInfo, i32 inreg %offChipLdsBase, i32 inreg %sharedScratchOffset, i32 inreg %primShaderTableAddrLow, i32 inreg %primShaderTableAddrHigh, <7 x i32> inreg %userData, i32 %esGsOffsets01, i32 %esGsOffsets23, i32 %gsPrimitiveId, i32 %invocationId, i32 %esGsOffsets45, i32 %vertexId, i32 %relVertexId, i32 %vsPrimitiveId, i32 %instanceId) #0 !lgc.shaderstage !4 {
.entry:
; Enable all 64 lanes, then decode the packed subgroup info:
; primCountInSubgroup = mergedGroupInfo bits [30:22],
; vertCountInSubgroup = mergedGroupInfo bits [20:12],
; waveIdInSubgroup    = mergedWaveInfo  bits [27:24].
call void @llvm.amdgcn.init.exec(i64 -1)
%0 = lshr i32 %mergedGroupInfo, 22
%primCountInSubgroup = and i32 %0, 511
%1 = lshr i32 %mergedGroupInfo, 12
%vertCountInSubgroup = and i32 %1, 511
%2 = lshr i32 %mergedWaveInfo, 24
%waveIdInSubgroup = and i32 %2, 15
call void @llvm.amdgcn.s.barrier()
%3 = icmp eq i32 %waveIdInSubgroup, 0
br i1 %3, label %.allocReq, label %.endAllocReq, !amdgpu.uniform !5
.allocReq: ; preds = %.entry
; Wave 0 only: request parameter-cache space via sendmsg 9 (GS_ALLOC_REQ)
; with the payload primCount << 12 | vertCount.
%4 = shl nuw nsw i32 %primCountInSubgroup, 12
%5 = or i32 %4, %vertCountInSubgroup
call void @llvm.amdgcn.s.sendmsg(i32 9, i32 %5)
br label %.endAllocReq, !amdgpu.uniform !5
.endAllocReq: ; preds = %.allocReq, %.entry
; threadIdInSubgroup = waveId * 64 + lane id (mbcnt of an all-ones mask).
; Guard the primitive-export path: %10 is the taken condition, %11 the
; saved exec mask consumed by end.cf at the join in .endExpPrim.
%6 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
%threadIdInWave = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %6)
%7 = shl nuw nsw i32 %waveIdInSubgroup, 6
%threadIdInSubgroup = add i32 %7, %threadIdInWave
%8 = icmp ult i32 %threadIdInSubgroup, %primCountInSubgroup
%9 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %8)
%10 = extractvalue { i1, i64 } %9, 0
%11 = extractvalue { i1, i64 } %9, 1
br i1 %10, label %.expPrim, label %.endExpPrim
.expPrim: ; preds = %.endAllocReq
; Lanes owning a primitive export connectivity data (target 20 = PRIM).
call void @llvm.amdgcn.exp.i32(i32 20, i32 1, i32 %esGsOffsets01, i32 undef, i32 undef, i32 undef, i1 true, i1 false)
br label %.endExpPrim, !amdgpu.uniform !5
.endExpPrim: ; preds = %.expPrim, %.endAllocReq
; Re-join, then open the vertex-export guard the same way (%15 = exec mask).
call void @llvm.amdgcn.end.cf.i64(i64 %11)
%12 = icmp ult i32 %threadIdInSubgroup, %vertCountInSubgroup
%13 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %12)
%14 = extractvalue { i1, i64 } %13, 0
%15 = extractvalue { i1, i64 } %13, 1
br i1 %14, label %.expVert, label %Flow14
.expVert: ; preds = %.endExpPrim
; Build two 64-bit descriptor addresses: low dword from userData[2]/[4],
; high dword from the upper half of s_getpc. Load a <4 x i32> buffer
; descriptor through each, fetch the vertex via struct.tbuffer.load, then
; (below) compute a float sum over buffer elements two ways — a per-lane
; loop and a DPP/permlane wave reduction — and compare the results.
%16 = extractelement <7 x i32> %userData, i64 2
%17 = extractelement <7 x i32> %userData, i64 4
%18 = extractelement <7 x i32> %userData, i64 5
%19 = call i64 @llvm.amdgcn.s.getpc() #11
%extelt.offset.i = lshr i64 %19, 32
%.i1.i = trunc i64 %extelt.offset.i to i32
%.upto0.i = insertelement <2 x i32> poison, i32 %16, i64 0
%20 = insertelement <2 x i32> %.upto0.i, i32 %.i1.i, i64 1
%21 = bitcast <2 x i32> %20 to i64
%VertexIndex.i = add i32 %18, %vertexId
%.upto023.i = insertelement <2 x i32> poison, i32 %17, i64 0
%22 = insertelement <2 x i32> %.upto023.i, i32 %.i1.i, i64 1
%23 = bitcast <2 x i32> %22 to i64
%24 = inttoptr i64 %23 to <4 x i32> addrspace(4)*, !amdgpu.uniform !5
%25 = inttoptr i64 %21 to <4 x i32> addrspace(4)*, !amdgpu.uniform !5
%26 = load <4 x i32>, <4 x i32> addrspace(4)* %25, align 16
%27 = load <4 x i32>, <4 x i32> addrspace(4)* %24, align 16, !invariant.load !5
%28 = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %27, i32 %VertexIndex.i, i32 0, i32 0, i32 immarg 77, i32 immarg 0) #5
; Ballot of active lanes (%32); the inline asm is an optimization barrier
; around the constant 1. Only lanes with a nonzero lane id run the loop.
%29 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #11
%30 = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %29) #11
%31 = call i32 asm sideeffect "; %1", "=v,0"(i32 1) #11
%32 = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %31, i32 0, i32 33) #11
%.not19.i = icmp eq i32 %30, 0
%.not19.i.inv = xor i1 %.not19.i, true
%33 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %.not19.i.inv)
%34 = extractvalue { i1, i64 } %33, 0
%35 = extractvalue { i1, i64 } %33, 1
br i1 %34, label %.lr.ph16.i.preheader, label %Flow13
.lr.ph16.i.preheader: ; preds = %.expVert
%36 = zext i32 %30 to i64
br label %.lr.ph16.i, !amdgpu.uniform !5
Flow13: ; preds = %Flow12, %.expVert
; Structured join for the first loop guard; %37 is the loop sum
; (0.0 when the loop was skipped).
%37 = phi float [ %.1.i.lcssa, %Flow12 ], [ 0.000000e+00, %.expVert ]
call void @llvm.amdgcn.end.cf.i64(i64 %35)
br label %._crit_edge17.i, !amdgpu.uniform !5
.lr.ph16.i: ; preds = %.lr.ph16.i.preheader, %44
; First reduction loop: iterate lane-id times; on iterations whose bit is
; set in ballot %32, accumulate an s_buffer load at byte stride 16.
; %phi.broken carries the if.break/loop exec state.
%phi.broken = phi i64 [ 0, %.lr.ph16.i.preheader ], [ %45, %44 ]
%lsr.iv8 = phi i32 [ %lsr.iv.next9, %44 ], [ 0, %.lr.ph16.i.preheader ]
%lsr.iv4 = phi i64 [ %lsr.iv.next5, %44 ], [ 0, %.lr.ph16.i.preheader ]
%.02913.i = phi float [ %.1.i, %44 ], [ 0.000000e+00, %.lr.ph16.i.preheader ]
%38 = shl nuw i64 1, %lsr.iv4
%39 = and i64 %38, %32
%.not9.i = icmp eq i64 %39, 0
%.not9.i.inv = xor i1 %.not9.i, true
br i1 %.not9.i.inv, label %40, label %44, !amdgpu.uniform !5
40: ; preds = %.lr.ph16.i
%41 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %26, i32 %lsr.iv8, i32 0) #11
%42 = bitcast i32 %41 to float
%43 = fadd reassoc nnan nsz arcp contract afn float %.02913.i, %42
br label %44, !amdgpu.uniform !5
44: ; preds = %40, %.lr.ph16.i
%.1.i = phi float [ %43, %40 ], [ %.02913.i, %.lr.ph16.i ]
%lsr.iv.next5 = add nuw nsw i64 %lsr.iv4, 1
%tmp6 = trunc i64 %lsr.iv.next5 to i32
%lsr.iv.next9 = add i32 %lsr.iv8, 16
%exitcond21.not.i = icmp eq i32 %30, %tmp6
%45 = call i64 @llvm.amdgcn.if.break.i64(i1 %exitcond21.not.i, i64 %phi.broken)
%46 = call i1 @llvm.amdgcn.loop.i64(i64 %45)
br i1 %46, label %Flow12, label %.lr.ph16.i
Flow12: ; preds = %44
%.1.i.lcssa = phi float [ %.1.i, %44 ]
%.lcssa16 = phi i64 [ %45, %44 ]
call void @llvm.amdgcn.end.cf.i64(i64 %.lcssa16)
br label %Flow13, !amdgpu.uniform !5
Flow14: ; preds = %lgc.ngg.ES.main.exit, %.endExpPrim
call void @llvm.amdgcn.end.cf.i64(i64 %15)
br label %.endExpVert, !amdgpu.uniform !5
._crit_edge17.i: ; preds = %Flow13
; Wave64 reduction of the same buffer value using set.inactive + permlane16
; + DPP row shifts (the usual WWM scan sequence), loading per-lane from the
; 16-byte-stride array at offset lane*16.
%47 = getelementptr <{ [128 x %llpc.array.element] }>, <{ [128 x %llpc.array.element] }> addrspace(6)* null, i32 0, i32 0, i32 %30, i32 0
%48 = ptrtoint float addrspace(6)* %47 to i32
%49 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %26, i32 %48, i32 0) #11
%50 = call i32 asm sideeffect "; %1", "=v,0"(i32 %49) #11
%51 = call i32 @llvm.amdgcn.set.inactive.i32(i32 %50, i32 0) #11
%52 = zext i32 %30 to i64
%53 = shl nuw i64 1, %52
%54 = call i32 @llvm.amdgcn.permlane16(i32 undef, i32 %51, i32 1698898191, i32 -305419897, i1 true, i1 false) #11
%55 = call i32 @llvm.amdgcn.readlane(i32 %54, i32 16) #11
%56 = call i32 @llvm.amdgcn.writelane(i32 %55, i32 48, i32 %54) #11
%57 = call i32 @llvm.amdgcn.writelane(i32 0, i32 16, i32 %56) #11
%58 = call i32 @llvm.amdgcn.permlanex16(i32 undef, i32 %57, i32 0, i32 -1, i1 true, i1 false) #11
%59 = and i64 %53, 281479271743489
%.not1.i = icmp eq i64 %59, 0
%.v.i = select i1 %.not1.i, i32 %57, i32 %58
%60 = bitcast i32 %.v.i to float
%61 = call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %.v.i, i32 273, i32 15, i32 15, i1 false) #11
%62 = bitcast i32 %61 to float
%63 = fadd reassoc nnan nsz arcp contract afn float %60, %62
%64 = call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %.v.i, i32 274, i32 15, i32 15, i1 false) #11
%65 = bitcast i32 %64 to float
%66 = fadd reassoc nnan nsz arcp contract afn float %63, %65
%67 = call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %.v.i, i32 275, i32 15, i32 15, i1 false) #11
%68 = bitcast i32 %67 to float
%69 = fadd reassoc nnan nsz arcp contract afn float %66, %68
%70 = bitcast float %69 to i32
%71 = call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %70, i32 276, i32 15, i32 14, i1 false) #11
%72 = bitcast i32 %71 to float
%73 = fadd reassoc nnan nsz arcp contract afn float %69, %72
%74 = bitcast float %73 to i32
%75 = call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %74, i32 280, i32 15, i32 12, i1 false) #11
%76 = bitcast i32 %75 to float
%77 = fadd reassoc nnan nsz arcp contract afn float %73, %76
%78 = bitcast float %77 to i32
; Cross-half (lanes 0-31 vs 32-63) combine, then fold in lane 31's partial
; for the upper half; %90 leaves WWM with the reduced value.
%79 = call i32 @llvm.amdgcn.permlanex16(i32 undef, i32 %78, i32 -1, i32 -1, i1 true, i1 false) #11
%80 = bitcast i32 %79 to float
%81 = and i64 %53, -281470681808896
%.not2.i = icmp eq i64 %81, 0
%82 = select reassoc nnan nsz arcp contract afn i1 %.not2.i, float 0.000000e+00, float %80
%83 = fadd reassoc nnan nsz arcp contract afn float %77, %82
%84 = bitcast float %83 to i32
%85 = call i32 @llvm.amdgcn.readlane(i32 %84, i32 31) #11
%86 = bitcast i32 %85 to float
%.not3.i = icmp ult i32 %30, 32
%87 = select reassoc nnan nsz arcp contract afn i1 %.not3.i, float 0.000000e+00, float %86
%88 = fadd reassoc nnan nsz arcp contract afn float %83, %87
%89 = bitcast float %88 to i32
%90 = call i32 @llvm.amdgcn.wwm.i32(i32 %89) #11
; Odd-lane guard around the second loop/reduction pair (same structure as
; the first); %94 is the saved exec mask released at Flow11.
%91 = and i32 %30, 1
%.not.i = icmp eq i32 %91, 0
%.not.i.inv = xor i1 %.not.i, true
%92 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %.not.i.inv)
%93 = extractvalue { i1, i64 } %92, 0
%94 = extractvalue { i1, i64 } %92, 1
br i1 %93, label %95, label %Flow11
95: ; preds = %._crit_edge17.i
%96 = call i32 asm sideeffect "; %1", "=v,0"(i32 1) #11
%97 = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %96, i32 0, i32 33) #11
%98 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %.not19.i.inv)
%99 = extractvalue { i1, i64 } %98, 0
%100 = extractvalue { i1, i64 } %98, 1
br i1 %99, label %.lr.ph.i.preheader, label %Flow10
.lr.ph.i.preheader: ; preds = %95
br label %.lr.ph.i, !amdgpu.uniform !5
Flow10: ; preds = %Flow, %95
%101 = phi float [ %.3.i.lcssa, %Flow ], [ 0.000000e+00, %95 ]
call void @llvm.amdgcn.end.cf.i64(i64 %100)
br label %._crit_edge.i, !amdgpu.uniform !5
.lr.ph.i: ; preds = %.lr.ph.i.preheader, %108
; Second reduction loop — identical shape to .lr.ph16.i but driven by
; ballot %97 and run only on odd lanes.
%phi.broken15 = phi i64 [ 0, %.lr.ph.i.preheader ], [ %109, %108 ]
%lsr.iv2 = phi i32 [ %lsr.iv.next3, %108 ], [ 0, %.lr.ph.i.preheader ]
%lsr.iv = phi i64 [ %lsr.iv.next, %108 ], [ 0, %.lr.ph.i.preheader ]
%.210.i = phi float [ %.3.i, %108 ], [ 0.000000e+00, %.lr.ph.i.preheader ]
%102 = shl nuw i64 1, %lsr.iv
%103 = and i64 %102, %97
%.not8.i = icmp eq i64 %103, 0
%.not8.i.inv = xor i1 %.not8.i, true
br i1 %.not8.i.inv, label %104, label %108, !amdgpu.uniform !5
104: ; preds = %.lr.ph.i
%105 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %26, i32 %lsr.iv2, i32 0) #11
%106 = bitcast i32 %105 to float
%107 = fadd reassoc nnan nsz arcp contract afn float %.210.i, %106
br label %108, !amdgpu.uniform !5
108: ; preds = %104, %.lr.ph.i
%.3.i = phi float [ %107, %104 ], [ %.210.i, %.lr.ph.i ]
%lsr.iv.next = add nuw nsw i64 %lsr.iv, 1
%tmp = trunc i64 %lsr.iv.next to i32
%lsr.iv.next3 = add i32 %lsr.iv2, 16
%exitcond.not.i = icmp eq i32 %30, %tmp
%109 = call i64 @llvm.amdgcn.if.break.i64(i1 %exitcond.not.i, i64 %phi.broken15)
%110 = call i1 @llvm.amdgcn.loop.i64(i64 %109)
br i1 %110, label %Flow, label %.lr.ph.i
Flow: ; preds = %108
%.3.i.lcssa = phi float [ %.3.i, %108 ]
%.lcssa = phi i64 [ %109, %108 ]
call void @llvm.amdgcn.end.cf.i64(i64 %.lcssa)
br label %Flow10, !amdgpu.uniform !5
Flow11: ; preds = %._crit_edge.i, %._crit_edge17.i
; %111 = 2 when the odd-lane branch was skipped, otherwise the comparison
; result computed in ._crit_edge.i.
%111 = phi i32 [ %153, %._crit_edge.i ], [ 2, %._crit_edge17.i ]
call void @llvm.amdgcn.end.cf.i64(i64 %94)
br label %lgc.ngg.ES.main.exit, !amdgpu.uniform !5
._crit_edge.i: ; preds = %Flow10
; Second DPP/permlane reduction (mirrors ._crit_edge17.i), then check the
; loop sum and the reduction agree within ~1.0e-5 (0x3EE4F8B580000000).
%112 = call i32 asm sideeffect "; %1", "=v,0"(i32 %49) #11
%113 = call i32 @llvm.amdgcn.set.inactive.i32(i32 %112, i32 0) #11
%114 = call i32 @llvm.amdgcn.permlane16(i32 undef, i32 %113, i32 1698898191, i32 -305419897, i1 true, i1 false) #11
%115 = call i32 @llvm.amdgcn.readlane(i32 %114, i32 16) #11
%116 = call i32 @llvm.amdgcn.writelane(i32 %115, i32 48, i32 %114) #11
%117 = call i32 @llvm.amdgcn.writelane(i32 0, i32 16, i32 %116) #11
%118 = call i32 @llvm.amdgcn.permlanex16(i32 undef, i32 %117, i32 0, i32 -1, i1 true, i1 false) #11
%.v5.i = select i1 %.not1.i, i32 %117, i32 %118
%119 = bitcast i32 %.v5.i to float
%120 = call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %.v5.i, i32 273, i32 15, i32 15, i1 false) #11
%121 = bitcast i32 %120 to float
%122 = fadd reassoc nnan nsz arcp contract afn float %119, %121
%123 = call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %.v5.i, i32 274, i32 15, i32 15, i1 false) #11
%124 = bitcast i32 %123 to float
%125 = fadd reassoc nnan nsz arcp contract afn float %122, %124
%126 = call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %.v5.i, i32 275, i32 15, i32 15, i1 false) #11
%127 = bitcast i32 %126 to float
%128 = fadd reassoc nnan nsz arcp contract afn float %125, %127
%129 = bitcast float %128 to i32
%130 = call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %129, i32 276, i32 15, i32 14, i1 false) #11
%131 = bitcast i32 %130 to float
%132 = fadd reassoc nnan nsz arcp contract afn float %128, %131
%133 = bitcast float %132 to i32
%134 = call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %133, i32 280, i32 15, i32 12, i1 false) #11
%135 = bitcast i32 %134 to float
%136 = fadd reassoc nnan nsz arcp contract afn float %132, %135
%137 = bitcast float %136 to i32
%138 = call i32 @llvm.amdgcn.permlanex16(i32 undef, i32 %137, i32 -1, i32 -1, i1 true, i1 false) #11
%139 = bitcast i32 %138 to float
%140 = select reassoc nnan nsz arcp contract afn i1 %.not2.i, float 0.000000e+00, float %139
%141 = fadd reassoc nnan nsz arcp contract afn float %136, %140
%142 = bitcast float %141 to i32
%143 = call i32 @llvm.amdgcn.readlane(i32 %142, i32 31) #11
%144 = bitcast i32 %143 to float
%145 = select reassoc nnan nsz arcp contract afn i1 %.not3.i, float 0.000000e+00, float %144
%146 = fadd reassoc nnan nsz arcp contract afn float %141, %145
%147 = bitcast float %146 to i32
%148 = call i32 @llvm.amdgcn.wwm.i32(i32 %147) #11
%149 = bitcast i32 %148 to float
%150 = fsub reassoc nnan nsz arcp contract afn float %101, %149
%151 = call reassoc nnan nsz arcp contract afn float @llvm.fabs.f32(float %150) #11
%152 = fcmp olt float %151, 0x3EE4F8B580000000
%153 = select i1 %152, i32 2, i32 0
br label %Flow11, !amdgpu.uniform !5
lgc.ngg.ES.main.exit: ; preds = %Flow11
; Vertex exports: position (target 12) from the tbuffer fetch, a second
; position row (target 13), and a generic attribute (target 32) carrying
; the encoded comparison result of loop-sum vs DPP-sum (first pair likewise
; compared here within ~1.0e-5).
%bc29.i = bitcast <4 x i32> %28 to <4 x float>
%vertex0.0.i3.i = extractelement <4 x float> %bc29.i, i64 3
%vertex0.0.i2.i = extractelement <4 x float> %bc29.i, i64 2
%vertex0.0.i1.i = extractelement <4 x float> %bc29.i, i64 1
%vertex0.0.i0.i = extractelement <4 x float> %bc29.i, i64 0
%154 = bitcast i32 %90 to float
%155 = fsub reassoc nnan nsz arcp contract afn float %37, %154
%156 = call reassoc nnan nsz arcp contract afn float @llvm.fabs.f32(float %155) #11
%157 = fcmp olt float %156, 0x3EE4F8B580000000
%158 = zext i1 %157 to i32
%.028.i = or i32 %111, %158
%159 = uitofp i32 %.028.i to float
call void @llvm.amdgcn.exp.f32(i32 immarg 12, i32 immarg 15, float %vertex0.0.i0.i, float %vertex0.0.i1.i, float %vertex0.0.i2.i, float %vertex0.0.i3.i, i1 immarg false, i1 immarg false) #6
call void @llvm.amdgcn.exp.f32(i32 immarg 13, i32 immarg 1, float 1.000000e+00, float undef, float undef, float undef, i1 immarg true, i1 immarg false) #6
call void @llvm.amdgcn.exp.f32(i32 32, i32 1, float %159, float undef, float undef, float undef, i1 false, i1 false) #11
br label %Flow14, !amdgpu.uniform !5
.endExpVert: ; preds = %Flow14
ret void
}
; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
declare float @llvm.fabs.f32(float) #1
; Function Attrs: nounwind readnone willreturn
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #2
; Function Attrs: nounwind readnone willreturn
declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #2
; Function Attrs: convergent nounwind readnone willreturn
declare i64 @llvm.amdgcn.icmp.i64.i32(i32, i32, i32 immarg) #3
; Function Attrs: convergent nounwind readnone willreturn
declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32) #3
; Function Attrs: convergent nounwind readnone willreturn
declare i32 @llvm.amdgcn.permlane16(i32, i32, i32, i32, i1 immarg, i1 immarg) #3
; Function Attrs: convergent nounwind readnone willreturn
declare i32 @llvm.amdgcn.readlane(i32, i32) #3
; Function Attrs: convergent nounwind readnone willreturn
declare i32 @llvm.amdgcn.writelane(i32, i32, i32) #3
; Function Attrs: convergent nounwind readnone willreturn
declare i32 @llvm.amdgcn.permlanex16(i32, i32, i32, i32, i1 immarg, i1 immarg) #3
; Function Attrs: convergent nounwind readnone willreturn
declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32 immarg, i32 immarg, i32 immarg, i1 immarg) #3
; Function Attrs: convergent nounwind readnone speculatable willreturn
declare i32 @llvm.amdgcn.wwm.i32(i32) #4
; Function Attrs: nounwind readonly willreturn
declare <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32>, i32, i32, i32, i32 immarg, i32 immarg) #5
; Function Attrs: inaccessiblememonly nounwind willreturn writeonly
declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #6
; Function Attrs: nounwind
; Minimal pixel shader: interpolate generic attribute 0 at the pixel center
; (interp.p1/p2 with the primitive mask), convert the float to an unsigned
; int, and export its raw bits to MRT0 (done + vm set). Part of the same
; MachineSink reproducer — keep instruction text byte-identical.
define dllexport amdgpu_ps void @_amdgpu_ps_main(i32 inreg %globalTable, i32 inreg %perShaderTable, i32 inreg %PrimMask, <2 x float> %PerspInterpSample, <2 x float> %PerspInterpCenter, <2 x float> %PerspInterpCentroid, <3 x float> %PerspInterpPullMode, <2 x float> %LinearInterpSample, <2 x float> %LinearInterpCenter, <2 x float> %LinearInterpCentroid, float %LineStipple, float %FragCoordX, float %FragCoordY, float %FragCoordZ, float %FragCoordW, i32 %FrontFacing, i32 %Ancillary, i32 %SampleCoverage, i32 %FixedXY) #7 !lgc.shaderstage !6 {
.entry:
; i/j barycentrics for center interpolation.
%PerspInterpCenter.i1 = extractelement <2 x float> %PerspInterpCenter, i64 1
%PerspInterpCenter.i0 = extractelement <2 x float> %PerspInterpCenter, i64 0
%0 = call float @llvm.amdgcn.interp.p1(float %PerspInterpCenter.i0, i32 immarg 0, i32 immarg 0, i32 %PrimMask) #8
%1 = call float @llvm.amdgcn.interp.p2(float %0, float %PerspInterpCenter.i1, i32 immarg 0, i32 immarg 0, i32 %PrimMask) #8
%2 = fptoui float %1 to i32
; Export the integer re-interpreted as float bits to color target 0.
%3 = bitcast i32 %2 to float
call void @llvm.amdgcn.exp.f32(i32 immarg 0, i32 immarg 1, float %3, float undef, float undef, float undef, i1 immarg true, i1 immarg true) #6
ret void
}
; Function Attrs: nounwind readnone speculatable willreturn
declare i64 @llvm.amdgcn.s.getpc() #8
; Function Attrs: nounwind readnone speculatable willreturn
declare float @llvm.amdgcn.interp.p1(float, i32 immarg, i32 immarg, i32) #8
; Function Attrs: nounwind readnone speculatable willreturn
declare float @llvm.amdgcn.interp.p2(float, float, i32 immarg, i32 immarg, i32) #8
; Function Attrs: nounwind readnone willreturn
declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg) #2
; Function Attrs: convergent nounwind
declare void @llvm.amdgcn.init.exec(i64 immarg) #9
; Function Attrs: convergent nounwind willreturn
declare void @llvm.amdgcn.s.barrier() #10
; Function Attrs: nounwind
declare void @llvm.amdgcn.s.sendmsg(i32 immarg, i32) #11
; Function Attrs: inaccessiblememonly nounwind willreturn writeonly
declare void @llvm.amdgcn.exp.i32(i32 immarg, i32 immarg, i32, i32, i32, i32, i1 immarg, i1 immarg) #6
; Function Attrs: convergent nounwind willreturn
declare { i1, i64 } @llvm.amdgcn.if.i64(i1) #10
; Function Attrs: convergent nounwind willreturn
declare { i1, i64 } @llvm.amdgcn.else.i64.i64(i64) #10
; Function Attrs: convergent nounwind readnone willreturn
declare i64 @llvm.amdgcn.if.break.i64(i1, i64) #3
; Function Attrs: convergent nounwind willreturn
declare i1 @llvm.amdgcn.loop.i64(i64) #10
; Function Attrs: convergent nounwind willreturn
declare void @llvm.amdgcn.end.cf.i64(i64) #10
attributes #0 = { "amdgpu-flat-work-group-size"="128,128" "denormal-fp-math-f32"="preserve-sign" "target-features"=",+wavefrontsize64,+cumode,-xnack" "uniform-work-group-size"="false" }
attributes #1 = { nofree nosync nounwind readnone speculatable willreturn }
attributes #2 = { nounwind readnone willreturn }
attributes #3 = { convergent nounwind readnone willreturn }
attributes #4 = { convergent nounwind readnone speculatable willreturn }
attributes #5 = { nounwind readonly willreturn }
attributes #6 = { inaccessiblememonly nounwind willreturn writeonly }
attributes #7 = { nounwind "InitialPSInputAddr"="2" "amdgpu-color-export"="1" "amdgpu-depth-export"="0" "amdgpu-unroll-threshold"="700" "denormal-fp-math-f32"="preserve-sign" "target-features"=",+wavefrontsize64,+cumode,-xnack" "uniform-work-group-size"="false" }
attributes #8 = { nounwind readnone speculatable willreturn }
attributes #9 = { convergent nounwind }
attributes #10 = { convergent nounwind willreturn }
attributes #11 = { nounwind }
!lgc.client = !{!0}
!lgc.options.VS = !{!1}
!lgc.options.FS = !{!2}
!amdgpu.pal.metadata.msgpack = !{!3}
!0 = !{!"Vulkan"}
!1 = !{i32 609901243, i32 -1828602377, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 64, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 20, i32 1800}
!2 = !{i32 -1768611618, i32 2041457464, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 64, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 20, i32 1800}
!3 = !{!"\82\B0amdpal.pipelines\91\8A\A4.api\A6Vulkan\B0.hardware_stages\82\A3.gs\83\AB.sgpr_limith\AB.vgpr_limit\CD\01\00\AF.wavefront_size@\A3.ps\83\AB.sgpr_limith\AB.vgpr_limit\CD\01\00\AF.wavefront_size@\B7.internal_pipeline_hash\92\CF\DDW\C21\E2]\A5\14\CF\C6N}\07gn\AB\8B\B0.nggSubgroupSize\CC\80\AA.registers\DE\00C\CD,\01\00\CD,\06\CE\EF;\0B\E6\CD,\0A\CE\02,\00\00\CD,\0B\04\CD,\0C\CE\10\00\00\00\CD,2\01\CD,3\01\CD,4\01\CD,5\01\CD,\80\CE\B7[\E7L\CD,\81\00\CD,\8A\CE\22,\00\00\CD,\8B\0E\CD,\8C\CE\10\00\00\00\CD,\8E\00\CD,\8F\CE\10\00\00\0A\CD,\90\CE\10\00\00\0F\CD,\91\CE\10\00\00\03\CD,\92\CE\10\00\00\04\CD,\B2\01\CD,\B3\01\CD,\B4\01\CD,\B5\01\CD\A0\8F\01\CD\A1\91\00\CD\A1\B1\00\CD\A1\B3\00\CD\A1\B4\00\CD\A1\B5\00\CD\A1\B6\01\CD\A1\B8\CE\01\00\00\00\CD\A1\C2\01\CD\A1\C3D\CD\A1\C4\00\CD\A1\C5\01\CD\A1\FF\CC\80\CD\A2\03\CD\08\10\CD\A2\04\CE\01\00\00\00\CD\A2\06\CD\04?\CD\A2\07\CE\01!\00\00\CD\A2\10\02\CD\A2\90\CE\000\00\03\CD\A2\91\CE \04\00~\CD\A2\93\CE\06\02\01\8C\CD\A2\97\02\CD\A2\98\00\CD\A2\99\00\CD\A2\9A\00\CD\A2\9B\00\CD\A2\A1\00\CD\A2\AB\01\CD\A2\AC\00\CD\A2\AD\00\CD\A2\CE\01\CD\A2\D3\CE\00\02\00\01\CD\A2\D5\CE\02\01 \10\CD\A2\D7\00\CD\A2\D8\00\CD\A2\D9\00\CD\A2\DA\00\CD\A2\E4\00\CD\A2\F8\00\CD\A2\F9-\CD\A3\10\00\CD\C2X\7F\CD\C2_\00\CD\C2b\00\A8.shaders\82\A6.pixel\82\B0.api_shader_hash\92\CFy\AE+8\96\95 \DE\00\B1.hardware_mapping\91\A3.ps\A7.vertex\82\B0.api_shader_hash\92\CF\93\01\BD\F7$ZZ\BB\00\B1.hardware_mapping\91\A3.gs\B0.spill_threshold\CE\FF\FF\FF\FF\A5.type\A3Ngg\B0.user_data_limit\01\AF.xgl_cache_info\82\B3.128_bit_cache_hash\92\CF,\C2\BF\A8}\D5\A7\F8\CF\16\98\06\8C\C5e\18A\AD.llpc_version\A452.1\AEamdpal.version\92\02\03"}
!4 = !{i32 1}
!5 = !{}
!6 = !{i32 6}
...
---
name: _amdgpu_gs_main
alignment: 1
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
failsVerification: false
tracksDebugUserValues: false
registers:
- { id: 0, class: sreg_32, preferred-register: '' }
- { id: 1, class: sreg_32, preferred-register: '' }
- { id: 2, class: sreg_32, preferred-register: '' }
- { id: 3, class: vgpr_32, preferred-register: '' }
- { id: 4, class: sreg_64, preferred-register: '' }
- { id: 5, class: sreg_64, preferred-register: '' }
- { id: 6, class: sgpr_128, preferred-register: '' }
- { id: 7, class: vreg_128, preferred-register: '' }
- { id: 8, class: vgpr_32, preferred-register: '' }
- { id: 9, class: sreg_64, preferred-register: '' }
- { id: 10, class: sreg_64, preferred-register: '' }
- { id: 11, class: sreg_64, preferred-register: '' }
- { id: 12, class: vgpr_32, preferred-register: '' }
- { id: 13, class: sreg_64, preferred-register: '' }
- { id: 14, class: sreg_32, preferred-register: '' }
- { id: 15, class: sreg_64, preferred-register: '' }
- { id: 16, class: sgpr_32, preferred-register: '' }
- { id: 17, class: sgpr_32, preferred-register: '' }
- { id: 18, class: sgpr_32, preferred-register: '' }
- { id: 19, class: sreg_64, preferred-register: '' }
- { id: 20, class: sreg_32, preferred-register: '' }
- { id: 21, class: sreg_64, preferred-register: '' }
- { id: 22, class: vgpr_32, preferred-register: '' }
- { id: 23, class: sreg_64, preferred-register: '' }
- { id: 24, class: vgpr_32, preferred-register: '' }
- { id: 25, class: sreg_64_xexec, preferred-register: '' }
- { id: 26, class: sreg_64_xexec, preferred-register: '' }
- { id: 27, class: sreg_64_xexec, preferred-register: '' }
- { id: 28, class: vgpr_32, preferred-register: '' }
- { id: 29, class: sreg_64, preferred-register: '' }
- { id: 30, class: sreg_64, preferred-register: '' }
- { id: 31, class: sreg_64, preferred-register: '' }
- { id: 32, class: vgpr_32, preferred-register: '' }
- { id: 33, class: sreg_64, preferred-register: '' }
- { id: 34, class: sreg_32, preferred-register: '' }
- { id: 35, class: sreg_64, preferred-register: '' }
- { id: 36, class: sgpr_32, preferred-register: '' }
- { id: 37, class: sgpr_32, preferred-register: '' }
- { id: 38, class: sgpr_32, preferred-register: '' }
- { id: 39, class: sreg_64, preferred-register: '' }
- { id: 40, class: sreg_32, preferred-register: '' }
- { id: 41, class: sreg_64, preferred-register: '' }
- { id: 42, class: vgpr_32, preferred-register: '' }
- { id: 43, class: sreg_64, preferred-register: '' }
- { id: 44, class: vgpr_32, preferred-register: '' }
- { id: 45, class: vgpr_32, preferred-register: '' }
- { id: 46, class: sgpr_32, preferred-register: '' }
- { id: 47, class: sgpr_32, preferred-register: '' }
- { id: 48, class: sgpr_32, preferred-register: '' }
- { id: 49, class: sgpr_32, preferred-register: '' }
- { id: 50, class: sgpr_32, preferred-register: '' }
- { id: 51, class: sgpr_32, preferred-register: '' }
- { id: 52, class: sgpr_32, preferred-register: '' }
- { id: 53, class: sgpr_32, preferred-register: '' }
- { id: 54, class: sgpr_32, preferred-register: '' }
- { id: 55, class: sgpr_32, preferred-register: '' }
- { id: 56, class: sgpr_32, preferred-register: '' }
- { id: 57, class: sgpr_32, preferred-register: '' }
- { id: 58, class: sgpr_32, preferred-register: '' }
- { id: 59, class: sgpr_32, preferred-register: '' }
- { id: 60, class: sgpr_32, preferred-register: '' }
- { id: 61, class: vgpr_32, preferred-register: '' }
- { id: 62, class: vgpr_32, preferred-register: '' }
- { id: 63, class: vgpr_32, preferred-register: '' }
- { id: 64, class: vgpr_32, preferred-register: '' }
- { id: 65, class: vgpr_32, preferred-register: '' }
- { id: 66, class: vgpr_32, preferred-register: '' }
- { id: 67, class: vgpr_32, preferred-register: '' }
- { id: 68, class: vgpr_32, preferred-register: '' }
- { id: 69, class: vgpr_32, preferred-register: '' }
- { id: 70, class: sgpr_224, preferred-register: '' }
- { id: 71, class: sgpr_224, preferred-register: '' }
- { id: 72, class: sreg_32, preferred-register: '' }
- { id: 73, class: sreg_32, preferred-register: '' }
- { id: 74, class: sreg_32, preferred-register: '' }
- { id: 75, class: sreg_32, preferred-register: '' }
- { id: 76, class: sreg_32, preferred-register: '' }
- { id: 77, class: sreg_32, preferred-register: '' }
- { id: 78, class: vgpr_32, preferred-register: '' }
- { id: 79, class: vgpr_32, preferred-register: '' }
- { id: 80, class: sreg_32, preferred-register: '' }
- { id: 81, class: sreg_64, preferred-register: '' }
- { id: 82, class: sreg_32, preferred-register: '' }
- { id: 83, class: vgpr_32, preferred-register: '' }
- { id: 84, class: sreg_32, preferred-register: '' }
- { id: 85, class: vgpr_32, preferred-register: '' }
- { id: 86, class: sreg_32, preferred-register: '' }
- { id: 87, class: vgpr_32, preferred-register: '' }
- { id: 88, class: sreg_64, preferred-register: '' }
- { id: 89, class: vgpr_32, preferred-register: '' }
- { id: 90, class: vgpr_32, preferred-register: '' }
- { id: 91, class: sgpr_32, preferred-register: '' }
- { id: 92, class: sreg_32, preferred-register: '' }
- { id: 93, class: sreg_32, preferred-register: '' }
- { id: 94, class: sreg_32, preferred-register: '' }
- { id: 95, class: sreg_64, preferred-register: '' }
- { id: 96, class: sreg_32, preferred-register: '' }
- { id: 97, class: sreg_64, preferred-register: '' }
- { id: 98, class: vgpr_32, preferred-register: '' }
- { id: 99, class: sreg_64, preferred-register: '' }
- { id: 100, class: sgpr_128, preferred-register: '' }
- { id: 101, class: sreg_32, preferred-register: '' }
- { id: 102, class: sreg_32, preferred-register: '' }
- { id: 103, class: vgpr_32, preferred-register: '' }
- { id: 104, class: sreg_64, preferred-register: '' }
- { id: 105, class: sreg_64, preferred-register: '' }
- { id: 106, class: sreg_64, preferred-register: '' }
- { id: 107, class: sreg_64, preferred-register: '' }
- { id: 108, class: sreg_32, preferred-register: '' }
- { id: 109, class: sgpr_32, preferred-register: '' }
- { id: 110, class: sreg_32, preferred-register: '' }
- { id: 111, class: sreg_64, preferred-register: '' }
- { id: 112, class: sreg_32, preferred-register: '' }
- { id: 113, class: sreg_32, preferred-register: '' }
- { id: 114, class: sreg_32, preferred-register: '' }
- { id: 115, class: sreg_32, preferred-register: '' }
- { id: 116, class: sreg_64, preferred-register: '' }
- { id: 117, class: sreg_64, preferred-register: '' }
- { id: 118, class: sreg_32_xm0_xexec, preferred-register: '' }
- { id: 119, class: vgpr_32, preferred-register: '' }
- { id: 120, class: sreg_64, preferred-register: '' }
- { id: 121, class: sreg_32, preferred-register: '' }
- { id: 122, class: sreg_32, preferred-register: '' }
- { id: 123, class: sreg_64, preferred-register: '' }
- { id: 124, class: vgpr_32, preferred-register: '' }
- { id: 125, class: vgpr_32, preferred-register: '' }
- { id: 126, class: sreg_32, preferred-register: '' }
- { id: 127, class: sreg_32, preferred-register: '' }
- { id: 128, class: vgpr_32, preferred-register: '' }
- { id: 129, class: sgpr_32, preferred-register: '' }
- { id: 130, class: vgpr_32, preferred-register: '' }
- { id: 131, class: sreg_64, preferred-register: '' }
- { id: 132, class: vreg_64, preferred-register: '' }
- { id: 133, class: vgpr_32, preferred-register: '' }
- { id: 134, class: vgpr_32, preferred-register: '' }
- { id: 135, class: sreg_32, preferred-register: '' }
- { id: 136, class: sreg_32, preferred-register: '' }
- { id: 137, class: vgpr_32, preferred-register: '' }
- { id: 138, class: sreg_32, preferred-register: '' }
- { id: 139, class: vgpr_32, preferred-register: '' }
- { id: 140, class: sreg_32, preferred-register: '' }
- { id: 141, class: sreg_32, preferred-register: '' }
- { id: 142, class: sreg_32, preferred-register: '' }
- { id: 143, class: vgpr_32, preferred-register: '' }
- { id: 144, class: vgpr_32, preferred-register: '' }
- { id: 145, class: sreg_32, preferred-register: '' }
- { id: 146, class: vgpr_32, preferred-register: '' }
- { id: 147, class: sreg_32, preferred-register: '' }
- { id: 148, class: vgpr_32, preferred-register: '' }
- { id: 149, class: sreg_32, preferred-register: '' }
- { id: 150, class: vgpr_32, preferred-register: '' }
- { id: 151, class: vgpr_32, preferred-register: '' }
- { id: 152, class: sreg_64, preferred-register: '' }
- { id: 153, class: sreg_64, preferred-register: '' }
- { id: 154, class: sreg_64_xexec, preferred-register: '' }
- { id: 155, class: vgpr_32, preferred-register: '' }
- { id: 156, class: vgpr_32, preferred-register: '' }
- { id: 157, class: vgpr_32, preferred-register: '' }
- { id: 158, class: vgpr_32, preferred-register: '' }
- { id: 159, class: vgpr_32, preferred-register: '' }
- { id: 160, class: vgpr_32, preferred-register: '' }
- { id: 161, class: vgpr_32, preferred-register: '' }
- { id: 162, class: vgpr_32, preferred-register: '' }
- { id: 163, class: vgpr_32, preferred-register: '' }
- { id: 164, class: vgpr_32, preferred-register: '' }
- { id: 165, class: vgpr_32, preferred-register: '' }
- { id: 166, class: vgpr_32, preferred-register: '' }
- { id: 167, class: vgpr_32, preferred-register: '' }
- { id: 168, class: vgpr_32, preferred-register: '' }
- { id: 169, class: vgpr_32, preferred-register: '' }
- { id: 170, class: vgpr_32, preferred-register: '' }
- { id: 171, class: vgpr_32, preferred-register: '' }
- { id: 172, class: sreg_32, preferred-register: '' }
- { id: 173, class: vgpr_32, preferred-register: '' }
- { id: 174, class: sreg_32, preferred-register: '' }
- { id: 175, class: vgpr_32, preferred-register: '' }
- { id: 176, class: vgpr_32, preferred-register: '' }
- { id: 177, class: sreg_64, preferred-register: '' }
- { id: 178, class: sreg_64_xexec, preferred-register: '' }
- { id: 179, class: sgpr_32, preferred-register: '' }
- { id: 180, class: vgpr_32, preferred-register: '' }
- { id: 181, class: vgpr_32, preferred-register: '' }
- { id: 182, class: sreg_32, preferred-register: '' }
- { id: 183, class: sreg_32, preferred-register: '' }
- { id: 184, class: sreg_32, preferred-register: '' }
- { id: 185, class: sreg_64_xexec, preferred-register: '' }
- { id: 186, class: vgpr_32, preferred-register: '' }
- { id: 187, class: vgpr_32, preferred-register: '' }
- { id: 188, class: vgpr_32, preferred-register: '' }
- { id: 189, class: vgpr_32, preferred-register: '' }
- { id: 190, class: sreg_64, preferred-register: '' }
- { id: 191, class: vgpr_32, preferred-register: '' }
- { id: 192, class: vgpr_32, preferred-register: '' }
- { id: 193, class: sgpr_32, preferred-register: '' }
- { id: 194, class: sreg_32, preferred-register: '' }
- { id: 195, class: sreg_64, preferred-register: '' }
- { id: 196, class: sreg_64, preferred-register: '' }
- { id: 197, class: sreg_64, preferred-register: '' }
- { id: 198, class: sreg_64, preferred-register: '' }
- { id: 199, class: sreg_32, preferred-register: '' }
- { id: 200, class: sgpr_32, preferred-register: '' }
- { id: 201, class: sreg_32, preferred-register: '' }
- { id: 202, class: sreg_64, preferred-register: '' }
- { id: 203, class: sreg_32, preferred-register: '' }
- { id: 204, class: sreg_32, preferred-register: '' }
- { id: 205, class: sreg_32, preferred-register: '' }
- { id: 206, class: sreg_32, preferred-register: '' }
- { id: 207, class: sreg_64, preferred-register: '' }
- { id: 208, class: sreg_64, preferred-register: '' }
- { id: 209, class: sreg_32_xm0_xexec, preferred-register: '' }
- { id: 210, class: vgpr_32, preferred-register: '' }
- { id: 211, class: sreg_64, preferred-register: '' }
- { id: 212, class: sreg_32, preferred-register: '' }
- { id: 213, class: sreg_32, preferred-register: '' }
- { id: 214, class: sreg_64, preferred-register: '' }
- { id: 215, class: vgpr_32, preferred-register: '' }
- { id: 216, class: vgpr_32, preferred-register: '' }
- { id: 217, class: sgpr_32, preferred-register: '' }
- { id: 218, class: vgpr_32, preferred-register: '' }
- { id: 219, class: sreg_32, preferred-register: '' }
- { id: 220, class: sreg_32, preferred-register: '' }
- { id: 221, class: vgpr_32, preferred-register: '' }
- { id: 222, class: sreg_32, preferred-register: '' }
- { id: 223, class: vgpr_32, preferred-register: '' }
- { id: 224, class: sreg_32, preferred-register: '' }
- { id: 225, class: sreg_32, preferred-register: '' }
- { id: 226, class: sreg_32, preferred-register: '' }
- { id: 227, class: vgpr_32, preferred-register: '' }
- { id: 228, class: vgpr_32, preferred-register: '' }
- { id: 229, class: sreg_32, preferred-register: '' }
- { id: 230, class: vgpr_32, preferred-register: '' }
- { id: 231, class: sreg_32, preferred-register: '' }
- { id: 232, class: vgpr_32, preferred-register: '' }
- { id: 233, class: vgpr_32, preferred-register: '' }
- { id: 234, class: sreg_64_xexec, preferred-register: '' }
- { id: 235, class: vgpr_32, preferred-register: '' }
- { id: 236, class: vgpr_32, preferred-register: '' }
- { id: 237, class: vgpr_32, preferred-register: '' }
- { id: 238, class: vgpr_32, preferred-register: '' }
- { id: 239, class: vgpr_32, preferred-register: '' }
- { id: 240, class: vgpr_32, preferred-register: '' }
- { id: 241, class: vgpr_32, preferred-register: '' }
- { id: 242, class: vgpr_32, preferred-register: '' }
- { id: 243, class: vgpr_32, preferred-register: '' }
- { id: 244, class: vgpr_32, preferred-register: '' }
- { id: 245, class: vgpr_32, preferred-register: '' }
- { id: 246, class: vgpr_32, preferred-register: '' }
- { id: 247, class: vgpr_32, preferred-register: '' }
- { id: 248, class: vgpr_32, preferred-register: '' }
- { id: 249, class: vgpr_32, preferred-register: '' }
- { id: 250, class: vgpr_32, preferred-register: '' }
- { id: 251, class: sreg_32, preferred-register: '' }
- { id: 252, class: vgpr_32, preferred-register: '' }
- { id: 253, class: sgpr_32, preferred-register: '' }
- { id: 254, class: vgpr_32, preferred-register: '' }
- { id: 255, class: sreg_64_xexec, preferred-register: '' }
- { id: 256, class: vgpr_32, preferred-register: '' }
- { id: 257, class: sreg_32, preferred-register: '' }
- { id: 258, class: sreg_32, preferred-register: '' }
- { id: 259, class: vgpr_32, preferred-register: '' }
- { id: 260, class: sreg_64_xexec, preferred-register: '' }
- { id: 261, class: vgpr_32, preferred-register: '' }
- { id: 262, class: vgpr_32, preferred-register: '' }
- { id: 263, class: vgpr_32, preferred-register: '' }
- { id: 264, class: vgpr_32, preferred-register: '' }
- { id: 265, class: sgpr_32, preferred-register: '' }
- { id: 266, class: sreg_64_xexec, preferred-register: '' }
- { id: 267, class: vgpr_32, preferred-register: '' }
- { id: 268, class: sreg_32, preferred-register: '' }
- { id: 269, class: vgpr_32, preferred-register: '' }
- { id: 270, class: vgpr_32, preferred-register: '' }
- { id: 271, class: vgpr_32, preferred-register: '' }
- { id: 272, class: vgpr_32, preferred-register: '' }
- { id: 273, class: vgpr_32, preferred-register: '' }
- { id: 274, class: sgpr_32, preferred-register: '' }
- { id: 275, class: sreg_64_xexec, preferred-register: '' }
- { id: 276, class: vgpr_32, preferred-register: '' }
- { id: 277, class: vgpr_32, preferred-register: '' }
- { id: 278, class: vgpr_32, preferred-register: '' }
- { id: 279, class: vgpr_32, preferred-register: '' }
- { id: 280, class: sgpr_32, preferred-register: '' }
- { id: 281, class: vgpr_32, preferred-register: '' }
- { id: 282, class: sgpr_32, preferred-register: '' }
- { id: 283, class: vgpr_32, preferred-register: '' }
- { id: 284, class: sgpr_32, preferred-register: '' }
- { id: 285, class: vgpr_32, preferred-register: '' }
- { id: 286, class: sgpr_32, preferred-register: '' }
- { id: 287, class: vgpr_32, preferred-register: '' }
- { id: 288, class: sgpr_32, preferred-register: '' }
- { id: 289, class: vgpr_32, preferred-register: '' }
- { id: 290, class: sgpr_32, preferred-register: '' }
- { id: 291, class: vgpr_32, preferred-register: '' }
- { id: 292, class: vgpr_32, preferred-register: '' }
- { id: 293, class: vgpr_32, preferred-register: '' }
- { id: 294, class: vgpr_32, preferred-register: '' }
- { id: 295, class: vgpr_32, preferred-register: '' }
- { id: 296, class: vgpr_32, preferred-register: '' }
- { id: 297, class: vreg_64, preferred-register: '' }
- { id: 298, class: vreg_64, preferred-register: '' }
- { id: 299, class: vgpr_32, preferred-register: '' }
- { id: 300, class: vgpr_32, preferred-register: '' }
- { id: 301, class: vgpr_32, preferred-register: '' }
- { id: 302, class: vgpr_32, preferred-register: '' }
- { id: 303, class: vgpr_32, preferred-register: '' }
- { id: 304, class: vgpr_32, preferred-register: '' }
- { id: 305, class: sreg_32, preferred-register: '' }
- { id: 306, class: sreg_32, preferred-register: '' }
- { id: 307, class: sreg_32, preferred-register: '' }
- { id: 308, class: sreg_32, preferred-register: '' }
- { id: 309, class: sreg_32, preferred-register: '' }
- { id: 310, class: sreg_32, preferred-register: '' }
- { id: 311, class: sreg_32, preferred-register: '' }
- { id: 312, class: sreg_32, preferred-register: '' }
- { id: 313, class: sreg_32, preferred-register: '' }
- { id: 314, class: sreg_32, preferred-register: '' }
- { id: 315, class: sreg_32, preferred-register: '' }
- { id: 316, class: sreg_32, preferred-register: '' }
liveins:
- { reg: '$sgpr2', virtual-reg: '%48' }
- { reg: '$sgpr3', virtual-reg: '%49' }
- { reg: '$sgpr8', virtual-reg: '%54' }
- { reg: '$sgpr9', virtual-reg: '%55' }
- { reg: '$sgpr10', virtual-reg: '%56' }
- { reg: '$sgpr11', virtual-reg: '%57' }
- { reg: '$sgpr12', virtual-reg: '%58' }
- { reg: '$sgpr13', virtual-reg: '%59' }
- { reg: '$sgpr14', virtual-reg: '%60' }
- { reg: '$vgpr0', virtual-reg: '%61' }
- { reg: '$vgpr5', virtual-reg: '%66' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 1
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 4294967295
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
hasTailCall: false
localFrameSize: 0
savePoint: ''
restorePoint: ''
fixedStack: []
stack: []
callSites: []
debugValueSubstitutions: []
constants: []
machineFunctionInfo:
explicitKernArgSize: 0
maxKernArgAlign: 4
ldsSize: 0
dynLDSAlign: 1
isEntryFunction: true
noSignedZerosFPMath: false
memoryBound: false
waveLimiter: false
hasSpilledSGPRs: false
hasSpilledVGPRs: false
scratchRSrcReg: '$sgpr100_sgpr101_sgpr102_sgpr103'
frameOffsetReg: '$fp_reg'
stackPtrOffsetReg: '$sgpr32'
argumentInfo:
privateSegmentWaveByteOffset: { reg: '$sgpr5' }
mode:
ieee: false
dx10-clamp: true
fp32-input-denormals: false
fp32-output-denormals: false
fp64-fp16-input-denormals: true
fp64-fp16-output-denormals: true
highBitsOf32BitAddress: 0
occupancy: 20
body: |
bb.0..entry:
successors: %bb.1(0x30000000), %bb.2(0x50000000)
liveins: $sgpr2, $sgpr3, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr5
%66:vgpr_32 = COPY $vgpr5
%61:vgpr_32 = COPY $vgpr0
%60:sgpr_32 = COPY $sgpr14
%59:sgpr_32 = COPY $sgpr13
%58:sgpr_32 = COPY $sgpr12
%57:sgpr_32 = COPY $sgpr11
%56:sgpr_32 = COPY $sgpr10
%55:sgpr_32 = COPY $sgpr9
%54:sgpr_32 = COPY $sgpr8
%49:sgpr_32 = COPY $sgpr3
%48:sgpr_32 = COPY $sgpr2
%71:sgpr_224 = REG_SEQUENCE %54, %subreg.sub0, %55, %subreg.sub1, %56, %subreg.sub2, %57, %subreg.sub3, %58, %subreg.sub4, %59, %subreg.sub5, %60, %subreg.sub6
%70:sgpr_224 = COPY %71
SI_INIT_EXEC -1, implicit-def dead $exec
%0:sreg_32 = S_BFE_U32 %48, 589846, implicit-def dead $scc
%1:sreg_32 = S_BFE_U32 %48, 589836, implicit-def dead $scc
%2:sreg_32 = S_BFE_U32 %49, 262168, implicit-def dead $scc
S_BARRIER
%72:sreg_32 = S_MOV_B32 0
S_CMP_LG_U32 %2, %72, implicit-def $scc
S_CBRANCH_SCC1 %bb.2, implicit $scc
S_BRANCH %bb.1
bb.1..allocReq:
successors: %bb.2(0x80000000)
%73:sreg_32 = S_MOV_B32 12
%74:sreg_32 = nuw nsw S_LSHL_B32 %0, killed %73, implicit-def dead $scc
%75:sreg_32 = S_OR_B32 killed %74, %1, implicit-def dead $scc
$m0 = COPY %75
S_SENDMSG 9, implicit $exec, implicit $m0
bb.2..endAllocReq:
successors: %bb.3(0x40000000), %bb.4(0x40000000)
%77:sreg_32 = S_MOV_B32 -1
%78:vgpr_32 = V_MBCNT_LO_U32_B32_e64 %77, %72, implicit $exec
%79:vgpr_32 = V_MBCNT_HI_U32_B32_e64 %77, killed %78, implicit $exec
%80:sreg_32 = S_MOV_B32 6
%3:vgpr_32 = V_LSHL_OR_B32_e64 %2, killed %80, killed %79, implicit $exec
%81:sreg_64 = V_CMP_LT_U32_e64 %3, %0, implicit $exec
%4:sreg_64 = SI_IF killed %81, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
S_BRANCH %bb.3
bb.3..expPrim:
successors: %bb.4(0x80000000)
%82:sreg_32 = IMPLICIT_DEF
%83:vgpr_32 = COPY %82
%84:sreg_32 = IMPLICIT_DEF
%85:vgpr_32 = COPY %84
%86:sreg_32 = IMPLICIT_DEF
%87:vgpr_32 = COPY %86
EXP_DONE 20, %61, %83, %85, %87, 0, 0, 1, implicit $exec
bb.4..endExpPrim:
successors: %bb.5(0x40000000), %bb.12(0x40000000)
SI_END_CF %4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
%88:sreg_64 = V_CMP_LT_U32_e64 %3, %1, implicit $exec
%5:sreg_64 = SI_IF killed %88, %bb.12, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
S_BRANCH %bb.5
bb.5..expVert:
successors: %bb.6(0x40000000), %bb.7(0x40000000)
%92:sreg_32 = COPY %70.sub2
%93:sreg_32 = COPY %70.sub4
%94:sreg_32 = COPY %70.sub5
%95:sreg_64 = S_GETPC_B64
%96:sreg_32 = COPY %95.sub1
%97:sreg_64 = REG_SEQUENCE killed %92, %subreg.sub0, %96, %subreg.sub1
%98:vgpr_32 = V_ADD_U32_e64 killed %94, %66, 0, implicit $exec
%99:sreg_64 = REG_SEQUENCE killed %93, %subreg.sub0, %96, %subreg.sub1
%6:sgpr_128 = S_LOAD_DWORDX4_IMM killed %97, 0, 0 :: (load (s128) from %ir.25, addrspace 4)
%100:sgpr_128 = S_LOAD_DWORDX4_IMM killed %99, 0, 0 :: (invariant load (s128) from %ir.24, addrspace 4)
%101:sreg_32 = S_MOV_B32 0
%7:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_IDXEN killed %98, killed %100, %101, 0, 77, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4)
%102:sreg_32 = S_MOV_B32 -1
%103:vgpr_32 = V_MBCNT_LO_U32_B32_e64 %102, %101, implicit $exec
%8:vgpr_32 = V_MBCNT_HI_U32_B32_e64 %102, killed %103, implicit $exec
%90:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
INLINEASM &"; %1", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %89, 2147483657 /* reguse tiedto:$0 */, %90(tied-def 3)
%104:sreg_64 = V_CMP_NE_U32_e64 %89, %101, implicit $exec
%105:sreg_64 = COPY %104
%106:sreg_64 = V_CMP_NE_U32_e64 %8, %101, implicit $exec
%10:sreg_64 = COPY %106
%292:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%11:sreg_64 = SI_IF %106, %bb.7, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
S_BRANCH %bb.6
bb.6..lr.ph16.i.preheader:
successors: %bb.8(0x80000000)
%107:sreg_64 = S_MOV_B64 0
%296:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%113:sreg_32 = S_MOV_B32 1
%120:sreg_64 = S_MOV_B64 1
%122:sreg_32 = S_MOV_B32 16
S_BRANCH %bb.8
bb.7.Flow13:
successors: %bb.13(0x80000000)
%12:vgpr_32 = PHI %292, %bb.5, %22, %bb.11
SI_END_CF %11, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
S_BRANCH %bb.13
bb.8..lr.ph16.i:
successors: %bb.9(0x40000000), %bb.10(0x40000000)
%13:sreg_64 = PHI %107, %bb.6, %21, %bb.10
%14:sreg_32 = PHI %101, %bb.6, %20, %bb.10
%15:sreg_64 = PHI %107, %bb.6, %19, %bb.10
%295:vgpr_32 = PHI %296, %bb.6, %293, %bb.10
%110:sreg_32 = COPY %15.sub0
%111:sreg_64 = S_LSHR_B64 %105, killed %110, implicit-def dead $scc
%112:sreg_32 = COPY %111.sub0
%114:sreg_32 = S_AND_B32 killed %112, %113, implicit-def dead $scc
%116:sreg_64 = REG_SEQUENCE killed %114, %subreg.sub0, %101, %subreg.sub1
S_CMP_EQ_U64 killed %116, %107, implicit-def $scc
%294:vgpr_32 = COPY %295, implicit $exec
S_CBRANCH_SCC1 %bb.10, implicit $scc
S_BRANCH %bb.9
bb.9 (%ir-block.40):
successors: %bb.10(0x80000000)
%118:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %6, %14, 0 :: (dereferenceable invariant load (s32))
%119:vgpr_32 = nnan nsz arcp contract afn reassoc nofpexcept V_ADD_F32_e64 0, %295, 0, killed %118, 0, 0, implicit $mode, implicit $exec
bb.10 (%ir-block.44):
successors: %bb.11(0x04000000), %bb.8(0x7c000000)
%293:vgpr_32 = PHI %294, %bb.8, %119, %bb.9
%307:sreg_32 = COPY %15.sub0
%308:sreg_32 = COPY %15.sub1
%309:sreg_32 = COPY %120.sub0
%310:sreg_32 = COPY %120.sub1
%305:sreg_32 = S_ADD_U32 %307, %309, implicit-def $scc
%306:sreg_32 = S_ADDC_U32 %308, %310, implicit-def $scc, implicit $scc
%19:sreg_64 = REG_SEQUENCE %305, %subreg.sub0, %306, %subreg.sub1
%121:sreg_32 = COPY %19.sub0
%20:sreg_32 = S_ADD_I32 %14, %122, implicit-def dead $scc
%123:sreg_64 = V_CMP_EQ_U32_e64 %8, killed %121, implicit $exec
%21:sreg_64 = SI_IF_BREAK killed %123, %13, implicit-def dead $scc
SI_LOOP %21, %bb.8, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
S_BRANCH %bb.11
bb.11.Flow12:
successors: %bb.7(0x80000000)
%22:vgpr_32 = PHI %293, %bb.10
SI_END_CF %21, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
S_BRANCH %bb.7
bb.12.Flow14:
successors: %bb.24(0x80000000)
SI_END_CF %5, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
S_BRANCH %bb.24
bb.13.._crit_edge17.i:
successors: %bb.14(0x40000000), %bb.21(0x40000000)
%127:sreg_32 = S_MOV_B32 4
%128:vgpr_32 = V_LSHLREV_B32_e64 killed %127, %8, implicit $exec
%129:sgpr_32 = S_MOV_B32 0
%24:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN killed %128, %6, %129, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
%125:vgpr_32 = COPY %24
INLINEASM &"; %1", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %124, 2147483657 /* reguse tiedto:$0 */, %125(tied-def 3)
%130:vgpr_32 = V_SET_INACTIVE_B32 %124, %129, implicit-def dead $scc, implicit $exec
%131:sreg_64 = S_MOV_B64 1
%132:vreg_64 = nuw V_LSHLREV_B64_e64 %8, killed %131, implicit $exec
%133:vgpr_32 = COPY %132.sub0
%134:vgpr_32 = COPY %132.sub1
%135:sreg_32 = S_MOV_B32 -305419897
%136:sreg_32 = S_MOV_B32 1698898191
%138:sreg_32 = IMPLICIT_DEF
%139:vgpr_32 = COPY %138
%137:vgpr_32 = V_PERMLANE16_B32_e64 -1, killed %130, 0, killed %136, 0, killed %135, %139, 0, implicit $exec
%140:sreg_32 = S_MOV_B32 16
%141:sreg_32 = V_READLANE_B32 %137, %140
%142:sreg_32 = S_MOV_B32 48
%143:vgpr_32 = V_WRITELANE_B32 killed %141, killed %142, %137
%144:vgpr_32 = V_WRITELANE_B32 %129, %140, %143
%145:sreg_32 = S_MOV_B32 -1
%147:sreg_32 = IMPLICIT_DEF
%148:vgpr_32 = COPY %147
%146:vgpr_32 = V_PERMLANEX16_B32_e64 -1, %144, 0, %129, 0, %145, %148, 0, implicit $exec
%149:sreg_32 = S_MOV_B32 65537
%150:vgpr_32 = V_AND_B32_e64 %134, %149, implicit $exec
%151:vgpr_32 = V_AND_B32_e64 %133, %149, implicit $exec
%297:vreg_64 = REG_SEQUENCE killed %151, %subreg.sub0, killed %150, %subreg.sub1
%153:sreg_64 = S_MOV_B64 0
%154:sreg_64_xexec = V_CMP_EQ_U64_e64 killed %297, %153, implicit $exec
%25:sreg_64_xexec = COPY %154
%155:vgpr_32 = V_CNDMASK_B32_e64 0, killed %146, 0, %144, %154, implicit $exec
%157:vgpr_32 = COPY %129
%156:vgpr_32 = V_MOV_B32_dpp %157, %155, 273, 15, 15, 0, implicit $exec
%158:vgpr_32 = nnan nsz arcp contract afn reassoc nofpexcept V_ADD_F32_e64 0, %155, 0, killed %156, 0, 0, implicit $mode, implicit $exec
%160:vgpr_32 = COPY %129
%159:vgpr_32 = V_MOV_B32_dpp %160, %155, 274, 15, 15, 0, implicit $exec
%161:vgpr_32 = nnan nsz arcp contract afn reassoc nofpexcept V_ADD_F32_e64 0, killed %158, 0, killed %159, 0, 0, implicit $mode, implicit $exec
%163:vgpr_32 = COPY %129
%162:vgpr_32 = V_MOV_B32_dpp %163, %155, 275, 15, 15, 0, implicit $exec
%164:vgpr_32 = nnan nsz arcp contract afn reassoc nofpexcept V_ADD_F32_e64 0, killed %161, 0, killed %162, 0, 0, implicit $mode, implicit $exec
%166:vgpr_32 = COPY %129
%165:vgpr_32 = V_MOV_B32_dpp %166, %164, 276, 15, 14, 0, implicit $exec
%167:vgpr_32 = nnan nsz arcp contract afn reassoc nofpexcept V_ADD_F32_e64 0, %164, 0, killed %165, 0, 0, implicit $mode, implicit $exec
%169:vgpr_32 = COPY %129
%168:vgpr_32 = V_MOV_B32_dpp %169, %167, 280, 15, 12, 0, implicit $exec
%170:vgpr_32 = nnan nsz arcp contract afn reassoc nofpexcept V_ADD_F32_e64 0, %167, 0, killed %168, 0, 0, implicit $mode, implicit $exec
%172:sreg_32 = IMPLICIT_DEF
%173:vgpr_32 = COPY %172
%171:vgpr_32 = V_PERMLANEX16_B32_e64 -1, %170, 0, %145, 0, %145, %173, 0, implicit $exec
%174:sreg_32 = S_MOV_B32 -65536
%175:vgpr_32 = V_AND_B32_e64 %134, %174, implicit $exec
%176:vgpr_32 = V_AND_B32_e64 %133, %174, implicit $exec
%298:vreg_64 = REG_SEQUENCE killed %176, %subreg.sub0, killed %175, %subreg.sub1
%178:sreg_64_xexec = V_CMP_EQ_U64_e64 killed %298, %153, implicit $exec
%26:sreg_64_xexec = COPY %178
%180:vgpr_32 = nnan nsz arcp contract afn reassoc V_CNDMASK_B32_e64 0, killed %171, 0, %129, %178, implicit $exec
%181:vgpr_32 = nnan nsz arcp contract afn reassoc nofpexcept V_ADD_F32_e64 0, %170, 0, killed %180, 0, 0, implicit $mode, implicit $exec
%182:sreg_32 = S_MOV_B32 31
%183:sreg_32 = V_READLANE_B32 %181, killed %182
%184:sreg_32 = S_MOV_B32 32
%185:sreg_64_xexec = V_CMP_LT_U32_e64 %8, killed %184, implicit $exec
%27:sreg_64_xexec = COPY %185
%187:vgpr_32 = COPY %129
%186:vgpr_32 = nnan nsz arcp contract afn reassoc V_CNDMASK_B32_e64 0, killed %183, 0, %187, %185, implicit $exec
%188:vgpr_32 = nnan nsz arcp contract afn reassoc nofpexcept V_ADD_F32_e64 0, %181, 0, killed %186, 0, 0, implicit $mode, implicit $exec
early-clobber %28:vgpr_32 = STRICT_WWM killed %188, implicit $exec
%189:vgpr_32 = V_AND_B32_e64 1, %8, implicit $exec
%190:sreg_64 = V_CMP_EQ_U32_e64 killed %189, 1, implicit $exec
%304:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
%29:sreg_64 = SI_IF killed %190, %bb.21, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
S_BRANCH %bb.14
bb.14 (%ir-block.95):
successors: %bb.15(0x40000000), %bb.16(0x40000000)
%192:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
INLINEASM &"; %1", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %191, 2147483657 /* reguse tiedto:$0 */, %192(tied-def 3)
%195:sreg_64 = V_CMP_NE_U32_e64 %191, %129, implicit $exec
%196:sreg_64 = COPY %195
%197:sreg_64 = COPY %10
%299:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%31:sreg_64 = SI_IF %197, %bb.16, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
S_BRANCH %bb.15
bb.15..lr.ph.i.preheader:
successors: %bb.17(0x80000000)
%199:sreg_32 = S_MOV_B32 0
%198:sreg_64 = S_MOV_B64 0
%303:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%204:sreg_32 = S_MOV_B32 1
%211:sreg_64 = S_MOV_B64 1
%213:sreg_32 = S_MOV_B32 16
S_BRANCH %bb.17
bb.16.Flow10:
successors: %bb.22(0x80000000)
%32:vgpr_32 = PHI %299, %bb.14, %42, %bb.20
SI_END_CF %31, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
S_BRANCH %bb.22
bb.17..lr.ph.i:
successors: %bb.18(0x40000000), %bb.19(0x40000000)
%33:sreg_64 = PHI %198, %bb.15, %41, %bb.19
%34:sreg_32 = PHI %199, %bb.15, %40, %bb.19
%35:sreg_64 = PHI %198, %bb.15, %39, %bb.19
%302:vgpr_32 = PHI %303, %bb.15, %300, %bb.19
%201:sreg_32 = COPY %35.sub0
%202:sreg_64 = S_LSHR_B64 %196, killed %201, implicit-def dead $scc
%203:sreg_32 = COPY %202.sub0
%205:sreg_32 = S_AND_B32 killed %203, %204, implicit-def dead $scc
%207:sreg_64 = REG_SEQUENCE killed %205, %subreg.sub0, %199, %subreg.sub1
S_CMP_EQ_U64 killed %207, %198, implicit-def $scc
%301:vgpr_32 = COPY %302, implicit $exec
S_CBRANCH_SCC1 %bb.19, implicit $scc
S_BRANCH %bb.18
bb.18 (%ir-block.104):
successors: %bb.19(0x80000000)
%209:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %6, %34, 0 :: (dereferenceable invariant load (s32))
%210:vgpr_32 = nnan nsz arcp contract afn reassoc nofpexcept V_ADD_F32_e64 0, %302, 0, killed %209, 0, 0, implicit $mode, implicit $exec
bb.19 (%ir-block.108):
successors: %bb.20(0x04000000), %bb.17(0x7c000000)
%300:vgpr_32 = PHI %301, %bb.17, %210, %bb.18
%313:sreg_32 = COPY %35.sub0
%314:sreg_32 = COPY %35.sub1
%315:sreg_32 = COPY %211.sub0
%316:sreg_32 = COPY %211.sub1
%311:sreg_32 = S_ADD_U32 %313, %315, implicit-def $scc
%312:sreg_32 = S_ADDC_U32 %314, %316, implicit-def $scc, implicit $scc
%39:sreg_64 = REG_SEQUENCE %311, %subreg.sub0, %312, %subreg.sub1
%212:sreg_32 = COPY %39.sub0
%40:sreg_32 = S_ADD_I32 %34, %213, implicit-def dead $scc
%214:sreg_64 = V_CMP_EQ_U32_e64 %8, killed %212, implicit $exec
%41:sreg_64 = SI_IF_BREAK killed %214, %33, implicit-def dead $scc
SI_LOOP %41, %bb.17, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
S_BRANCH %bb.20
bb.20.Flow:
successors: %bb.16(0x80000000)
%42:vgpr_32 = PHI %300, %bb.19
SI_END_CF %41, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
S_BRANCH %bb.16
bb.21.Flow11:
successors: %bb.23(0x80000000)
%44:vgpr_32 = PHI %304, %bb.13, %45, %bb.22
SI_END_CF %29, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
S_BRANCH %bb.23
bb.22.._crit_edge.i:
successors: %bb.21(0x80000000)
%216:vgpr_32 = COPY %24
INLINEASM &"; %1", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %215, 2147483657 /* reguse tiedto:$0 */, %216(tied-def 3)
%217:sgpr_32 = S_MOV_B32 0
%218:vgpr_32 = V_SET_INACTIVE_B32 %215, %217, implicit-def dead $scc, implicit $exec
%219:sreg_32 = S_MOV_B32 -305419897
%220:sreg_32 = S_MOV_B32 1698898191
%222:sreg_32 = IMPLICIT_DEF
%223:vgpr_32 = COPY %222
%221:vgpr_32 = V_PERMLANE16_B32_e64 -1, killed %218, 0, killed %220, 0, killed %219, %223, 0, implicit $exec
%224:sreg_32 = S_MOV_B32 16
%225:sreg_32 = V_READLANE_B32 %221, %224
%226:sreg_32 = S_MOV_B32 48
%227:vgpr_32 = V_WRITELANE_B32 killed %225, killed %226, %221
%228:vgpr_32 = V_WRITELANE_B32 %217, %224, %227
%229:sreg_32 = S_MOV_B32 -1
%231:sreg_32 = IMPLICIT_DEF
%232:vgpr_32 = COPY %231
%230:vgpr_32 = V_PERMLANEX16_B32_e64 -1, %228, 0, %217, 0, %229, %232, 0, implicit $exec
%233:vgpr_32 = V_CNDMASK_B32_e64 0, killed %230, 0, %228, %25, implicit $exec
%236:vgpr_32 = COPY %217
%235:vgpr_32 = V_MOV_B32_dpp %236, %233, 273, 15, 15, 0, implicit $exec
%237:vgpr_32 = nnan nsz arcp contract afn reassoc nofpexcept V_ADD_F32_e64 0, %233, 0, killed %235, 0, 0, implicit $mode, implicit $exec
%239:vgpr_32 = COPY %217
%238:vgpr_32 = V_MOV_B32_dpp %239, %233, 274, 15, 15, 0, implicit $exec
%240:vgpr_32 = nnan nsz arcp contract afn reassoc nofpexcept V_ADD_F32_e64 0, killed %237, 0, killed %238, 0, 0, implicit $mode, implicit $exec
%242:vgpr_32 = COPY %217
%241:vgpr_32 = V_MOV_B32_dpp %242, %233, 275, 15, 15, 0, implicit $exec
%243:vgpr_32 = nnan nsz arcp contract afn reassoc nofpexcept V_ADD_F32_e64 0, killed %240, 0, killed %241, 0, 0, implicit $mode, implicit $exec
%245:vgpr_32 = COPY %217
%244:vgpr_32 = V_MOV_B32_dpp %245, %243, 276, 15, 14, 0, implicit $exec
%246:vgpr_32 = nnan nsz arcp contract afn reassoc nofpexcept V_ADD_F32_e64 0, %243, 0, killed %244, 0, 0, implicit $mode, implicit $exec
%248:vgpr_32 = COPY %217
%247:vgpr_32 = V_MOV_B32_dpp %248, %246, 280, 15, 12, 0, implicit $exec
%249:vgpr_32 = nnan nsz arcp contract afn reassoc nofpexcept V_ADD_F32_e64 0, %246, 0, killed %247, 0, 0, implicit $mode, implicit $exec
%251:sreg_32 = IMPLICIT_DEF
%252:vgpr_32 = COPY %251
%250:vgpr_32 = V_PERMLANEX16_B32_e64 -1, %249, 0, %229, 0, %229, %252, 0, implicit $exec
%254:vgpr_32 = nnan nsz arcp contract afn reassoc V_CNDMASK_B32_e64 0, killed %250, 0, %217, %26, implicit $exec
%256:vgpr_32 = nnan nsz arcp contract afn reassoc nofpexcept V_ADD_F32_e64 0, %249, 0, killed %254, 0, 0, implicit $mode, implicit $exec
%257:sreg_32 = S_MOV_B32 31
%258:sreg_32 = V_READLANE_B32 %256, killed %257
%261:vgpr_32 = COPY %217
%259:vgpr_32 = nnan nsz arcp contract afn reassoc V_CNDMASK_B32_e64 0, killed %258, 0, %261, %27, implicit $exec
%262:vgpr_32 = nnan nsz arcp contract afn reassoc nofpexcept V_ADD_F32_e64 0, %256, 0, killed %259, 0, 0, implicit $mode, implicit $exec
early-clobber %263:vgpr_32 = STRICT_WWM killed %262, implicit $exec
%264:vgpr_32 = nnan nsz arcp contract afn reassoc nofpexcept V_SUB_F32_e64 0, %32, 0, killed %263, 0, 0, implicit $mode, implicit $exec
%265:sgpr_32 = S_MOV_B32 925353388
%266:sreg_64_xexec = nofpexcept V_CMP_LT_F32_e64 2, killed %264, 0, killed %265, 0, implicit $mode, implicit $exec
%267:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed %266, implicit $exec
%268:sreg_32 = S_MOV_B32 1
%45:vgpr_32 = V_LSHLREV_B32_e64 killed %268, killed %267, implicit $exec
S_BRANCH %bb.21
bb.23.lgc.ngg.ES.main.exit:
successors: %bb.12(0x80000000)
%269:vgpr_32 = COPY %7.sub3
%270:vgpr_32 = COPY %7.sub2
%271:vgpr_32 = COPY %7.sub1
%272:vgpr_32 = COPY %7.sub0
%273:vgpr_32 = nnan nsz arcp contract afn reassoc nofpexcept V_SUB_F32_e64 0, %12, 0, %28, 0, 0, implicit $mode, implicit $exec
%274:sgpr_32 = S_MOV_B32 925353388
%275:sreg_64_xexec = nofpexcept V_CMP_LT_F32_e64 2, killed %273, 0, killed %274, 0, implicit $mode, implicit $exec
%276:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed %275, implicit $exec
%277:vgpr_32 = V_OR_B32_e64 %44, killed %276, implicit $exec
%278:vgpr_32 = V_CVT_F32_UBYTE0_e64 killed %277, 0, 0, implicit $exec
EXP 12, killed %272, killed %271, killed %270, killed %269, 0, 0, 15, implicit $exec
%279:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
%280:sgpr_32 = IMPLICIT_DEF
%281:vgpr_32 = COPY %280
%282:sgpr_32 = IMPLICIT_DEF
%283:vgpr_32 = COPY %282
%284:sgpr_32 = IMPLICIT_DEF
%285:vgpr_32 = COPY %284
EXP_DONE 13, killed %279, %281, %283, %285, 0, 0, 1, implicit $exec
%286:sgpr_32 = IMPLICIT_DEF
%287:vgpr_32 = COPY %286
%288:sgpr_32 = IMPLICIT_DEF
%289:vgpr_32 = COPY %288
%290:sgpr_32 = IMPLICIT_DEF
%291:vgpr_32 = COPY %290
EXP 32, killed %278, %287, %289, %291, 0, 0, 1, implicit $exec
S_BRANCH %bb.12
bb.24..endExpVert:
S_ENDPGM 0
...
# Second machine function of the reproducer: the pipeline's pixel-shader entry
# point (per the `_amdgpu_ps_main` naming convention).  It interpolates one
# input parameter, converts the result to an unsigned integer and exports it.
# NOTE(review): this file is a MachineSink failure reproducer -- keep the
# instruction sequence and virtual-register numbering exactly as-is.
---
name: _amdgpu_ps_main
alignment: 1
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
failsVerification: false
tracksDebugUserValues: false
# Virtual registers: a mix of 32-bit scalar (sgpr_32) and vector (vgpr_32)
# classes used by the short interpolate/convert/export sequence in the body.
registers:
  - { id: 0, class: sgpr_32, preferred-register: '' }
  - { id: 1, class: sgpr_32, preferred-register: '' }
  - { id: 2, class: sgpr_32, preferred-register: '' }
  - { id: 3, class: vgpr_32, preferred-register: '' }
  - { id: 4, class: vgpr_32, preferred-register: '' }
  - { id: 5, class: sgpr_32, preferred-register: '' }
  - { id: 6, class: vgpr_32, preferred-register: '' }
  - { id: 7, class: vgpr_32, preferred-register: '' }
  - { id: 8, class: vgpr_32, preferred-register: '' }
  - { id: 9, class: sgpr_32, preferred-register: '' }
  - { id: 10, class: vgpr_32, preferred-register: '' }
  - { id: 11, class: sgpr_32, preferred-register: '' }
  - { id: 12, class: vgpr_32, preferred-register: '' }
  - { id: 13, class: sgpr_32, preferred-register: '' }
  - { id: 14, class: vgpr_32, preferred-register: '' }
# Incoming physical registers: one user SGPR plus the two barycentric/attribute
# VGPRs consumed by the V_INTERP pair below.
liveins:
  - { reg: '$sgpr2', virtual-reg: '%2' }
  - { reg: '$vgpr0', virtual-reg: '%3' }
  - { reg: '$vgpr1', virtual-reg: '%4' }
# No stack usage: the function is a leaf with no spills or calls.
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 1
  adjustsStack: false
  hasCalls: false
  stackProtector: ''
  maxCallFrameSize: 4294967295
  cvBytesOfCalleeSavedRegisters: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
  hasTailCall: false
  localFrameSize: 0
  savePoint: ''
  restorePoint: ''
fixedStack: []
stack: []
callSites: []
debugValueSubstitutions: []
constants: []
# AMDGPU-specific function state (entry function, denormal/rounding mode, the
# reserved scratch resource registers, etc.) -- kept verbatim from the dump.
machineFunctionInfo:
  explicitKernArgSize: 0
  maxKernArgAlign: 4
  ldsSize: 0
  dynLDSAlign: 1
  isEntryFunction: true
  noSignedZerosFPMath: false
  memoryBound: false
  waveLimiter: false
  hasSpilledSGPRs: false
  hasSpilledVGPRs: false
  scratchRSrcReg: '$sgpr100_sgpr101_sgpr102_sgpr103'
  frameOffsetReg: '$fp_reg'
  stackPtrOffsetReg: '$sgpr32'
  argumentInfo:
    privateSegmentWaveByteOffset: { reg: '$sgpr3' }
  mode:
    ieee: false
    dx10-clamp: true
    fp32-input-denormals: false
    fp32-output-denormals: false
    fp64-fp16-input-denormals: true
    fp64-fp16-output-denormals: true
  highBitsOf32BitAddress: 0
  occupancy: 20
body: |
  bb.0..entry:
    liveins: $sgpr2, $vgpr0, $vgpr1

    ; Bring the live-in physical registers into virtual registers.
    %4:vgpr_32 = COPY $vgpr1
    %3:vgpr_32 = COPY $vgpr0
    %2:sgpr_32 = COPY $sgpr2
    ; Both interpolation instructions read implicit $m0, so the user SGPR is
    ; copied into m0 before each one (presumably the attribute/primitive
    ; descriptor for the interpolants -- confirm against the pipeline setup).
    $m0 = COPY %2
    %6:vgpr_32 = V_INTERP_P1_F32 %3, 0, 0, implicit $mode, implicit $m0, implicit $exec
    $m0 = COPY %2
    %7:vgpr_32 = V_INTERP_P2_F32 %6, %4, 0, 0, implicit $mode, implicit $m0, implicit $exec
    ; Convert the interpolated float to an unsigned 32-bit integer.
    %8:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, killed %7, 0, 0, implicit $mode, implicit $exec
    ; The remaining three export channels carry undefined values
    ; (IMPLICIT_DEF copied into VGPRs to satisfy the EXP operand classes).
    %9:sgpr_32 = IMPLICIT_DEF
    %10:vgpr_32 = COPY %9
    %11:sgpr_32 = IMPLICIT_DEF
    %12:vgpr_32 = COPY %11
    %13:sgpr_32 = IMPLICIT_DEF
    %14:vgpr_32 = COPY %13
    ; Final export of %8 to target 0, then end the program.
    EXP_DONE 0, killed %8, %10, %12, %14, -1, 0, 1, implicit $exec
    S_ENDPGM 0
...
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment