Created
January 19, 2022 19:04
-
-
Save Flakebi/fd1d91a806b60ec330e9f61e19fe62ac to your computer and use it in GitHub Desktop.
MachineSink failure
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
---
; Compile with: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -verify-machineinstrs -start-before=machine-sink -stop-after=machine-sink PipelineVsFs_0xDD57C231E25DA514.mir -o PipelineVsFs_0xDD57C231E25DA514-after.mir | |
; ModuleID = 'lgcPipeline' | |
source_filename = "lgcPipeline" | |
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" | |
target triple = "amdgcn--amdpal" | |
%llpc.array.element = type <{ float, [12 x i8] }> | |
; NOTE(review): reduced llc reproducer for a MachineSink failure (see the llc
; command line above). Instruction order, block layout, and the structurized
; control-flow intrinsics (llvm.amdgcn.if/loop/end.cf) are likely load-bearing
; for the repro — comments only, do not reformat or re-order code.
; Trailing " | |" on each line is a scrape artifact of the gist page, preserved.
define dllexport amdgpu_gs void @_amdgpu_gs_main(i32 inreg %userDataAddrLow, i32 inreg %userDataAddrHigh, i32 inreg %mergedGroupInfo, i32 inreg %mergedWaveInfo, i32 inreg %offChipLdsBase, i32 inreg %sharedScratchOffset, i32 inreg %primShaderTableAddrLow, i32 inreg %primShaderTableAddrHigh, <7 x i32> inreg %userData, i32 %esGsOffsets01, i32 %esGsOffsets23, i32 %gsPrimitiveId, i32 %invocationId, i32 %esGsOffsets45, i32 %vertexId, i32 %relVertexId, i32 %vsPrimitiveId, i32 %instanceId) #0 !lgc.shaderstage !4 { | |
.entry: | |
call void @llvm.amdgcn.init.exec(i64 -1) | |
; Unpack subgroup primitive/vertex counts from mergedGroupInfo
; (9-bit fields at bit 22 and bit 12 respectively).
%0 = lshr i32 %mergedGroupInfo, 22 | |
%primCountInSubgroup = and i32 %0, 511 | |
%1 = lshr i32 %mergedGroupInfo, 12 | |
%vertCountInSubgroup = and i32 %1, 511 | |
; waveIdInSubgroup = 4-bit field at bit 24 of mergedWaveInfo.
%2 = lshr i32 %mergedWaveInfo, 24 | |
%waveIdInSubgroup = and i32 %2, 15 | |
call void @llvm.amdgcn.s.barrier() | |
%3 = icmp eq i32 %waveIdInSubgroup, 0 | |
br i1 %3, label %.allocReq, label %.endAllocReq, !amdgpu.uniform !5 | |
; Wave 0 only: send message 9 with packed prim/vert counts
; (presumably GS_ALLOC_REQ — confirm against the AMDGPU usage guide).
.allocReq: ; preds = %.entry | |
%4 = shl nuw nsw i32 %primCountInSubgroup, 12 | |
%5 = or i32 %4, %vertCountInSubgroup | |
call void @llvm.amdgcn.s.sendmsg(i32 9, i32 %5) | |
br label %.endAllocReq, !amdgpu.uniform !5 | |
; Compute this thread's id within the subgroup via mbcnt.lo/hi plus
; waveIdInSubgroup * 64 (wave64 — matches +wavefrontsize64 in attrs #0).
.endAllocReq: ; preds = %.allocReq, %.entry | |
%6 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) | |
%threadIdInWave = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %6) | |
%7 = shl nuw nsw i32 %waveIdInSubgroup, 6 | |
%threadIdInSubgroup = add i32 %7, %threadIdInWave | |
%8 = icmp ult i32 %threadIdInSubgroup, %primCountInSubgroup | |
%9 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %8) | |
%10 = extractvalue { i1, i64 } %9, 0 | |
%11 = extractvalue { i1, i64 } %9, 1 | |
br i1 %10, label %.expPrim, label %.endExpPrim | |
; Threads below primCountInSubgroup export to target 20 (primitive export).
.expPrim: ; preds = %.endAllocReq | |
call void @llvm.amdgcn.exp.i32(i32 20, i32 1, i32 %esGsOffsets01, i32 undef, i32 undef, i32 undef, i1 true, i1 false) | |
br label %.endExpPrim, !amdgpu.uniform !5 | |
.endExpPrim: ; preds = %.expPrim, %.endAllocReq | |
call void @llvm.amdgcn.end.cf.i64(i64 %11) | |
%12 = icmp ult i32 %threadIdInSubgroup, %vertCountInSubgroup | |
%13 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %12) | |
%14 = extractvalue { i1, i64 } %13, 0 | |
%15 = extractvalue { i1, i64 } %13, 1 | |
br i1 %14, label %.expVert, label %Flow14 | |
; Vertex path: build two 64-bit descriptor addresses from userData words and
; the high half of s.getpc, load descriptors, then tbuffer-load vertex data.
.expVert: ; preds = %.endExpPrim | |
%16 = extractelement <7 x i32> %userData, i64 2 | |
%17 = extractelement <7 x i32> %userData, i64 4 | |
%18 = extractelement <7 x i32> %userData, i64 5 | |
%19 = call i64 @llvm.amdgcn.s.getpc() #11 | |
%extelt.offset.i = lshr i64 %19, 32 | |
%.i1.i = trunc i64 %extelt.offset.i to i32 | |
%.upto0.i = insertelement <2 x i32> poison, i32 %16, i64 0 | |
%20 = insertelement <2 x i32> %.upto0.i, i32 %.i1.i, i64 1 | |
%21 = bitcast <2 x i32> %20 to i64 | |
%VertexIndex.i = add i32 %18, %vertexId | |
%.upto023.i = insertelement <2 x i32> poison, i32 %17, i64 0 | |
%22 = insertelement <2 x i32> %.upto023.i, i32 %.i1.i, i64 1 | |
%23 = bitcast <2 x i32> %22 to i64 | |
%24 = inttoptr i64 %23 to <4 x i32> addrspace(4)*, !amdgpu.uniform !5 | |
%25 = inttoptr i64 %21 to <4 x i32> addrspace(4)*, !amdgpu.uniform !5 | |
%26 = load <4 x i32>, <4 x i32> addrspace(4)* %25, align 16 | |
%27 = load <4 x i32>, <4 x i32> addrspace(4)* %24, align 16, !invariant.load !5 | |
%28 = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %27, i32 %VertexIndex.i, i32 0, i32 0, i32 immarg 77, i32 immarg 0) #5 | |
; %32 is a ballot-style mask of active lanes (icmp ne of an always-1 inline
; asm value); lanes with %30 != 0 enter the first accumulation loop.
%29 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #11 | |
%30 = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %29) #11 | |
%31 = call i32 asm sideeffect "; %1", "=v,0"(i32 1) #11 | |
%32 = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %31, i32 0, i32 33) #11 | |
%.not19.i = icmp eq i32 %30, 0 | |
%.not19.i.inv = xor i1 %.not19.i, true | |
%33 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %.not19.i.inv) | |
%34 = extractvalue { i1, i64 } %33, 0 | |
%35 = extractvalue { i1, i64 } %33, 1 | |
br i1 %34, label %.lr.ph16.i.preheader, label %Flow13 | |
.lr.ph16.i.preheader: ; preds = %.expVert | |
%36 = zext i32 %30 to i64 | |
br label %.lr.ph16.i, !amdgpu.uniform !5 | |
Flow13: ; preds = %Flow12, %.expVert | |
%37 = phi float [ %.1.i.lcssa, %Flow12 ], [ 0.000000e+00, %.expVert ] | |
call void @llvm.amdgcn.end.cf.i64(i64 %35) | |
br label %._crit_edge17.i, !amdgpu.uniform !5 | |
; First reduction loop: for each lane index < %30, conditionally (lane active
; in mask %32) s_buffer_load at offset lane*16 and accumulate into a float.
; Loop exit is driven by the if.break/loop structurizer intrinsics.
.lr.ph16.i: ; preds = %.lr.ph16.i.preheader, %44 | |
%phi.broken = phi i64 [ 0, %.lr.ph16.i.preheader ], [ %45, %44 ] | |
%lsr.iv8 = phi i32 [ %lsr.iv.next9, %44 ], [ 0, %.lr.ph16.i.preheader ] | |
%lsr.iv4 = phi i64 [ %lsr.iv.next5, %44 ], [ 0, %.lr.ph16.i.preheader ] | |
%.02913.i = phi float [ %.1.i, %44 ], [ 0.000000e+00, %.lr.ph16.i.preheader ] | |
%38 = shl nuw i64 1, %lsr.iv4 | |
%39 = and i64 %38, %32 | |
%.not9.i = icmp eq i64 %39, 0 | |
%.not9.i.inv = xor i1 %.not9.i, true | |
br i1 %.not9.i.inv, label %40, label %44, !amdgpu.uniform !5 | |
40: ; preds = %.lr.ph16.i | |
%41 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %26, i32 %lsr.iv8, i32 0) #11 | |
%42 = bitcast i32 %41 to float | |
%43 = fadd reassoc nnan nsz arcp contract afn float %.02913.i, %42 | |
br label %44, !amdgpu.uniform !5 | |
44: ; preds = %40, %.lr.ph16.i | |
%.1.i = phi float [ %43, %40 ], [ %.02913.i, %.lr.ph16.i ] | |
%lsr.iv.next5 = add nuw nsw i64 %lsr.iv4, 1 | |
%tmp6 = trunc i64 %lsr.iv.next5 to i32 | |
%lsr.iv.next9 = add i32 %lsr.iv8, 16 | |
%exitcond21.not.i = icmp eq i32 %30, %tmp6 | |
%45 = call i64 @llvm.amdgcn.if.break.i64(i1 %exitcond21.not.i, i64 %phi.broken) | |
%46 = call i1 @llvm.amdgcn.loop.i64(i64 %45) | |
br i1 %46, label %Flow12, label %.lr.ph16.i | |
Flow12: ; preds = %44 | |
%.1.i.lcssa = phi float [ %.1.i, %44 ] | |
%.lcssa16 = phi i64 [ %45, %44 ] | |
call void @llvm.amdgcn.end.cf.i64(i64 %.lcssa16) | |
br label %Flow13, !amdgpu.uniform !5 | |
Flow14: ; preds = %lgc.ngg.ES.main.exit, %.endExpPrim | |
call void @llvm.amdgcn.end.cf.i64(i64 %15) | |
br label %.endExpVert, !amdgpu.uniform !5 | |
; Second computation of the sum: a DPP/permlane wave-wide add reduction over
; the per-lane buffer value (set.inactive + update.dpp + readlane/writelane +
; permlanex16, then wwm) — looks like a wave64 inclusive-scan style reduction,
; confirm against the AMDGPU intrinsic docs.
._crit_edge17.i: ; preds = %Flow13 | |
%47 = getelementptr <{ [128 x %llpc.array.element] }>, <{ [128 x %llpc.array.element] }> addrspace(6)* null, i32 0, i32 0, i32 %30, i32 0 | |
%48 = ptrtoint float addrspace(6)* %47 to i32 | |
%49 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %26, i32 %48, i32 0) #11 | |
%50 = call i32 asm sideeffect "; %1", "=v,0"(i32 %49) #11 | |
%51 = call i32 @llvm.amdgcn.set.inactive.i32(i32 %50, i32 0) #11 | |
%52 = zext i32 %30 to i64 | |
%53 = shl nuw i64 1, %52 | |
%54 = call i32 @llvm.amdgcn.permlane16(i32 undef, i32 %51, i32 1698898191, i32 -305419897, i1 true, i1 false) #11 | |
%55 = call i32 @llvm.amdgcn.readlane(i32 %54, i32 16) #11 | |
%56 = call i32 @llvm.amdgcn.writelane(i32 %55, i32 48, i32 %54) #11 | |
%57 = call i32 @llvm.amdgcn.writelane(i32 0, i32 16, i32 %56) #11 | |
%58 = call i32 @llvm.amdgcn.permlanex16(i32 undef, i32 %57, i32 0, i32 -1, i1 true, i1 false) #11 | |
%59 = and i64 %53, 281479271743489 | |
%.not1.i = icmp eq i64 %59, 0 | |
%.v.i = select i1 %.not1.i, i32 %57, i32 %58 | |
%60 = bitcast i32 %.v.i to float | |
%61 = call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %.v.i, i32 273, i32 15, i32 15, i1 false) #11 | |
%62 = bitcast i32 %61 to float | |
%63 = fadd reassoc nnan nsz arcp contract afn float %60, %62 | |
%64 = call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %.v.i, i32 274, i32 15, i32 15, i1 false) #11 | |
%65 = bitcast i32 %64 to float | |
%66 = fadd reassoc nnan nsz arcp contract afn float %63, %65 | |
%67 = call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %.v.i, i32 275, i32 15, i32 15, i1 false) #11 | |
%68 = bitcast i32 %67 to float | |
%69 = fadd reassoc nnan nsz arcp contract afn float %66, %68 | |
%70 = bitcast float %69 to i32 | |
%71 = call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %70, i32 276, i32 15, i32 14, i1 false) #11 | |
%72 = bitcast i32 %71 to float | |
%73 = fadd reassoc nnan nsz arcp contract afn float %69, %72 | |
%74 = bitcast float %73 to i32 | |
%75 = call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %74, i32 280, i32 15, i32 12, i1 false) #11 | |
%76 = bitcast i32 %75 to float | |
%77 = fadd reassoc nnan nsz arcp contract afn float %73, %76 | |
%78 = bitcast float %77 to i32 | |
%79 = call i32 @llvm.amdgcn.permlanex16(i32 undef, i32 %78, i32 -1, i32 -1, i1 true, i1 false) #11 | |
%80 = bitcast i32 %79 to float | |
%81 = and i64 %53, -281470681808896 | |
%.not2.i = icmp eq i64 %81, 0 | |
%82 = select reassoc nnan nsz arcp contract afn i1 %.not2.i, float 0.000000e+00, float %80 | |
%83 = fadd reassoc nnan nsz arcp contract afn float %77, %82 | |
%84 = bitcast float %83 to i32 | |
%85 = call i32 @llvm.amdgcn.readlane(i32 %84, i32 31) #11 | |
%86 = bitcast i32 %85 to float | |
%.not3.i = icmp ult i32 %30, 32 | |
%87 = select reassoc nnan nsz arcp contract afn i1 %.not3.i, float 0.000000e+00, float %86 | |
%88 = fadd reassoc nnan nsz arcp contract afn float %83, %87 | |
%89 = bitcast float %88 to i32 | |
%90 = call i32 @llvm.amdgcn.wwm.i32(i32 %89) #11 | |
; Odd lanes only (bit 0 of %30 set): repeat the loop + reduction a second
; time and compare the two results below in %._crit_edge.i.
%91 = and i32 %30, 1 | |
%.not.i = icmp eq i32 %91, 0 | |
%.not.i.inv = xor i1 %.not.i, true | |
%92 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %.not.i.inv) | |
%93 = extractvalue { i1, i64 } %92, 0 | |
%94 = extractvalue { i1, i64 } %92, 1 | |
br i1 %93, label %95, label %Flow11 | |
95: ; preds = %._crit_edge17.i | |
%96 = call i32 asm sideeffect "; %1", "=v,0"(i32 1) #11 | |
%97 = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %96, i32 0, i32 33) #11 | |
%98 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %.not19.i.inv) | |
%99 = extractvalue { i1, i64 } %98, 0 | |
%100 = extractvalue { i1, i64 } %98, 1 | |
br i1 %99, label %.lr.ph.i.preheader, label %Flow10 | |
.lr.ph.i.preheader: ; preds = %95 | |
br label %.lr.ph.i, !amdgpu.uniform !5 | |
Flow10: ; preds = %Flow, %95 | |
%101 = phi float [ %.3.i.lcssa, %Flow ], [ 0.000000e+00, %95 ] | |
call void @llvm.amdgcn.end.cf.i64(i64 %100) | |
br label %._crit_edge.i, !amdgpu.uniform !5 | |
; Second accumulation loop — structurally identical to .lr.ph16.i above but
; guarded by the fresh mask %97.
.lr.ph.i: ; preds = %.lr.ph.i.preheader, %108 | |
%phi.broken15 = phi i64 [ 0, %.lr.ph.i.preheader ], [ %109, %108 ] | |
%lsr.iv2 = phi i32 [ %lsr.iv.next3, %108 ], [ 0, %.lr.ph.i.preheader ] | |
%lsr.iv = phi i64 [ %lsr.iv.next, %108 ], [ 0, %.lr.ph.i.preheader ] | |
%.210.i = phi float [ %.3.i, %108 ], [ 0.000000e+00, %.lr.ph.i.preheader ] | |
%102 = shl nuw i64 1, %lsr.iv | |
%103 = and i64 %102, %97 | |
%.not8.i = icmp eq i64 %103, 0 | |
%.not8.i.inv = xor i1 %.not8.i, true | |
br i1 %.not8.i.inv, label %104, label %108, !amdgpu.uniform !5 | |
104: ; preds = %.lr.ph.i | |
%105 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %26, i32 %lsr.iv2, i32 0) #11 | |
%106 = bitcast i32 %105 to float | |
%107 = fadd reassoc nnan nsz arcp contract afn float %.210.i, %106 | |
br label %108, !amdgpu.uniform !5 | |
108: ; preds = %104, %.lr.ph.i | |
%.3.i = phi float [ %107, %104 ], [ %.210.i, %.lr.ph.i ] | |
%lsr.iv.next = add nuw nsw i64 %lsr.iv, 1 | |
%tmp = trunc i64 %lsr.iv.next to i32 | |
%lsr.iv.next3 = add i32 %lsr.iv2, 16 | |
%exitcond.not.i = icmp eq i32 %30, %tmp | |
%109 = call i64 @llvm.amdgcn.if.break.i64(i1 %exitcond.not.i, i64 %phi.broken15) | |
%110 = call i1 @llvm.amdgcn.loop.i64(i64 %109) | |
br i1 %110, label %Flow, label %.lr.ph.i | |
Flow: ; preds = %108 | |
%.3.i.lcssa = phi float [ %.3.i, %108 ] | |
%.lcssa = phi i64 [ %109, %108 ] | |
call void @llvm.amdgcn.end.cf.i64(i64 %.lcssa) | |
br label %Flow10, !amdgpu.uniform !5 | |
Flow11: ; preds = %._crit_edge.i, %._crit_edge17.i | |
%111 = phi i32 [ %153, %._crit_edge.i ], [ 2, %._crit_edge17.i ] | |
call void @llvm.amdgcn.end.cf.i64(i64 %94) | |
br label %lgc.ngg.ES.main.exit, !amdgpu.uniform !5 | |
; Second DPP/permlane reduction of %49, then |loop_sum - reduction| < ~1e-5
; yields flag value 2 or 0 in %153.
._crit_edge.i: ; preds = %Flow10 | |
%112 = call i32 asm sideeffect "; %1", "=v,0"(i32 %49) #11 | |
%113 = call i32 @llvm.amdgcn.set.inactive.i32(i32 %112, i32 0) #11 | |
%114 = call i32 @llvm.amdgcn.permlane16(i32 undef, i32 %113, i32 1698898191, i32 -305419897, i1 true, i1 false) #11 | |
%115 = call i32 @llvm.amdgcn.readlane(i32 %114, i32 16) #11 | |
%116 = call i32 @llvm.amdgcn.writelane(i32 %115, i32 48, i32 %114) #11 | |
%117 = call i32 @llvm.amdgcn.writelane(i32 0, i32 16, i32 %116) #11 | |
%118 = call i32 @llvm.amdgcn.permlanex16(i32 undef, i32 %117, i32 0, i32 -1, i1 true, i1 false) #11 | |
%.v5.i = select i1 %.not1.i, i32 %117, i32 %118 | |
%119 = bitcast i32 %.v5.i to float | |
%120 = call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %.v5.i, i32 273, i32 15, i32 15, i1 false) #11 | |
%121 = bitcast i32 %120 to float | |
%122 = fadd reassoc nnan nsz arcp contract afn float %119, %121 | |
%123 = call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %.v5.i, i32 274, i32 15, i32 15, i1 false) #11 | |
%124 = bitcast i32 %123 to float | |
%125 = fadd reassoc nnan nsz arcp contract afn float %122, %124 | |
%126 = call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %.v5.i, i32 275, i32 15, i32 15, i1 false) #11 | |
%127 = bitcast i32 %126 to float | |
%128 = fadd reassoc nnan nsz arcp contract afn float %125, %127 | |
%129 = bitcast float %128 to i32 | |
%130 = call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %129, i32 276, i32 15, i32 14, i1 false) #11 | |
%131 = bitcast i32 %130 to float | |
%132 = fadd reassoc nnan nsz arcp contract afn float %128, %131 | |
%133 = bitcast float %132 to i32 | |
%134 = call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %133, i32 280, i32 15, i32 12, i1 false) #11 | |
%135 = bitcast i32 %134 to float | |
%136 = fadd reassoc nnan nsz arcp contract afn float %132, %135 | |
%137 = bitcast float %136 to i32 | |
%138 = call i32 @llvm.amdgcn.permlanex16(i32 undef, i32 %137, i32 -1, i32 -1, i1 true, i1 false) #11 | |
%139 = bitcast i32 %138 to float | |
%140 = select reassoc nnan nsz arcp contract afn i1 %.not2.i, float 0.000000e+00, float %139 | |
%141 = fadd reassoc nnan nsz arcp contract afn float %136, %140 | |
%142 = bitcast float %141 to i32 | |
%143 = call i32 @llvm.amdgcn.readlane(i32 %142, i32 31) #11 | |
%144 = bitcast i32 %143 to float | |
%145 = select reassoc nnan nsz arcp contract afn i1 %.not3.i, float 0.000000e+00, float %144 | |
%146 = fadd reassoc nnan nsz arcp contract afn float %141, %145 | |
%147 = bitcast float %146 to i32 | |
%148 = call i32 @llvm.amdgcn.wwm.i32(i32 %147) #11 | |
%149 = bitcast i32 %148 to float | |
%150 = fsub reassoc nnan nsz arcp contract afn float %101, %149 | |
%151 = call reassoc nnan nsz arcp contract afn float @llvm.fabs.f32(float %150) #11 | |
%152 = fcmp olt float %151, 0x3EE4F8B580000000 | |
%153 = select i1 %152, i32 2, i32 0 | |
br label %Flow11, !amdgpu.uniform !5 | |
; Exports: position (target 12), prim-id-like value (target 13) and a param
; export (target 32) carrying the OR of both comparison flags as a float.
lgc.ngg.ES.main.exit: ; preds = %Flow11 | |
%bc29.i = bitcast <4 x i32> %28 to <4 x float> | |
%vertex0.0.i3.i = extractelement <4 x float> %bc29.i, i64 3 | |
%vertex0.0.i2.i = extractelement <4 x float> %bc29.i, i64 2 | |
%vertex0.0.i1.i = extractelement <4 x float> %bc29.i, i64 1 | |
%vertex0.0.i0.i = extractelement <4 x float> %bc29.i, i64 0 | |
%154 = bitcast i32 %90 to float | |
%155 = fsub reassoc nnan nsz arcp contract afn float %37, %154 | |
%156 = call reassoc nnan nsz arcp contract afn float @llvm.fabs.f32(float %155) #11 | |
%157 = fcmp olt float %156, 0x3EE4F8B580000000 | |
%158 = zext i1 %157 to i32 | |
%.028.i = or i32 %111, %158 | |
%159 = uitofp i32 %.028.i to float | |
call void @llvm.amdgcn.exp.f32(i32 immarg 12, i32 immarg 15, float %vertex0.0.i0.i, float %vertex0.0.i1.i, float %vertex0.0.i2.i, float %vertex0.0.i3.i, i1 immarg false, i1 immarg false) #6 | |
call void @llvm.amdgcn.exp.f32(i32 immarg 13, i32 immarg 1, float 1.000000e+00, float undef, float undef, float undef, i1 immarg true, i1 immarg false) #6 | |
call void @llvm.amdgcn.exp.f32(i32 32, i32 1, float %159, float undef, float undef, float undef, i1 false, i1 false) #11 | |
br label %Flow14, !amdgpu.uniform !5 | |
.endExpVert: ; preds = %Flow14 | |
ret void | |
} | |
; Function Attrs: nofree nosync nounwind readnone speculatable willreturn | |
declare float @llvm.fabs.f32(float) #1 | |
; Function Attrs: nounwind readnone willreturn | |
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #2 | |
; Function Attrs: nounwind readnone willreturn | |
declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #2 | |
; Function Attrs: convergent nounwind readnone willreturn | |
declare i64 @llvm.amdgcn.icmp.i64.i32(i32, i32, i32 immarg) #3 | |
; Function Attrs: convergent nounwind readnone willreturn | |
declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32) #3 | |
; Function Attrs: convergent nounwind readnone willreturn | |
declare i32 @llvm.amdgcn.permlane16(i32, i32, i32, i32, i1 immarg, i1 immarg) #3 | |
; Function Attrs: convergent nounwind readnone willreturn | |
declare i32 @llvm.amdgcn.readlane(i32, i32) #3 | |
; Function Attrs: convergent nounwind readnone willreturn | |
declare i32 @llvm.amdgcn.writelane(i32, i32, i32) #3 | |
; Function Attrs: convergent nounwind readnone willreturn | |
declare i32 @llvm.amdgcn.permlanex16(i32, i32, i32, i32, i1 immarg, i1 immarg) #3 | |
; Function Attrs: convergent nounwind readnone willreturn | |
declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32 immarg, i32 immarg, i32 immarg, i1 immarg) #3 | |
; Function Attrs: convergent nounwind readnone speculatable willreturn | |
declare i32 @llvm.amdgcn.wwm.i32(i32) #4 | |
; Function Attrs: nounwind readonly willreturn | |
declare <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32>, i32, i32, i32, i32 immarg, i32 immarg) #5 | |
; Function Attrs: inaccessiblememonly nounwind willreturn writeonly | |
declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #6 | |
; Function Attrs: nounwind | |
; Pixel shader half of the reproducer pipeline: interpolates attribute 0
; component 0 at center (interp.p1/p2), converts to unsigned int, and exports
; the raw bits to MRT0 (target 0, done+vm). Trailing " | |" on each line is a
; scrape artifact of the gist page, preserved.
define dllexport amdgpu_ps void @_amdgpu_ps_main(i32 inreg %globalTable, i32 inreg %perShaderTable, i32 inreg %PrimMask, <2 x float> %PerspInterpSample, <2 x float> %PerspInterpCenter, <2 x float> %PerspInterpCentroid, <3 x float> %PerspInterpPullMode, <2 x float> %LinearInterpSample, <2 x float> %LinearInterpCenter, <2 x float> %LinearInterpCentroid, float %LineStipple, float %FragCoordX, float %FragCoordY, float %FragCoordZ, float %FragCoordW, i32 %FrontFacing, i32 %Ancillary, i32 %SampleCoverage, i32 %FixedXY) #7 !lgc.shaderstage !6 { | |
.entry: | |
%PerspInterpCenter.i1 = extractelement <2 x float> %PerspInterpCenter, i64 1 | |
%PerspInterpCenter.i0 = extractelement <2 x float> %PerspInterpCenter, i64 0 | |
%0 = call float @llvm.amdgcn.interp.p1(float %PerspInterpCenter.i0, i32 immarg 0, i32 immarg 0, i32 %PrimMask) #8 | |
%1 = call float @llvm.amdgcn.interp.p2(float %0, float %PerspInterpCenter.i1, i32 immarg 0, i32 immarg 0, i32 %PrimMask) #8 | |
%2 = fptoui float %1 to i32 | |
%3 = bitcast i32 %2 to float | |
call void @llvm.amdgcn.exp.f32(i32 immarg 0, i32 immarg 1, float %3, float undef, float undef, float undef, i1 immarg true, i1 immarg true) #6 | |
ret void | |
} | |
; Function Attrs: nounwind readnone speculatable willreturn | |
declare i64 @llvm.amdgcn.s.getpc() #8 | |
; Function Attrs: nounwind readnone speculatable willreturn | |
declare float @llvm.amdgcn.interp.p1(float, i32 immarg, i32 immarg, i32) #8 | |
; Function Attrs: nounwind readnone speculatable willreturn | |
declare float @llvm.amdgcn.interp.p2(float, float, i32 immarg, i32 immarg, i32) #8 | |
; Function Attrs: nounwind readnone willreturn | |
declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg) #2 | |
; Function Attrs: convergent nounwind | |
declare void @llvm.amdgcn.init.exec(i64 immarg) #9 | |
; Function Attrs: convergent nounwind willreturn | |
declare void @llvm.amdgcn.s.barrier() #10 | |
; Function Attrs: nounwind | |
declare void @llvm.amdgcn.s.sendmsg(i32 immarg, i32) #11 | |
; Function Attrs: inaccessiblememonly nounwind willreturn writeonly | |
declare void @llvm.amdgcn.exp.i32(i32 immarg, i32 immarg, i32, i32, i32, i32, i1 immarg, i1 immarg) #6 | |
; Function Attrs: convergent nounwind willreturn | |
declare { i1, i64 } @llvm.amdgcn.if.i64(i1) #10 | |
; Function Attrs: convergent nounwind willreturn | |
declare { i1, i64 } @llvm.amdgcn.else.i64.i64(i64) #10 | |
; Function Attrs: convergent nounwind readnone willreturn | |
declare i64 @llvm.amdgcn.if.break.i64(i1, i64) #3 | |
; Function Attrs: convergent nounwind willreturn | |
declare i1 @llvm.amdgcn.loop.i64(i64) #10 | |
; Function Attrs: convergent nounwind willreturn | |
declare void @llvm.amdgcn.end.cf.i64(i64) #10 | |
attributes #0 = { "amdgpu-flat-work-group-size"="128,128" "denormal-fp-math-f32"="preserve-sign" "target-features"=",+wavefrontsize64,+cumode,-xnack" "uniform-work-group-size"="false" } | |
attributes #1 = { nofree nosync nounwind readnone speculatable willreturn } | |
attributes #2 = { nounwind readnone willreturn } | |
attributes #3 = { convergent nounwind readnone willreturn } | |
attributes #4 = { convergent nounwind readnone speculatable willreturn } | |
attributes #5 = { nounwind readonly willreturn } | |
attributes #6 = { inaccessiblememonly nounwind willreturn writeonly } | |
attributes #7 = { nounwind "InitialPSInputAddr"="2" "amdgpu-color-export"="1" "amdgpu-depth-export"="0" "amdgpu-unroll-threshold"="700" "denormal-fp-math-f32"="preserve-sign" "target-features"=",+wavefrontsize64,+cumode,-xnack" "uniform-work-group-size"="false" } | |
attributes #8 = { nounwind readnone speculatable willreturn } | |
attributes #9 = { convergent nounwind } | |
attributes #10 = { convergent nounwind willreturn } | |
attributes #11 = { nounwind } | |
!lgc.client = !{!0} | |
!lgc.options.VS = !{!1} | |
!lgc.options.FS = !{!2} | |
!amdgpu.pal.metadata.msgpack = !{!3} | |
!0 = !{!"Vulkan"} | |
!1 = !{i32 609901243, i32 -1828602377, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 64, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 20, i32 1800} | |
!2 = !{i32 -1768611618, i32 2041457464, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 64, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 20, i32 1800} | |
!3 = !{!"\82\B0amdpal.pipelines\91\8A\A4.api\A6Vulkan\B0.hardware_stages\82\A3.gs\83\AB.sgpr_limith\AB.vgpr_limit\CD\01\00\AF.wavefront_size@\A3.ps\83\AB.sgpr_limith\AB.vgpr_limit\CD\01\00\AF.wavefront_size@\B7.internal_pipeline_hash\92\CF\DDW\C21\E2]\A5\14\CF\C6N}\07gn\AB\8B\B0.nggSubgroupSize\CC\80\AA.registers\DE\00C\CD,\01\00\CD,\06\CE\EF;\0B\E6\CD,\0A\CE\02,\00\00\CD,\0B\04\CD,\0C\CE\10\00\00\00\CD,2\01\CD,3\01\CD,4\01\CD,5\01\CD,\80\CE\B7[\E7L\CD,\81\00\CD,\8A\CE\22,\00\00\CD,\8B\0E\CD,\8C\CE\10\00\00\00\CD,\8E\00\CD,\8F\CE\10\00\00\0A\CD,\90\CE\10\00\00\0F\CD,\91\CE\10\00\00\03\CD,\92\CE\10\00\00\04\CD,\B2\01\CD,\B3\01\CD,\B4\01\CD,\B5\01\CD\A0\8F\01\CD\A1\91\00\CD\A1\B1\00\CD\A1\B3\00\CD\A1\B4\00\CD\A1\B5\00\CD\A1\B6\01\CD\A1\B8\CE\01\00\00\00\CD\A1\C2\01\CD\A1\C3D\CD\A1\C4\00\CD\A1\C5\01\CD\A1\FF\CC\80\CD\A2\03\CD\08\10\CD\A2\04\CE\01\00\00\00\CD\A2\06\CD\04?\CD\A2\07\CE\01!\00\00\CD\A2\10\02\CD\A2\90\CE\000\00\03\CD\A2\91\CE \04\00~\CD\A2\93\CE\06\02\01\8C\CD\A2\97\02\CD\A2\98\00\CD\A2\99\00\CD\A2\9A\00\CD\A2\9B\00\CD\A2\A1\00\CD\A2\AB\01\CD\A2\AC\00\CD\A2\AD\00\CD\A2\CE\01\CD\A2\D3\CE\00\02\00\01\CD\A2\D5\CE\02\01 \10\CD\A2\D7\00\CD\A2\D8\00\CD\A2\D9\00\CD\A2\DA\00\CD\A2\E4\00\CD\A2\F8\00\CD\A2\F9-\CD\A3\10\00\CD\C2X\7F\CD\C2_\00\CD\C2b\00\A8.shaders\82\A6.pixel\82\B0.api_shader_hash\92\CFy\AE+8\96\95 \DE\00\B1.hardware_mapping\91\A3.ps\A7.vertex\82\B0.api_shader_hash\92\CF\93\01\BD\F7$ZZ\BB\00\B1.hardware_mapping\91\A3.gs\B0.spill_threshold\CE\FF\FF\FF\FF\A5.type\A3Ngg\B0.user_data_limit\01\AF.xgl_cache_info\82\B3.128_bit_cache_hash\92\CF,\C2\BF\A8}\D5\A7\F8\CF\16\98\06\8C\C5e\18A\AD.llpc_version\A452.1\AEamdpal.version\92\02\03"} | |
!4 = !{i32 1} | |
!5 = !{} | |
!6 = !{i32 6} | |
... | |
--- | |
name: _amdgpu_gs_main | |
alignment: 1 | |
exposesReturnsTwice: false | |
legalized: false | |
regBankSelected: false | |
selected: false | |
failedISel: false | |
tracksRegLiveness: true | |
hasWinCFI: false | |
failsVerification: false | |
tracksDebugUserValues: false | |
registers: | |
- { id: 0, class: sreg_32, preferred-register: '' } | |
- { id: 1, class: sreg_32, preferred-register: '' } | |
- { id: 2, class: sreg_32, preferred-register: '' } | |
- { id: 3, class: vgpr_32, preferred-register: '' } | |
- { id: 4, class: sreg_64, preferred-register: '' } | |
- { id: 5, class: sreg_64, preferred-register: '' } | |
- { id: 6, class: sgpr_128, preferred-register: '' } | |
- { id: 7, class: vreg_128, preferred-register: '' } | |
- { id: 8, class: vgpr_32, preferred-register: '' } | |
- { id: 9, class: sreg_64, preferred-register: '' } | |
- { id: 10, class: sreg_64, preferred-register: '' } | |
- { id: 11, class: sreg_64, preferred-register: '' } | |
- { id: 12, class: vgpr_32, preferred-register: '' } | |
- { id: 13, class: sreg_64, preferred-register: '' } | |
- { id: 14, class: sreg_32, preferred-register: '' } | |
- { id: 15, class: sreg_64, preferred-register: '' } | |
- { id: 16, class: sgpr_32, preferred-register: '' } | |
- { id: 17, class: sgpr_32, preferred-register: '' } | |
- { id: 18, class: sgpr_32, preferred-register: '' } | |
- { id: 19, class: sreg_64, preferred-register: '' } | |
- { id: 20, class: sreg_32, preferred-register: '' } | |
- { id: 21, class: sreg_64, preferred-register: '' } | |
- { id: 22, class: vgpr_32, preferred-register: '' } | |
- { id: 23, class: sreg_64, preferred-register: '' } | |
- { id: 24, class: vgpr_32, preferred-register: '' } | |
- { id: 25, class: sreg_64_xexec, preferred-register: '' } | |
- { id: 26, class: sreg_64_xexec, preferred-register: '' } | |
- { id: 27, class: sreg_64_xexec, preferred-register: '' } | |
- { id: 28, class: vgpr_32, preferred-register: '' } | |
- { id: 29, class: sreg_64, preferred-register: '' } | |
- { id: 30, class: sreg_64, preferred-register: '' } | |
- { id: 31, class: sreg_64, preferred-register: '' } | |
- { id: 32, class: vgpr_32, preferred-register: '' } | |
- { id: 33, class: sreg_64, preferred-register: '' } | |
- { id: 34, class: sreg_32, preferred-register: '' } | |
- { id: 35, class: sreg_64, preferred-register: '' } | |
- { id: 36, class: sgpr_32, preferred-register: '' } | |
- { id: 37, class: sgpr_32, preferred-register: '' } | |
- { id: 38, class: sgpr_32, preferred-register: '' } | |
- { id: 39, class: sreg_64, preferred-register: '' } | |
- { id: 40, class: sreg_32, preferred-register: '' } | |
- { id: 41, class: sreg_64, preferred-register: '' } | |
- { id: 42, class: vgpr_32, preferred-register: '' } | |
- { id: 43, class: sreg_64, preferred-register: '' } | |
- { id: 44, class: vgpr_32, preferred-register: '' } | |
- { id: 45, class: vgpr_32, preferred-register: '' } | |
- { id: 46, class: sgpr_32, preferred-register: '' } | |
- { id: 47, class: sgpr_32, preferred-register: '' } | |
- { id: 48, class: sgpr_32, preferred-register: '' } | |
- { id: 49, class: sgpr_32, preferred-register: '' } | |
- { id: 50, class: sgpr_32, preferred-register: '' } | |
- { id: 51, class: sgpr_32, preferred-register: '' } | |
- { id: 52, class: sgpr_32, preferred-register: '' } | |
- { id: 53, class: sgpr_32, preferred-register: '' } | |
- { id: 54, class: sgpr_32, preferred-register: '' } | |
- { id: 55, class: sgpr_32, preferred-register: '' } | |
- { id: 56, class: sgpr_32, preferred-register: '' } | |
- { id: 57, class: sgpr_32, preferred-register: '' } | |
- { id: 58, class: sgpr_32, preferred-register: '' } | |
- { id: 59, class: sgpr_32, preferred-register: '' } | |
- { id: 60, class: sgpr_32, preferred-register: '' } | |
- { id: 61, class: vgpr_32, preferred-register: '' } | |
- { id: 62, class: vgpr_32, preferred-register: '' } | |
- { id: 63, class: vgpr_32, preferred-register: '' } | |
- { id: 64, class: vgpr_32, preferred-register: '' } | |
- { id: 65, class: vgpr_32, preferred-register: '' } | |
- { id: 66, class: vgpr_32, preferred-register: '' } | |
- { id: 67, class: vgpr_32, preferred-register: '' } | |
- { id: 68, class: vgpr_32, preferred-register: '' } | |
- { id: 69, class: vgpr_32, preferred-register: '' } | |
- { id: 70, class: sgpr_224, preferred-register: '' } | |
- { id: 71, class: sgpr_224, preferred-register: '' } | |
- { id: 72, class: sreg_32, preferred-register: '' } | |
- { id: 73, class: sreg_32, preferred-register: '' } | |
- { id: 74, class: sreg_32, preferred-register: '' } | |
- { id: 75, class: sreg_32, preferred-register: '' } | |
- { id: 76, class: sreg_32, preferred-register: '' } | |
- { id: 77, class: sreg_32, preferred-register: '' } | |
- { id: 78, class: vgpr_32, preferred-register: '' } | |
- { id: 79, class: vgpr_32, preferred-register: '' } | |
- { id: 80, class: sreg_32, preferred-register: '' } | |
- { id: 81, class: sreg_64, preferred-register: '' } | |
- { id: 82, class: sreg_32, preferred-register: '' } | |
- { id: 83, class: vgpr_32, preferred-register: '' } | |
- { id: 84, class: sreg_32, preferred-register: '' } | |
- { id: 85, class: vgpr_32, preferred-register: '' } | |
- { id: 86, class: sreg_32, preferred-register: '' } | |
- { id: 87, class: vgpr_32, preferred-register: '' } | |
- { id: 88, class: sreg_64, preferred-register: '' } | |
- { id: 89, class: vgpr_32, preferred-register: '' } | |
- { id: 90, class: vgpr_32, preferred-register: '' } | |
- { id: 91, class: sgpr_32, preferred-register: '' } | |
- { id: 92, class: sreg_32, preferred-register: '' } | |
- { id: 93, class: sreg_32, preferred-register: '' } | |
- { id: 94, class: sreg_32, preferred-register: '' } | |
- { id: 95, class: sreg_64, preferred-register: '' } | |
- { id: 96, class: sreg_32, preferred-register: '' } | |
- { id: 97, class: sreg_64, preferred-register: '' } | |
- { id: 98, class: vgpr_32, preferred-register: '' } | |
- { id: 99, class: sreg_64, preferred-register: '' } | |
- { id: 100, class: sgpr_128, preferred-register: '' } | |
- { id: 101, class: sreg_32, preferred-register: '' } | |
- { id: 102, class: sreg_32, preferred-register: '' } | |
- { id: 103, class: vgpr_32, preferred-register: '' } | |
- { id: 104, class: sreg_64, preferred-register: '' } | |
- { id: 105, class: sreg_64, preferred-register: '' } | |
- { id: 106, class: sreg_64, preferred-register: '' } | |
- { id: 107, class: sreg_64, preferred-register: '' } | |
- { id: 108, class: sreg_32, preferred-register: '' } | |
- { id: 109, class: sgpr_32, preferred-register: '' } | |
- { id: 110, class: sreg_32, preferred-register: '' } | |
- { id: 111, class: sreg_64, preferred-register: '' } | |
- { id: 112, class: sreg_32, preferred-register: '' } | |
- { id: 113, class: sreg_32, preferred-register: '' } | |
- { id: 114, class: sreg_32, preferred-register: '' } | |
- { id: 115, class: sreg_32, preferred-register: '' } | |
- { id: 116, class: sreg_64, preferred-register: '' } | |
- { id: 117, class: sreg_64, preferred-register: '' } | |
- { id: 118, class: sreg_32_xm0_xexec, preferred-register: '' } | |
- { id: 119, class: vgpr_32, preferred-register: '' } | |
- { id: 120, class: sreg_64, preferred-register: '' } | |
- { id: 121, class: sreg_32, preferred-register: '' } | |
- { id: 122, class: sreg_32, preferred-register: '' } | |
- { id: 123, class: sreg_64, preferred-register: '' } | |
- { id: 124, class: vgpr_32, preferred-register: '' } | |
- { id: 125, class: vgpr_32, preferred-register: '' } | |
- { id: 126, class: sreg_32, preferred-register: '' } | |
- { id: 127, class: sreg_32, preferred-register: '' } | |
- { id: 128, class: vgpr_32, preferred-register: '' } | |
- { id: 129, class: sgpr_32, preferred-register: '' } | |
- { id: 130, class: vgpr_32, preferred-register: '' } | |
- { id: 131, class: sreg_64, preferred-register: '' } | |
- { id: 132, class: vreg_64, preferred-register: '' } | |
- { id: 133, class: vgpr_32, preferred-register: '' } | |
- { id: 134, class: vgpr_32, preferred-register: '' } | |
- { id: 135, class: sreg_32, preferred-register: '' } | |
- { id: 136, class: sreg_32, preferred-register: '' } | |
- { id: 137, class: vgpr_32, preferred-register: '' } | |
- { id: 138, class: sreg_32, preferred-register: '' } | |
- { id: 139, class: vgpr_32, preferred-register: '' } | |
- { id: 140, class: sreg_32, preferred-register: '' } | |
- { id: 141, class: sreg_32, preferred-register: '' } | |
- { id: 142, class: sreg_32, preferred-register: '' } | |
- { id: 143, class: vgpr_32, preferred-register: '' } | |
- { id: 144, class: vgpr_32, preferred-register: '' } | |
- { id: 145, class: sreg_32, preferred-register: '' } | |
- { id: 146, class: vgpr_32, preferred-register: '' } | |
- { id: 147, class: sreg_32, preferred-register: '' } | |
- { id: 148, class: vgpr_32, preferred-register: '' } | |
- { id: 149, class: sreg_32, preferred-register: '' } | |
- { id: 150, class: vgpr_32, preferred-register: '' } | |
- { id: 151, class: vgpr_32, preferred-register: '' } | |
- { id: 152, class: sreg_64, preferred-register: '' } | |
- { id: 153, class: sreg_64, preferred-register: '' } | |
- { id: 154, class: sreg_64_xexec, preferred-register: '' } | |
- { id: 155, class: vgpr_32, preferred-register: '' } | |
- { id: 156, class: vgpr_32, preferred-register: '' } | |
- { id: 157, class: vgpr_32, preferred-register: '' } | |
- { id: 158, class: vgpr_32, preferred-register: '' } | |
- { id: 159, class: vgpr_32, preferred-register: '' } | |
- { id: 160, class: vgpr_32, preferred-register: '' } | |
- { id: 161, class: vgpr_32, preferred-register: '' } | |
- { id: 162, class: vgpr_32, preferred-register: '' } | |
- { id: 163, class: vgpr_32, preferred-register: '' } | |
- { id: 164, class: vgpr_32, preferred-register: '' } | |
- { id: 165, class: vgpr_32, preferred-register: '' } | |
- { id: 166, class: vgpr_32, preferred-register: '' } | |
- { id: 167, class: vgpr_32, preferred-register: '' } | |
- { id: 168, class: vgpr_32, preferred-register: '' } | |
- { id: 169, class: vgpr_32, preferred-register: '' } | |
- { id: 170, class: vgpr_32, preferred-register: '' } | |
- { id: 171, class: vgpr_32, preferred-register: '' } | |
- { id: 172, class: sreg_32, preferred-register: '' } | |
- { id: 173, class: vgpr_32, preferred-register: '' } | |
- { id: 174, class: sreg_32, preferred-register: '' } | |
- { id: 175, class: vgpr_32, preferred-register: '' } | |
- { id: 176, class: vgpr_32, preferred-register: '' } | |
- { id: 177, class: sreg_64, preferred-register: '' } | |
- { id: 178, class: sreg_64_xexec, preferred-register: '' } | |
- { id: 179, class: sgpr_32, preferred-register: '' } | |
- { id: 180, class: vgpr_32, preferred-register: '' } | |
- { id: 181, class: vgpr_32, preferred-register: '' } | |
- { id: 182, class: sreg_32, preferred-register: '' } | |
- { id: 183, class: sreg_32, preferred-register: '' } | |
- { id: 184, class: sreg_32, preferred-register: '' } | |
- { id: 185, class: sreg_64_xexec, preferred-register: '' } | |
- { id: 186, class: vgpr_32, preferred-register: '' } | |
- { id: 187, class: vgpr_32, preferred-register: '' } | |
- { id: 188, class: vgpr_32, preferred-register: '' } | |
- { id: 189, class: vgpr_32, preferred-register: '' } | |
- { id: 190, class: sreg_64, preferred-register: '' } | |
- { id: 191, class: vgpr_32, preferred-register: '' } | |
- { id: 192, class: vgpr_32, preferred-register: '' } | |
- { id: 193, class: sgpr_32, preferred-register: '' } | |
- { id: 194, class: sreg_32, preferred-register: '' } | |
- { id: 195, class: sreg_64, preferred-register: '' } | |
- { id: 196, class: sreg_64, preferred-register: '' } | |
- { id: 197, class: sreg_64, preferred-register: '' } | |
- { id: 198, class: sreg_64, preferred-register: '' } | |
- { id: 199, class: sreg_32, preferred-register: '' } | |
- { id: 200, class: sgpr_32, preferred-register: '' } | |
- { id: 201, class: sreg_32, preferred-register: '' } | |
- { id: 202, class: sreg_64, preferred-register: '' } | |
- { id: 203, class: sreg_32, preferred-register: '' } | |
- { id: 204, class: sreg_32, preferred-register: '' } | |
- { id: 205, class: sreg_32, preferred-register: '' } | |
- { id: 206, class: sreg_32, preferred-register: '' } | |
- { id: 207, class: sreg_64, preferred-register: '' } | |
- { id: 208, class: sreg_64, preferred-register: '' } | |
- { id: 209, class: sreg_32_xm0_xexec, preferred-register: '' } | |
- { id: 210, class: vgpr_32, preferred-register: '' } | |
- { id: 211, class: sreg_64, preferred-register: '' } | |
- { id: 212, class: sreg_32, preferred-register: '' } | |
- { id: 213, class: sreg_32, preferred-register: '' } | |
- { id: 214, class: sreg_64, preferred-register: '' } | |
- { id: 215, class: vgpr_32, preferred-register: '' } | |
- { id: 216, class: vgpr_32, preferred-register: '' } | |
- { id: 217, class: sgpr_32, preferred-register: '' } | |
- { id: 218, class: vgpr_32, preferred-register: '' } | |
- { id: 219, class: sreg_32, preferred-register: '' } | |
- { id: 220, class: sreg_32, preferred-register: '' } | |
- { id: 221, class: vgpr_32, preferred-register: '' } | |
- { id: 222, class: sreg_32, preferred-register: '' } | |
- { id: 223, class: vgpr_32, preferred-register: '' } | |
- { id: 224, class: sreg_32, preferred-register: '' } | |
- { id: 225, class: sreg_32, preferred-register: '' } | |
- { id: 226, class: sreg_32, preferred-register: '' } | |
- { id: 227, class: vgpr_32, preferred-register: '' } | |
- { id: 228, class: vgpr_32, preferred-register: '' } | |
- { id: 229, class: sreg_32, preferred-register: '' } | |
- { id: 230, class: vgpr_32, preferred-register: '' } | |
- { id: 231, class: sreg_32, preferred-register: '' } | |
- { id: 232, class: vgpr_32, preferred-register: '' } | |
- { id: 233, class: vgpr_32, preferred-register: '' } | |
- { id: 234, class: sreg_64_xexec, preferred-register: '' } | |
- { id: 235, class: vgpr_32, preferred-register: '' } | |
- { id: 236, class: vgpr_32, preferred-register: '' } | |
- { id: 237, class: vgpr_32, preferred-register: '' } | |
- { id: 238, class: vgpr_32, preferred-register: '' } | |
- { id: 239, class: vgpr_32, preferred-register: '' } | |
- { id: 240, class: vgpr_32, preferred-register: '' } | |
- { id: 241, class: vgpr_32, preferred-register: '' } | |
- { id: 242, class: vgpr_32, preferred-register: '' } | |
- { id: 243, class: vgpr_32, preferred-register: '' } | |
- { id: 244, class: vgpr_32, preferred-register: '' } | |
- { id: 245, class: vgpr_32, preferred-register: '' } | |
- { id: 246, class: vgpr_32, preferred-register: '' } | |
- { id: 247, class: vgpr_32, preferred-register: '' } | |
- { id: 248, class: vgpr_32, preferred-register: '' } | |
- { id: 249, class: vgpr_32, preferred-register: '' } | |
- { id: 250, class: vgpr_32, preferred-register: '' } | |
- { id: 251, class: sreg_32, preferred-register: '' } | |
- { id: 252, class: vgpr_32, preferred-register: '' } | |
- { id: 253, class: sgpr_32, preferred-register: '' } | |
- { id: 254, class: vgpr_32, preferred-register: '' } | |
- { id: 255, class: sreg_64_xexec, preferred-register: '' } | |
- { id: 256, class: vgpr_32, preferred-register: '' } | |
- { id: 257, class: sreg_32, preferred-register: '' } | |
- { id: 258, class: sreg_32, preferred-register: '' } | |
- { id: 259, class: vgpr_32, preferred-register: '' } | |
- { id: 260, class: sreg_64_xexec, preferred-register: '' } | |
- { id: 261, class: vgpr_32, preferred-register: '' } | |
- { id: 262, class: vgpr_32, preferred-register: '' } | |
- { id: 263, class: vgpr_32, preferred-register: '' } | |
- { id: 264, class: vgpr_32, preferred-register: '' } | |
- { id: 265, class: sgpr_32, preferred-register: '' } | |
- { id: 266, class: sreg_64_xexec, preferred-register: '' } | |
- { id: 267, class: vgpr_32, preferred-register: '' } | |
- { id: 268, class: sreg_32, preferred-register: '' } | |
- { id: 269, class: vgpr_32, preferred-register: '' } | |
- { id: 270, class: vgpr_32, preferred-register: '' } | |
- { id: 271, class: vgpr_32, preferred-register: '' } | |
- { id: 272, class: vgpr_32, preferred-register: '' } | |
- { id: 273, class: vgpr_32, preferred-register: '' } | |
- { id: 274, class: sgpr_32, preferred-register: '' } | |
- { id: 275, class: sreg_64_xexec, preferred-register: '' } | |
- { id: 276, class: vgpr_32, preferred-register: '' } | |
- { id: 277, class: vgpr_32, preferred-register: '' } | |
- { id: 278, class: vgpr_32, preferred-register: '' } | |
- { id: 279, class: vgpr_32, preferred-register: '' } | |
- { id: 280, class: sgpr_32, preferred-register: '' } | |
- { id: 281, class: vgpr_32, preferred-register: '' } | |
- { id: 282, class: sgpr_32, preferred-register: '' } | |
- { id: 283, class: vgpr_32, preferred-register: '' } | |
- { id: 284, class: sgpr_32, preferred-register: '' } | |
- { id: 285, class: vgpr_32, preferred-register: '' } | |
- { id: 286, class: sgpr_32, preferred-register: '' } | |
- { id: 287, class: vgpr_32, preferred-register: '' } | |
- { id: 288, class: sgpr_32, preferred-register: '' } | |
- { id: 289, class: vgpr_32, preferred-register: '' } | |
- { id: 290, class: sgpr_32, preferred-register: '' } | |
- { id: 291, class: vgpr_32, preferred-register: '' } | |
- { id: 292, class: vgpr_32, preferred-register: '' } | |
- { id: 293, class: vgpr_32, preferred-register: '' } | |
- { id: 294, class: vgpr_32, preferred-register: '' } | |
- { id: 295, class: vgpr_32, preferred-register: '' } | |
- { id: 296, class: vgpr_32, preferred-register: '' } | |
- { id: 297, class: vreg_64, preferred-register: '' } | |
- { id: 298, class: vreg_64, preferred-register: '' } | |
- { id: 299, class: vgpr_32, preferred-register: '' } | |
- { id: 300, class: vgpr_32, preferred-register: '' } | |
- { id: 301, class: vgpr_32, preferred-register: '' } | |
- { id: 302, class: vgpr_32, preferred-register: '' } | |
- { id: 303, class: vgpr_32, preferred-register: '' } | |
- { id: 304, class: vgpr_32, preferred-register: '' } | |
- { id: 305, class: sreg_32, preferred-register: '' } | |
- { id: 306, class: sreg_32, preferred-register: '' } | |
- { id: 307, class: sreg_32, preferred-register: '' } | |
- { id: 308, class: sreg_32, preferred-register: '' } | |
- { id: 309, class: sreg_32, preferred-register: '' } | |
- { id: 310, class: sreg_32, preferred-register: '' } | |
- { id: 311, class: sreg_32, preferred-register: '' } | |
- { id: 312, class: sreg_32, preferred-register: '' } | |
- { id: 313, class: sreg_32, preferred-register: '' } | |
- { id: 314, class: sreg_32, preferred-register: '' } | |
- { id: 315, class: sreg_32, preferred-register: '' } | |
- { id: 316, class: sreg_32, preferred-register: '' } | |
liveins: | |
- { reg: '$sgpr2', virtual-reg: '%48' } | |
- { reg: '$sgpr3', virtual-reg: '%49' } | |
- { reg: '$sgpr8', virtual-reg: '%54' } | |
- { reg: '$sgpr9', virtual-reg: '%55' } | |
- { reg: '$sgpr10', virtual-reg: '%56' } | |
- { reg: '$sgpr11', virtual-reg: '%57' } | |
- { reg: '$sgpr12', virtual-reg: '%58' } | |
- { reg: '$sgpr13', virtual-reg: '%59' } | |
- { reg: '$sgpr14', virtual-reg: '%60' } | |
- { reg: '$vgpr0', virtual-reg: '%61' } | |
- { reg: '$vgpr5', virtual-reg: '%66' } | |
frameInfo: | |
isFrameAddressTaken: false | |
isReturnAddressTaken: false | |
hasStackMap: false | |
hasPatchPoint: false | |
stackSize: 0 | |
offsetAdjustment: 0 | |
maxAlignment: 1 | |
adjustsStack: false | |
hasCalls: false | |
stackProtector: '' | |
maxCallFrameSize: 4294967295 | |
cvBytesOfCalleeSavedRegisters: 0 | |
hasOpaqueSPAdjustment: false | |
hasVAStart: false | |
hasMustTailInVarArgFunc: false | |
hasTailCall: false | |
localFrameSize: 0 | |
savePoint: '' | |
restorePoint: '' | |
fixedStack: [] | |
stack: [] | |
callSites: [] | |
debugValueSubstitutions: [] | |
constants: [] | |
machineFunctionInfo: | |
explicitKernArgSize: 0 | |
maxKernArgAlign: 4 | |
ldsSize: 0 | |
dynLDSAlign: 1 | |
isEntryFunction: true | |
noSignedZerosFPMath: false | |
memoryBound: false | |
waveLimiter: false | |
hasSpilledSGPRs: false | |
hasSpilledVGPRs: false | |
scratchRSrcReg: '$sgpr100_sgpr101_sgpr102_sgpr103' | |
frameOffsetReg: '$fp_reg' | |
stackPtrOffsetReg: '$sgpr32' | |
argumentInfo: | |
privateSegmentWaveByteOffset: { reg: '$sgpr5' } | |
mode: | |
ieee: false | |
dx10-clamp: true | |
fp32-input-denormals: false | |
fp32-output-denormals: false | |
fp64-fp16-input-denormals: true | |
fp64-fp16-output-denormals: true | |
highBitsOf32BitAddress: 0 | |
occupancy: 20 | |
body: | | |
bb.0..entry: | |
successors: %bb.1(0x30000000), %bb.2(0x50000000) | |
liveins: $sgpr2, $sgpr3, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr5 | |
%66:vgpr_32 = COPY $vgpr5 | |
%61:vgpr_32 = COPY $vgpr0 | |
%60:sgpr_32 = COPY $sgpr14 | |
%59:sgpr_32 = COPY $sgpr13 | |
%58:sgpr_32 = COPY $sgpr12 | |
%57:sgpr_32 = COPY $sgpr11 | |
%56:sgpr_32 = COPY $sgpr10 | |
%55:sgpr_32 = COPY $sgpr9 | |
%54:sgpr_32 = COPY $sgpr8 | |
%49:sgpr_32 = COPY $sgpr3 | |
%48:sgpr_32 = COPY $sgpr2 | |
%71:sgpr_224 = REG_SEQUENCE %54, %subreg.sub0, %55, %subreg.sub1, %56, %subreg.sub2, %57, %subreg.sub3, %58, %subreg.sub4, %59, %subreg.sub5, %60, %subreg.sub6 | |
%70:sgpr_224 = COPY %71 | |
SI_INIT_EXEC -1, implicit-def dead $exec | |
%0:sreg_32 = S_BFE_U32 %48, 589846, implicit-def dead $scc | |
%1:sreg_32 = S_BFE_U32 %48, 589836, implicit-def dead $scc | |
%2:sreg_32 = S_BFE_U32 %49, 262168, implicit-def dead $scc | |
S_BARRIER | |
%72:sreg_32 = S_MOV_B32 0 | |
S_CMP_LG_U32 %2, %72, implicit-def $scc | |
S_CBRANCH_SCC1 %bb.2, implicit $scc | |
S_BRANCH %bb.1 | |
bb.1..allocReq: | |
successors: %bb.2(0x80000000) | |
%73:sreg_32 = S_MOV_B32 12 | |
%74:sreg_32 = nuw nsw S_LSHL_B32 %0, killed %73, implicit-def dead $scc | |
%75:sreg_32 = S_OR_B32 killed %74, %1, implicit-def dead $scc | |
$m0 = COPY %75 | |
S_SENDMSG 9, implicit $exec, implicit $m0 | |
bb.2..endAllocReq: | |
successors: %bb.3(0x40000000), %bb.4(0x40000000) | |
%77:sreg_32 = S_MOV_B32 -1 | |
%78:vgpr_32 = V_MBCNT_LO_U32_B32_e64 %77, %72, implicit $exec | |
%79:vgpr_32 = V_MBCNT_HI_U32_B32_e64 %77, killed %78, implicit $exec | |
%80:sreg_32 = S_MOV_B32 6 | |
%3:vgpr_32 = V_LSHL_OR_B32_e64 %2, killed %80, killed %79, implicit $exec | |
%81:sreg_64 = V_CMP_LT_U32_e64 %3, %0, implicit $exec | |
%4:sreg_64 = SI_IF killed %81, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec | |
S_BRANCH %bb.3 | |
bb.3..expPrim: | |
successors: %bb.4(0x80000000) | |
%82:sreg_32 = IMPLICIT_DEF | |
%83:vgpr_32 = COPY %82 | |
%84:sreg_32 = IMPLICIT_DEF | |
%85:vgpr_32 = COPY %84 | |
%86:sreg_32 = IMPLICIT_DEF | |
%87:vgpr_32 = COPY %86 | |
EXP_DONE 20, %61, %83, %85, %87, 0, 0, 1, implicit $exec | |
bb.4..endExpPrim: | |
successors: %bb.5(0x40000000), %bb.12(0x40000000) | |
SI_END_CF %4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec | |
%88:sreg_64 = V_CMP_LT_U32_e64 %3, %1, implicit $exec | |
%5:sreg_64 = SI_IF killed %88, %bb.12, implicit-def dead $exec, implicit-def dead $scc, implicit $exec | |
S_BRANCH %bb.5 | |
bb.5..expVert: | |
successors: %bb.6(0x40000000), %bb.7(0x40000000) | |
%92:sreg_32 = COPY %70.sub2 | |
%93:sreg_32 = COPY %70.sub4 | |
%94:sreg_32 = COPY %70.sub5 | |
%95:sreg_64 = S_GETPC_B64 | |
%96:sreg_32 = COPY %95.sub1 | |
%97:sreg_64 = REG_SEQUENCE killed %92, %subreg.sub0, %96, %subreg.sub1 | |
%98:vgpr_32 = V_ADD_U32_e64 killed %94, %66, 0, implicit $exec | |
%99:sreg_64 = REG_SEQUENCE killed %93, %subreg.sub0, %96, %subreg.sub1 | |
%6:sgpr_128 = S_LOAD_DWORDX4_IMM killed %97, 0, 0 :: (load (s128) from %ir.25, addrspace 4) | |
%100:sgpr_128 = S_LOAD_DWORDX4_IMM killed %99, 0, 0 :: (invariant load (s128) from %ir.24, addrspace 4) | |
%101:sreg_32 = S_MOV_B32 0 | |
%7:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_IDXEN killed %98, killed %100, %101, 0, 77, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4) | |
%102:sreg_32 = S_MOV_B32 -1 | |
%103:vgpr_32 = V_MBCNT_LO_U32_B32_e64 %102, %101, implicit $exec | |
%8:vgpr_32 = V_MBCNT_HI_U32_B32_e64 %102, killed %103, implicit $exec | |
%90:vgpr_32 = V_MOV_B32_e32 1, implicit $exec | |
INLINEASM &"; %1", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %89, 2147483657 /* reguse tiedto:$0 */, %90(tied-def 3) | |
%104:sreg_64 = V_CMP_NE_U32_e64 %89, %101, implicit $exec | |
%105:sreg_64 = COPY %104 | |
%106:sreg_64 = V_CMP_NE_U32_e64 %8, %101, implicit $exec | |
%10:sreg_64 = COPY %106 | |
%292:vgpr_32 = V_MOV_B32_e32 0, implicit $exec | |
%11:sreg_64 = SI_IF %106, %bb.7, implicit-def dead $exec, implicit-def dead $scc, implicit $exec | |
S_BRANCH %bb.6 | |
bb.6..lr.ph16.i.preheader: | |
successors: %bb.8(0x80000000) | |
%107:sreg_64 = S_MOV_B64 0 | |
%296:vgpr_32 = V_MOV_B32_e32 0, implicit $exec | |
%113:sreg_32 = S_MOV_B32 1 | |
%120:sreg_64 = S_MOV_B64 1 | |
%122:sreg_32 = S_MOV_B32 16 | |
S_BRANCH %bb.8 | |
bb.7.Flow13: | |
successors: %bb.13(0x80000000) | |
%12:vgpr_32 = PHI %292, %bb.5, %22, %bb.11 | |
SI_END_CF %11, implicit-def dead $exec, implicit-def dead $scc, implicit $exec | |
S_BRANCH %bb.13 | |
bb.8..lr.ph16.i: | |
successors: %bb.9(0x40000000), %bb.10(0x40000000) | |
%13:sreg_64 = PHI %107, %bb.6, %21, %bb.10 | |
%14:sreg_32 = PHI %101, %bb.6, %20, %bb.10 | |
%15:sreg_64 = PHI %107, %bb.6, %19, %bb.10 | |
%295:vgpr_32 = PHI %296, %bb.6, %293, %bb.10 | |
%110:sreg_32 = COPY %15.sub0 | |
%111:sreg_64 = S_LSHR_B64 %105, killed %110, implicit-def dead $scc | |
%112:sreg_32 = COPY %111.sub0 | |
%114:sreg_32 = S_AND_B32 killed %112, %113, implicit-def dead $scc | |
%116:sreg_64 = REG_SEQUENCE killed %114, %subreg.sub0, %101, %subreg.sub1 | |
S_CMP_EQ_U64 killed %116, %107, implicit-def $scc | |
%294:vgpr_32 = COPY %295, implicit $exec | |
S_CBRANCH_SCC1 %bb.10, implicit $scc | |
S_BRANCH %bb.9 | |
bb.9 (%ir-block.40): | |
successors: %bb.10(0x80000000) | |
%118:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %6, %14, 0 :: (dereferenceable invariant load (s32)) | |
%119:vgpr_32 = nnan nsz arcp contract afn reassoc nofpexcept V_ADD_F32_e64 0, %295, 0, killed %118, 0, 0, implicit $mode, implicit $exec | |
bb.10 (%ir-block.44): | |
successors: %bb.11(0x04000000), %bb.8(0x7c000000) | |
%293:vgpr_32 = PHI %294, %bb.8, %119, %bb.9 | |
%307:sreg_32 = COPY %15.sub0 | |
%308:sreg_32 = COPY %15.sub1 | |
%309:sreg_32 = COPY %120.sub0 | |
%310:sreg_32 = COPY %120.sub1 | |
%305:sreg_32 = S_ADD_U32 %307, %309, implicit-def $scc | |
%306:sreg_32 = S_ADDC_U32 %308, %310, implicit-def $scc, implicit $scc | |
%19:sreg_64 = REG_SEQUENCE %305, %subreg.sub0, %306, %subreg.sub1 | |
%121:sreg_32 = COPY %19.sub0 | |
%20:sreg_32 = S_ADD_I32 %14, %122, implicit-def dead $scc | |
%123:sreg_64 = V_CMP_EQ_U32_e64 %8, killed %121, implicit $exec | |
%21:sreg_64 = SI_IF_BREAK killed %123, %13, implicit-def dead $scc | |
SI_LOOP %21, %bb.8, implicit-def dead $exec, implicit-def dead $scc, implicit $exec | |
S_BRANCH %bb.11 | |
bb.11.Flow12: | |
successors: %bb.7(0x80000000) | |
%22:vgpr_32 = PHI %293, %bb.10 | |
SI_END_CF %21, implicit-def dead $exec, implicit-def dead $scc, implicit $exec | |
S_BRANCH %bb.7 | |
bb.12.Flow14: | |
successors: %bb.24(0x80000000) | |
SI_END_CF %5, implicit-def dead $exec, implicit-def dead $scc, implicit $exec | |
S_BRANCH %bb.24 | |
bb.13.._crit_edge17.i: | |
successors: %bb.14(0x40000000), %bb.21(0x40000000) | |
%127:sreg_32 = S_MOV_B32 4 | |
%128:vgpr_32 = V_LSHLREV_B32_e64 killed %127, %8, implicit $exec | |
%129:sgpr_32 = S_MOV_B32 0 | |
%24:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN killed %128, %6, %129, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) | |
%125:vgpr_32 = COPY %24 | |
INLINEASM &"; %1", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %124, 2147483657 /* reguse tiedto:$0 */, %125(tied-def 3) | |
%130:vgpr_32 = V_SET_INACTIVE_B32 %124, %129, implicit-def dead $scc, implicit $exec | |
%131:sreg_64 = S_MOV_B64 1 | |
%132:vreg_64 = nuw V_LSHLREV_B64_e64 %8, killed %131, implicit $exec | |
%133:vgpr_32 = COPY %132.sub0 | |
%134:vgpr_32 = COPY %132.sub1 | |
%135:sreg_32 = S_MOV_B32 -305419897 | |
%136:sreg_32 = S_MOV_B32 1698898191 | |
%138:sreg_32 = IMPLICIT_DEF | |
%139:vgpr_32 = COPY %138 | |
%137:vgpr_32 = V_PERMLANE16_B32_e64 -1, killed %130, 0, killed %136, 0, killed %135, %139, 0, implicit $exec | |
%140:sreg_32 = S_MOV_B32 16 | |
%141:sreg_32 = V_READLANE_B32 %137, %140 | |
%142:sreg_32 = S_MOV_B32 48 | |
%143:vgpr_32 = V_WRITELANE_B32 killed %141, killed %142, %137 | |
%144:vgpr_32 = V_WRITELANE_B32 %129, %140, %143 | |
%145:sreg_32 = S_MOV_B32 -1 | |
%147:sreg_32 = IMPLICIT_DEF | |
%148:vgpr_32 = COPY %147 | |
%146:vgpr_32 = V_PERMLANEX16_B32_e64 -1, %144, 0, %129, 0, %145, %148, 0, implicit $exec | |
%149:sreg_32 = S_MOV_B32 65537 | |
%150:vgpr_32 = V_AND_B32_e64 %134, %149, implicit $exec | |
%151:vgpr_32 = V_AND_B32_e64 %133, %149, implicit $exec | |
%297:vreg_64 = REG_SEQUENCE killed %151, %subreg.sub0, killed %150, %subreg.sub1 | |
%153:sreg_64 = S_MOV_B64 0 | |
%154:sreg_64_xexec = V_CMP_EQ_U64_e64 killed %297, %153, implicit $exec | |
%25:sreg_64_xexec = COPY %154 | |
%155:vgpr_32 = V_CNDMASK_B32_e64 0, killed %146, 0, %144, %154, implicit $exec | |
%157:vgpr_32 = COPY %129 | |
%156:vgpr_32 = V_MOV_B32_dpp %157, %155, 273, 15, 15, 0, implicit $exec | |
%158:vgpr_32 = nnan nsz arcp contract afn reassoc nofpexcept V_ADD_F32_e64 0, %155, 0, killed %156, 0, 0, implicit $mode, implicit $exec | |
%160:vgpr_32 = COPY %129 | |
%159:vgpr_32 = V_MOV_B32_dpp %160, %155, 274, 15, 15, 0, implicit $exec | |
%161:vgpr_32 = nnan nsz arcp contract afn reassoc nofpexcept V_ADD_F32_e64 0, killed %158, 0, killed %159, 0, 0, implicit $mode, implicit $exec | |
%163:vgpr_32 = COPY %129 | |
%162:vgpr_32 = V_MOV_B32_dpp %163, %155, 275, 15, 15, 0, implicit $exec | |
%164:vgpr_32 = nnan nsz arcp contract afn reassoc nofpexcept V_ADD_F32_e64 0, killed %161, 0, killed %162, 0, 0, implicit $mode, implicit $exec | |
%166:vgpr_32 = COPY %129 | |
%165:vgpr_32 = V_MOV_B32_dpp %166, %164, 276, 15, 14, 0, implicit $exec | |
%167:vgpr_32 = nnan nsz arcp contract afn reassoc nofpexcept V_ADD_F32_e64 0, %164, 0, killed %165, 0, 0, implicit $mode, implicit $exec | |
%169:vgpr_32 = COPY %129 | |
%168:vgpr_32 = V_MOV_B32_dpp %169, %167, 280, 15, 12, 0, implicit $exec | |
%170:vgpr_32 = nnan nsz arcp contract afn reassoc nofpexcept V_ADD_F32_e64 0, %167, 0, killed %168, 0, 0, implicit $mode, implicit $exec | |
%172:sreg_32 = IMPLICIT_DEF | |
%173:vgpr_32 = COPY %172 | |
%171:vgpr_32 = V_PERMLANEX16_B32_e64 -1, %170, 0, %145, 0, %145, %173, 0, implicit $exec | |
%174:sreg_32 = S_MOV_B32 -65536 | |
%175:vgpr_32 = V_AND_B32_e64 %134, %174, implicit $exec | |
%176:vgpr_32 = V_AND_B32_e64 %133, %174, implicit $exec | |
%298:vreg_64 = REG_SEQUENCE killed %176, %subreg.sub0, killed %175, %subreg.sub1 | |
%178:sreg_64_xexec = V_CMP_EQ_U64_e64 killed %298, %153, implicit $exec | |
%26:sreg_64_xexec = COPY %178 | |
%180:vgpr_32 = nnan nsz arcp contract afn reassoc V_CNDMASK_B32_e64 0, killed %171, 0, %129, %178, implicit $exec | |
%181:vgpr_32 = nnan nsz arcp contract afn reassoc nofpexcept V_ADD_F32_e64 0, %170, 0, killed %180, 0, 0, implicit $mode, implicit $exec | |
%182:sreg_32 = S_MOV_B32 31 | |
%183:sreg_32 = V_READLANE_B32 %181, killed %182 | |
%184:sreg_32 = S_MOV_B32 32 | |
%185:sreg_64_xexec = V_CMP_LT_U32_e64 %8, killed %184, implicit $exec | |
%27:sreg_64_xexec = COPY %185 | |
%187:vgpr_32 = COPY %129 | |
%186:vgpr_32 = nnan nsz arcp contract afn reassoc V_CNDMASK_B32_e64 0, killed %183, 0, %187, %185, implicit $exec | |
%188:vgpr_32 = nnan nsz arcp contract afn reassoc nofpexcept V_ADD_F32_e64 0, %181, 0, killed %186, 0, 0, implicit $mode, implicit $exec | |
early-clobber %28:vgpr_32 = STRICT_WWM killed %188, implicit $exec | |
%189:vgpr_32 = V_AND_B32_e64 1, %8, implicit $exec | |
%190:sreg_64 = V_CMP_EQ_U32_e64 killed %189, 1, implicit $exec | |
%304:vgpr_32 = V_MOV_B32_e32 2, implicit $exec | |
%29:sreg_64 = SI_IF killed %190, %bb.21, implicit-def dead $exec, implicit-def dead $scc, implicit $exec | |
S_BRANCH %bb.14 | |
bb.14 (%ir-block.95): | |
successors: %bb.15(0x40000000), %bb.16(0x40000000) | |
%192:vgpr_32 = V_MOV_B32_e32 1, implicit $exec | |
INLINEASM &"; %1", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %191, 2147483657 /* reguse tiedto:$0 */, %192(tied-def 3) | |
%195:sreg_64 = V_CMP_NE_U32_e64 %191, %129, implicit $exec | |
%196:sreg_64 = COPY %195 | |
%197:sreg_64 = COPY %10 | |
%299:vgpr_32 = V_MOV_B32_e32 0, implicit $exec | |
%31:sreg_64 = SI_IF %197, %bb.16, implicit-def dead $exec, implicit-def dead $scc, implicit $exec | |
S_BRANCH %bb.15 | |
bb.15..lr.ph.i.preheader: | |
successors: %bb.17(0x80000000) | |
%199:sreg_32 = S_MOV_B32 0 | |
%198:sreg_64 = S_MOV_B64 0 | |
%303:vgpr_32 = V_MOV_B32_e32 0, implicit $exec | |
%204:sreg_32 = S_MOV_B32 1 | |
%211:sreg_64 = S_MOV_B64 1 | |
%213:sreg_32 = S_MOV_B32 16 | |
S_BRANCH %bb.17 | |
bb.16.Flow10: | |
successors: %bb.22(0x80000000) | |
%32:vgpr_32 = PHI %299, %bb.14, %42, %bb.20 | |
SI_END_CF %31, implicit-def dead $exec, implicit-def dead $scc, implicit $exec | |
S_BRANCH %bb.22 | |
bb.17..lr.ph.i: | |
successors: %bb.18(0x40000000), %bb.19(0x40000000) | |
%33:sreg_64 = PHI %198, %bb.15, %41, %bb.19 | |
%34:sreg_32 = PHI %199, %bb.15, %40, %bb.19 | |
%35:sreg_64 = PHI %198, %bb.15, %39, %bb.19 | |
%302:vgpr_32 = PHI %303, %bb.15, %300, %bb.19 | |
%201:sreg_32 = COPY %35.sub0 | |
%202:sreg_64 = S_LSHR_B64 %196, killed %201, implicit-def dead $scc | |
%203:sreg_32 = COPY %202.sub0 | |
%205:sreg_32 = S_AND_B32 killed %203, %204, implicit-def dead $scc | |
%207:sreg_64 = REG_SEQUENCE killed %205, %subreg.sub0, %199, %subreg.sub1 | |
S_CMP_EQ_U64 killed %207, %198, implicit-def $scc | |
%301:vgpr_32 = COPY %302, implicit $exec | |
S_CBRANCH_SCC1 %bb.19, implicit $scc | |
S_BRANCH %bb.18 | |
bb.18 (%ir-block.104): | |
successors: %bb.19(0x80000000) | |
%209:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %6, %34, 0 :: (dereferenceable invariant load (s32)) | |
%210:vgpr_32 = nnan nsz arcp contract afn reassoc nofpexcept V_ADD_F32_e64 0, %302, 0, killed %209, 0, 0, implicit $mode, implicit $exec | |
bb.19 (%ir-block.108): | |
successors: %bb.20(0x04000000), %bb.17(0x7c000000) | |
%300:vgpr_32 = PHI %301, %bb.17, %210, %bb.18 | |
%313:sreg_32 = COPY %35.sub0 | |
%314:sreg_32 = COPY %35.sub1 | |
%315:sreg_32 = COPY %211.sub0 | |
%316:sreg_32 = COPY %211.sub1 | |
%311:sreg_32 = S_ADD_U32 %313, %315, implicit-def $scc | |
%312:sreg_32 = S_ADDC_U32 %314, %316, implicit-def $scc, implicit $scc | |
%39:sreg_64 = REG_SEQUENCE %311, %subreg.sub0, %312, %subreg.sub1 | |
%212:sreg_32 = COPY %39.sub0 | |
%40:sreg_32 = S_ADD_I32 %34, %213, implicit-def dead $scc | |
%214:sreg_64 = V_CMP_EQ_U32_e64 %8, killed %212, implicit $exec | |
%41:sreg_64 = SI_IF_BREAK killed %214, %33, implicit-def dead $scc | |
SI_LOOP %41, %bb.17, implicit-def dead $exec, implicit-def dead $scc, implicit $exec | |
S_BRANCH %bb.20 | |
bb.20.Flow: | |
successors: %bb.16(0x80000000) | |
%42:vgpr_32 = PHI %300, %bb.19 | |
SI_END_CF %41, implicit-def dead $exec, implicit-def dead $scc, implicit $exec | |
S_BRANCH %bb.16 | |
bb.21.Flow11: | |
successors: %bb.23(0x80000000) | |
%44:vgpr_32 = PHI %304, %bb.13, %45, %bb.22 | |
SI_END_CF %29, implicit-def dead $exec, implicit-def dead $scc, implicit $exec | |
S_BRANCH %bb.23 | |
bb.22.._crit_edge.i: | |
successors: %bb.21(0x80000000) | |
%216:vgpr_32 = COPY %24 | |
INLINEASM &"; %1", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %215, 2147483657 /* reguse tiedto:$0 */, %216(tied-def 3) | |
%217:sgpr_32 = S_MOV_B32 0 | |
%218:vgpr_32 = V_SET_INACTIVE_B32 %215, %217, implicit-def dead $scc, implicit $exec | |
%219:sreg_32 = S_MOV_B32 -305419897 | |
%220:sreg_32 = S_MOV_B32 1698898191 | |
%222:sreg_32 = IMPLICIT_DEF | |
%223:vgpr_32 = COPY %222 | |
%221:vgpr_32 = V_PERMLANE16_B32_e64 -1, killed %218, 0, killed %220, 0, killed %219, %223, 0, implicit $exec | |
%224:sreg_32 = S_MOV_B32 16 | |
%225:sreg_32 = V_READLANE_B32 %221, %224 | |
%226:sreg_32 = S_MOV_B32 48 | |
%227:vgpr_32 = V_WRITELANE_B32 killed %225, killed %226, %221 | |
%228:vgpr_32 = V_WRITELANE_B32 %217, %224, %227 | |
%229:sreg_32 = S_MOV_B32 -1 | |
%231:sreg_32 = IMPLICIT_DEF | |
%232:vgpr_32 = COPY %231 | |
%230:vgpr_32 = V_PERMLANEX16_B32_e64 -1, %228, 0, %217, 0, %229, %232, 0, implicit $exec | |
%233:vgpr_32 = V_CNDMASK_B32_e64 0, killed %230, 0, %228, %25, implicit $exec | |
%236:vgpr_32 = COPY %217 | |
%235:vgpr_32 = V_MOV_B32_dpp %236, %233, 273, 15, 15, 0, implicit $exec | |
%237:vgpr_32 = nnan nsz arcp contract afn reassoc nofpexcept V_ADD_F32_e64 0, %233, 0, killed %235, 0, 0, implicit $mode, implicit $exec | |
%239:vgpr_32 = COPY %217 | |
%238:vgpr_32 = V_MOV_B32_dpp %239, %233, 274, 15, 15, 0, implicit $exec | |
%240:vgpr_32 = nnan nsz arcp contract afn reassoc nofpexcept V_ADD_F32_e64 0, killed %237, 0, killed %238, 0, 0, implicit $mode, implicit $exec | |
%242:vgpr_32 = COPY %217 | |
%241:vgpr_32 = V_MOV_B32_dpp %242, %233, 275, 15, 15, 0, implicit $exec | |
%243:vgpr_32 = nnan nsz arcp contract afn reassoc nofpexcept V_ADD_F32_e64 0, killed %240, 0, killed %241, 0, 0, implicit $mode, implicit $exec | |
%245:vgpr_32 = COPY %217 | |
%244:vgpr_32 = V_MOV_B32_dpp %245, %243, 276, 15, 14, 0, implicit $exec | |
%246:vgpr_32 = nnan nsz arcp contract afn reassoc nofpexcept V_ADD_F32_e64 0, %243, 0, killed %244, 0, 0, implicit $mode, implicit $exec | |
%248:vgpr_32 = COPY %217 | |
%247:vgpr_32 = V_MOV_B32_dpp %248, %246, 280, 15, 12, 0, implicit $exec | |
%249:vgpr_32 = nnan nsz arcp contract afn reassoc nofpexcept V_ADD_F32_e64 0, %246, 0, killed %247, 0, 0, implicit $mode, implicit $exec | |
%251:sreg_32 = IMPLICIT_DEF | |
%252:vgpr_32 = COPY %251 | |
%250:vgpr_32 = V_PERMLANEX16_B32_e64 -1, %249, 0, %229, 0, %229, %252, 0, implicit $exec | |
%254:vgpr_32 = nnan nsz arcp contract afn reassoc V_CNDMASK_B32_e64 0, killed %250, 0, %217, %26, implicit $exec | |
%256:vgpr_32 = nnan nsz arcp contract afn reassoc nofpexcept V_ADD_F32_e64 0, %249, 0, killed %254, 0, 0, implicit $mode, implicit $exec | |
%257:sreg_32 = S_MOV_B32 31 | |
%258:sreg_32 = V_READLANE_B32 %256, killed %257 | |
%261:vgpr_32 = COPY %217 | |
%259:vgpr_32 = nnan nsz arcp contract afn reassoc V_CNDMASK_B32_e64 0, killed %258, 0, %261, %27, implicit $exec | |
%262:vgpr_32 = nnan nsz arcp contract afn reassoc nofpexcept V_ADD_F32_e64 0, %256, 0, killed %259, 0, 0, implicit $mode, implicit $exec | |
early-clobber %263:vgpr_32 = STRICT_WWM killed %262, implicit $exec | |
%264:vgpr_32 = nnan nsz arcp contract afn reassoc nofpexcept V_SUB_F32_e64 0, %32, 0, killed %263, 0, 0, implicit $mode, implicit $exec | |
%265:sgpr_32 = S_MOV_B32 925353388 | |
%266:sreg_64_xexec = nofpexcept V_CMP_LT_F32_e64 2, killed %264, 0, killed %265, 0, implicit $mode, implicit $exec | |
%267:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed %266, implicit $exec | |
%268:sreg_32 = S_MOV_B32 1 | |
%45:vgpr_32 = V_LSHLREV_B32_e64 killed %268, killed %267, implicit $exec | |
S_BRANCH %bb.21 | |
bb.23.lgc.ngg.ES.main.exit: | |
successors: %bb.12(0x80000000) | |
%269:vgpr_32 = COPY %7.sub3 | |
%270:vgpr_32 = COPY %7.sub2 | |
%271:vgpr_32 = COPY %7.sub1 | |
%272:vgpr_32 = COPY %7.sub0 | |
%273:vgpr_32 = nnan nsz arcp contract afn reassoc nofpexcept V_SUB_F32_e64 0, %12, 0, %28, 0, 0, implicit $mode, implicit $exec | |
%274:sgpr_32 = S_MOV_B32 925353388 | |
%275:sreg_64_xexec = nofpexcept V_CMP_LT_F32_e64 2, killed %273, 0, killed %274, 0, implicit $mode, implicit $exec | |
%276:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed %275, implicit $exec | |
%277:vgpr_32 = V_OR_B32_e64 %44, killed %276, implicit $exec | |
%278:vgpr_32 = V_CVT_F32_UBYTE0_e64 killed %277, 0, 0, implicit $exec | |
EXP 12, killed %272, killed %271, killed %270, killed %269, 0, 0, 15, implicit $exec | |
%279:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec | |
%280:sgpr_32 = IMPLICIT_DEF | |
%281:vgpr_32 = COPY %280 | |
%282:sgpr_32 = IMPLICIT_DEF | |
%283:vgpr_32 = COPY %282 | |
%284:sgpr_32 = IMPLICIT_DEF | |
%285:vgpr_32 = COPY %284 | |
EXP_DONE 13, killed %279, %281, %283, %285, 0, 0, 1, implicit $exec | |
%286:sgpr_32 = IMPLICIT_DEF | |
%287:vgpr_32 = COPY %286 | |
%288:sgpr_32 = IMPLICIT_DEF | |
%289:vgpr_32 = COPY %288 | |
%290:sgpr_32 = IMPLICIT_DEF | |
%291:vgpr_32 = COPY %290 | |
EXP 32, killed %278, %287, %289, %291, 0, 0, 1, implicit $exec | |
S_BRANCH %bb.12 | |
bb.24..endExpVert: | |
S_ENDPGM 0 | |
... | |
---
# Second YAML document in this multi-function .mir file: the pixel/fragment
# shader of the pipeline (the preceding document holds the merged GS/ES
# function). NOTE(review): this file is a reduced reproducer for a
# MachineSink failure (see the llc invocation comment at the top of the
# file) -- keep virtual-register numbering, instruction order, and flags
# byte-identical; only YAML '#' comments outside `body:` are safe to add.
name: _amdgpu_ps_main
alignment: 1
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
failsVerification: false
tracksDebugUserValues: false
# Virtual-register declarations: ids 0-2, 5, 9, 11, 13 are scalar
# (sgpr_32) registers; the rest are vector (vgpr_32) registers.
registers:
  - { id: 0, class: sgpr_32, preferred-register: '' }
  - { id: 1, class: sgpr_32, preferred-register: '' }
  - { id: 2, class: sgpr_32, preferred-register: '' }
  - { id: 3, class: vgpr_32, preferred-register: '' }
  - { id: 4, class: vgpr_32, preferred-register: '' }
  - { id: 5, class: sgpr_32, preferred-register: '' }
  - { id: 6, class: vgpr_32, preferred-register: '' }
  - { id: 7, class: vgpr_32, preferred-register: '' }
  - { id: 8, class: vgpr_32, preferred-register: '' }
  - { id: 9, class: sgpr_32, preferred-register: '' }
  - { id: 10, class: vgpr_32, preferred-register: '' }
  - { id: 11, class: sgpr_32, preferred-register: '' }
  - { id: 12, class: vgpr_32, preferred-register: '' }
  - { id: 13, class: sgpr_32, preferred-register: '' }
  - { id: 14, class: vgpr_32, preferred-register: '' }
# Physical registers live on entry. $sgpr2 is copied to $m0 before each
# V_INTERP_* below; $vgpr0/$vgpr1 feed V_INTERP_P1/P2 -- presumably the
# interpolation coordinates, TODO confirm against the amdgpu_ps calling
# convention.
liveins:
  - { reg: '$sgpr2', virtual-reg: '%2' }
  - { reg: '$vgpr0', virtual-reg: '%3' }
  - { reg: '$vgpr1', virtual-reg: '%4' }
# No stack usage: stackSize is 0 and hasCalls is false.
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 1
  adjustsStack: false
  hasCalls: false
  stackProtector: ''
  maxCallFrameSize: 4294967295
  cvBytesOfCalleeSavedRegisters: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
  hasTailCall: false
  localFrameSize: 0
  savePoint: ''
  restorePoint: ''
fixedStack: []
stack: []
callSites: []
debugValueSubstitutions: []
constants: []
# AMDGPU-specific function info: entry function, no LDS, no spills; the
# `mode:` sub-block records the FP denormal/IEEE state the instructions'
# `implicit $mode` operands refer to.
machineFunctionInfo:
  explicitKernArgSize: 0
  maxKernArgAlign: 4
  ldsSize: 0
  dynLDSAlign: 1
  isEntryFunction: true
  noSignedZerosFPMath: false
  memoryBound: false
  waveLimiter: false
  hasSpilledSGPRs: false
  hasSpilledVGPRs: false
  scratchRSrcReg: '$sgpr100_sgpr101_sgpr102_sgpr103'
  frameOffsetReg: '$fp_reg'
  stackPtrOffsetReg: '$sgpr32'
  argumentInfo:
    privateSegmentWaveByteOffset: { reg: '$sgpr3' }
  mode:
    ieee: false
    dx10-clamp: true
    fp32-input-denormals: false
    fp32-output-denormals: false
    fp64-fp16-input-denormals: true
    fp64-fp16-output-denormals: true
  highBitsOf32BitAddress: 0
  occupancy: 20
# Single basic block: interpolate one attribute (V_INTERP_P1/P2), convert
# the float result to u32 (V_CVT_U32_F32), and export it via EXP_DONE with
# three undef (IMPLICIT_DEF) padding operands, then end the program.
# No comments are added inside the block scalar: its text is handed to the
# MIR parser verbatim.
body: |
  bb.0..entry:
    liveins: $sgpr2, $vgpr0, $vgpr1
    %4:vgpr_32 = COPY $vgpr1
    %3:vgpr_32 = COPY $vgpr0
    %2:sgpr_32 = COPY $sgpr2
    $m0 = COPY %2
    %6:vgpr_32 = V_INTERP_P1_F32 %3, 0, 0, implicit $mode, implicit $m0, implicit $exec
    $m0 = COPY %2
    %7:vgpr_32 = V_INTERP_P2_F32 %6, %4, 0, 0, implicit $mode, implicit $m0, implicit $exec
    %8:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, killed %7, 0, 0, implicit $mode, implicit $exec
    %9:sgpr_32 = IMPLICIT_DEF
    %10:vgpr_32 = COPY %9
    %11:sgpr_32 = IMPLICIT_DEF
    %12:vgpr_32 = COPY %11
    %13:sgpr_32 = IMPLICIT_DEF
    %14:vgpr_32 = COPY %13
    EXP_DONE 0, killed %8, %10, %12, %14, -1, 0, 1, implicit $exec
    S_ENDPGM 0
...
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.