-
-
Save nikic/507f6ee3276e66d76b0d4a0c2b9ad7ce to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
commit 8b2c6e753c2a1b9c52645767a64652a9828ebdb7 | |
Author: Nikita Popov <npopov@redhat.com> | |
Date: Thu Dec 15 10:31:22 2022 +0100 | |
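wip: [AMDGPU] Model s_barrier and wave_barrier as accessing memory: drop IntrNoMem from the int_amdgcn_s_barrier and int_amdgcn_wave_barrier intrinsics, set mayLoad = 1 and mayStore = 1 on the WAVE_BARRIER and S_BARRIER instructions, and update the affected CodeGen test expectations | |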
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td | |
index 2e6321326954..8795b27ecf69 100644 | |
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td | |
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td | |
@@ -218,10 +218,10 @@ def int_amdgcn_s_sendmsg_rtn : Intrinsic <[llvm_anyint_ty], [llvm_i32_ty], | |
[ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects]>; | |
def int_amdgcn_s_barrier : ClangBuiltin<"__builtin_amdgcn_s_barrier">, | |
- Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; | |
+ Intrinsic<[], [], [IntrHasSideEffects, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; | |
def int_amdgcn_wave_barrier : ClangBuiltin<"__builtin_amdgcn_wave_barrier">, | |
- Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; | |
+ Intrinsic<[], [], [IntrHasSideEffects, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; | |
// The 1st parameter is a mask for the types of instructions that may be allowed | |
// to cross the SCHED_BARRIER during scheduling. | |
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td | |
index 723a38b2ecc6..030c28178dfc 100644 | |
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td | |
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td | |
@@ -315,8 +315,8 @@ def WAVE_BARRIER : SPseudoInstSI<(outs), (ins), | |
let SchedRW = []; | |
let hasNoSchedulingInfo = 1; | |
let hasSideEffects = 1; | |
- let mayLoad = 0; | |
- let mayStore = 0; | |
+ let mayLoad = 1; | |
+ let mayStore = 1; | |
let isConvergent = 1; | |
let FixedSize = 1; | |
let Size = 0; | |
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td | |
index ed4dc5481ce8..347a0cd3eb9d 100644 | |
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td | |
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td | |
@@ -1272,6 +1272,8 @@ def S_BARRIER : SOPP_Pseudo <"s_barrier", (ins), "", | |
let simm16 = 0; | |
let fixed_imm = 1; | |
let isConvergent = 1; | |
+ let mayLoad = 1; | |
+ let mayStore = 1; | |
} | |
def S_WAKEUP : SOPP_Pseudo <"s_wakeup", (ins) > { | |
diff --git a/llvm/test/CodeGen/AMDGPU/fence-lds-read2-write2.ll b/llvm/test/CodeGen/AMDGPU/fence-lds-read2-write2.ll | |
index 68f481316944..96e2aca25bc0 100644 | |
--- a/llvm/test/CodeGen/AMDGPU/fence-lds-read2-write2.ll | |
+++ b/llvm/test/CodeGen/AMDGPU/fence-lds-read2-write2.ll | |
@@ -19,12 +19,12 @@ define amdgpu_kernel void @same_address_fence_merge_write2() #0 { | |
; GCN-NEXT: ds_write2_b64 v2, v[0:1], v[0:1] offset0:132 offset1:198 | |
; GCN-NEXT: ds_write2_b64 v3, v[0:1], v[0:1] offset0:8 offset1:74 | |
; GCN-NEXT: ds_write2_b64 v3, v[0:1], v[0:1] offset0:140 offset1:206 | |
+; GCN-NEXT: s_waitcnt lgkmcnt(0) | |
+; GCN-NEXT: s_barrier | |
; GCN-NEXT: s_mov_b32 s1, 0x3ff00000 | |
; GCN-NEXT: v_mov_b32_e32 v0, s0 | |
; GCN-NEXT: v_mov_b32_e32 v1, s1 | |
; GCN-NEXT: s_waitcnt lgkmcnt(0) | |
-; GCN-NEXT: s_barrier | |
-; GCN-NEXT: s_waitcnt lgkmcnt(0) | |
; GCN-NEXT: ds_write2_b64 v2, v[0:1], v[0:1] offset1:66 | |
; GCN-NEXT: ds_write2_b64 v2, v[0:1], v[0:1] offset0:132 offset1:198 | |
; GCN-NEXT: ds_write2_b64 v3, v[0:1], v[0:1] offset0:8 offset1:74 | |
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll | |
index 48c4e0276edd..f9f92448957e 100644 | |
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll | |
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll | |
@@ -16,9 +16,9 @@ define amdgpu_kernel void @test_barrier(ptr addrspace(1) %out, i32 %size) #0 { | |
; VARIANT0-NEXT: v_not_b32_e32 v3, v0 | |
; VARIANT0-NEXT: s_waitcnt lgkmcnt(0) | |
; VARIANT0-NEXT: buffer_store_dword v0, v[1:2], s[4:7], 0 addr64 | |
+; VARIANT0-NEXT: v_add_i32_e32 v3, vcc, s0, v3 | |
; VARIANT0-NEXT: s_waitcnt vmcnt(0) expcnt(0) | |
; VARIANT0-NEXT: s_barrier | |
-; VARIANT0-NEXT: v_add_i32_e32 v3, vcc, s0, v3 | |
; VARIANT0-NEXT: v_ashrrev_i32_e32 v4, 31, v3 | |
; VARIANT0-NEXT: v_lshl_b64 v[3:4], v[3:4], 2 | |
; VARIANT0-NEXT: buffer_load_dword v0, v[3:4], s[4:7], 0 addr64 | |
@@ -37,8 +37,8 @@ define amdgpu_kernel void @test_barrier(ptr addrspace(1) %out, i32 %size) #0 { | |
; VARIANT1-NEXT: v_not_b32_e32 v3, v0 | |
; VARIANT1-NEXT: s_waitcnt lgkmcnt(0) | |
; VARIANT1-NEXT: buffer_store_dword v0, v[1:2], s[4:7], 0 addr64 | |
-; VARIANT1-NEXT: s_barrier | |
; VARIANT1-NEXT: v_add_i32_e32 v3, vcc, s0, v3 | |
+; VARIANT1-NEXT: s_barrier | |
; VARIANT1-NEXT: v_ashrrev_i32_e32 v4, 31, v3 | |
; VARIANT1-NEXT: v_lshl_b64 v[3:4], v[3:4], 2 | |
; VARIANT1-NEXT: s_waitcnt expcnt(0) | |
@@ -54,14 +54,14 @@ define amdgpu_kernel void @test_barrier(ptr addrspace(1) %out, i32 %size) #0 { | |
; VARIANT2-NEXT: v_lshlrev_b32_e32 v2, 2, v0 | |
; VARIANT2-NEXT: s_waitcnt lgkmcnt(0) | |
; VARIANT2-NEXT: global_store_dword v2, v0, s[2:3] | |
+; VARIANT2-NEXT: s_waitcnt vmcnt(0) | |
+; VARIANT2-NEXT: s_barrier | |
; VARIANT2-NEXT: v_xad_u32 v0, v0, -1, s4 | |
; VARIANT2-NEXT: v_ashrrev_i32_e32 v1, 31, v0 | |
; VARIANT2-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1] | |
; VARIANT2-NEXT: v_mov_b32_e32 v3, s3 | |
; VARIANT2-NEXT: v_add_co_u32_e32 v0, vcc, s2, v0 | |
; VARIANT2-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc | |
-; VARIANT2-NEXT: s_waitcnt vmcnt(0) | |
-; VARIANT2-NEXT: s_barrier | |
; VARIANT2-NEXT: global_load_dword v0, v[0:1], off | |
; VARIANT2-NEXT: s_waitcnt vmcnt(0) | |
; VARIANT2-NEXT: global_store_dword v2, v0, s[2:3] | |
@@ -74,13 +74,13 @@ define amdgpu_kernel void @test_barrier(ptr addrspace(1) %out, i32 %size) #0 { | |
; VARIANT3-NEXT: v_lshlrev_b32_e32 v2, 2, v0 | |
; VARIANT3-NEXT: s_waitcnt lgkmcnt(0) | |
; VARIANT3-NEXT: global_store_dword v2, v0, s[2:3] | |
+; VARIANT3-NEXT: s_barrier | |
; VARIANT3-NEXT: v_xad_u32 v0, v0, -1, s4 | |
; VARIANT3-NEXT: v_ashrrev_i32_e32 v1, 31, v0 | |
; VARIANT3-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1] | |
; VARIANT3-NEXT: v_mov_b32_e32 v3, s3 | |
; VARIANT3-NEXT: v_add_co_u32_e32 v0, vcc, s2, v0 | |
; VARIANT3-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc | |
-; VARIANT3-NEXT: s_barrier | |
; VARIANT3-NEXT: global_load_dword v0, v[0:1], off | |
; VARIANT3-NEXT: s_waitcnt vmcnt(0) | |
; VARIANT3-NEXT: global_store_dword v2, v0, s[2:3] | |
diff --git a/llvm/test/CodeGen/AMDGPU/local-memory.amdgcn.ll b/llvm/test/CodeGen/AMDGPU/local-memory.amdgcn.ll | |
index 27207ea94479..b57871feb07c 100644 | |
--- a/llvm/test/CodeGen/AMDGPU/local-memory.amdgcn.ll | |
+++ b/llvm/test/CodeGen/AMDGPU/local-memory.amdgcn.ll | |
@@ -10,13 +10,13 @@ define amdgpu_kernel void @local_memory(i32 addrspace(1)* %out) #0 { | |
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0 | |
; GCN-NEXT: s_mov_b32 m0, -1 | |
; GCN-NEXT: ds_write_b32 v1, v0 | |
+; GCN-NEXT: s_waitcnt lgkmcnt(0) | |
+; GCN-NEXT: s_barrier | |
; GCN-NEXT: v_add_i32_e32 v0, vcc, 1, v0 | |
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 16, v0 | |
; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc | |
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 | |
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 | |
-; GCN-NEXT: s_waitcnt lgkmcnt(0) | |
-; GCN-NEXT: s_barrier | |
; GCN-NEXT: ds_read_b32 v0, v0 | |
; GCN-NEXT: s_mov_b32 s2, 0 | |
; GCN-NEXT: s_mov_b32 s3, 0xf000 | |
@@ -50,19 +50,18 @@ define amdgpu_kernel void @local_memory_two_objects(i32 addrspace(1)* %out) #0 { | |
; SI-NEXT: v_lshlrev_b32_e32 v2, 1, v0 | |
; SI-NEXT: s_mov_b32 m0, -1 | |
; SI-NEXT: ds_write2_b32 v1, v0, v2 offset1:4 | |
-; SI-NEXT: v_sub_i32_e32 v0, vcc, 12, v1 | |
-; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 | |
; SI-NEXT: s_waitcnt lgkmcnt(0) | |
; SI-NEXT: s_barrier | |
+; SI-NEXT: v_sub_i32_e32 v0, vcc, 12, v1 | |
+; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 | |
; SI-NEXT: v_sub_i32_e32 v2, vcc, 28, v1 | |
; SI-NEXT: ds_read_b32 v0, v0 | |
; SI-NEXT: ds_read_b32 v3, v2 | |
; SI-NEXT: s_mov_b32 s3, 0xf000 | |
; SI-NEXT: s_mov_b32 s2, 0 | |
; SI-NEXT: v_mov_b32_e32 v2, 0 | |
-; SI-NEXT: s_waitcnt lgkmcnt(1) | |
-; SI-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 | |
; SI-NEXT: s_waitcnt lgkmcnt(0) | |
+; SI-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 | |
; SI-NEXT: buffer_store_dword v3, v[1:2], s[0:3], 0 addr64 offset:16 | |
; SI-NEXT: s_endpgm | |
; | |
@@ -72,10 +71,10 @@ define amdgpu_kernel void @local_memory_two_objects(i32 addrspace(1)* %out) #0 { | |
; CI-NEXT: v_lshlrev_b32_e32 v2, 1, v0 | |
; CI-NEXT: s_mov_b32 m0, -1 | |
; CI-NEXT: ds_write2_b32 v1, v0, v2 offset1:4 | |
-; CI-NEXT: v_sub_i32_e32 v0, vcc, 0, v1 | |
-; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 | |
; CI-NEXT: s_waitcnt lgkmcnt(0) | |
; CI-NEXT: s_barrier | |
+; CI-NEXT: v_sub_i32_e32 v0, vcc, 0, v1 | |
+; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 | |
; CI-NEXT: ds_read2_b32 v[3:4], v0 offset0:3 offset1:7 | |
; CI-NEXT: s_mov_b32 s3, 0xf000 | |
; CI-NEXT: s_mov_b32 s2, 0 | |
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.ll b/llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.ll | |
index ecf6dd1eb17e..d256a18762a3 100644 | |
--- a/llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.ll | |
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.ll | |
@@ -153,9 +153,9 @@ bb: | |
} | |
; GCN-LABEL: barrier_vmcnt_vscnt_flat_workgroup: | |
-; GCN: flat_load_{{dword|b32}} | |
; GFX8_9: s_waitcnt lgkmcnt(0){{$}} | |
-; GFX8_9: s_waitcnt vmcnt(0){{$}} | |
+; GCN: flat_load_{{dword|b32}} | |
+; GFX8_9: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} | |
; GFX10PLUS: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} | |
; GFX10PLUS: s_waitcnt_vscnt null, 0x0 | |
; GCN-NEXT: s_barrier |
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.