Skip to content

Instantly share code, notes, and snippets.

@dc03-work
Created February 15, 2024 11:16
Show Gist options
  • Save dc03-work/3d749a7be0dc893d86d2df0fbc31709a to your computer and use it in GitHub Desktop.
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 622a2b9cceb4..9a049d58c4d3 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1070,13 +1070,14 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
{s16, v8s16},
{s32, v2s32},
{s32, v4s32}})
- .moreElementsIf(
- [=](const LegalityQuery &Query) {
- return Query.Types[1].isVector() &&
- Query.Types[1].getElementType() != s8 &&
- Query.Types[1].getNumElements() & 1;
- },
- LegalizeMutations::moreElementsToNextPow2(1))
+ // .moreElementsIf(
+ // [=](const LegalityQuery &Query) {
+ // return Query.Types[1].isVector() &&
+ // Query.Types[1].getElementType() != s8 &&
+ // Query.Types[1].getNumElements() & 1;
+ // },
+ // LegalizeMutations::moreElementsToNextPow2(1))
+ .moreElementsToNextPow2(1)
.clampMaxNumElements(1, s64, 2)
.clampMaxNumElements(1, s32, 4)
.clampMaxNumElements(1, s16, 8)
diff --git a/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll b/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
index 76790d128d06..cddec99740d9 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
@@ -492,12 +492,27 @@ define i8 @sminv_v3i8(<3 x i8> %a) {
;
; CHECK-GI-LABEL: sminv_v3i8:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sxtb w8, w0
-; CHECK-GI-NEXT: cmp w8, w1, sxtb
-; CHECK-GI-NEXT: csel w8, w0, w1, lt
+; CHECK-GI-NEXT: fmov s0, w0
+; CHECK-GI-NEXT: fmov s1, w1
+; CHECK-GI-NEXT: mov w8, #127 // =0x7f
+; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT: fmov s1, w2
+; CHECK-GI-NEXT: mov v0.h[2], v1.h[0]
+; CHECK-GI-NEXT: mov v0.h[3], v0.h[0]
+; CHECK-GI-NEXT: mov v0.h[3], w8
+; CHECK-GI-NEXT: umov w8, v0.h[0]
+; CHECK-GI-NEXT: umov w9, v0.h[1]
+; CHECK-GI-NEXT: umov w10, v0.h[2]
+; CHECK-GI-NEXT: umov w12, v0.h[3]
+; CHECK-GI-NEXT: sxtb w11, w8
+; CHECK-GI-NEXT: cmp w11, w9, sxtb
+; CHECK-GI-NEXT: sxtb w11, w10
+; CHECK-GI-NEXT: csel w8, w8, w9, lt
+; CHECK-GI-NEXT: cmp w11, w12, sxtb
; CHECK-GI-NEXT: sxtb w9, w8
-; CHECK-GI-NEXT: cmp w9, w2, sxtb
-; CHECK-GI-NEXT: csel w0, w8, w2, lt
+; CHECK-GI-NEXT: csel w10, w10, w12, lt
+; CHECK-GI-NEXT: cmp w9, w10, sxtb
+; CHECK-GI-NEXT: csel w0, w8, w10, lt
; CHECK-GI-NEXT: ret
entry:
%arg1 = call i8 @llvm.vector.reduce.smin.v3i8(<3 x i8> %a)
@@ -826,12 +841,27 @@ define i8 @smaxv_v3i8(<3 x i8> %a) {
;
; CHECK-GI-LABEL: smaxv_v3i8:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sxtb w8, w0
-; CHECK-GI-NEXT: cmp w8, w1, sxtb
-; CHECK-GI-NEXT: csel w8, w0, w1, gt
+; CHECK-GI-NEXT: fmov s0, w0
+; CHECK-GI-NEXT: fmov s1, w1
+; CHECK-GI-NEXT: mov w8, #65408 // =0xff80
+; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT: fmov s1, w2
+; CHECK-GI-NEXT: mov v0.h[2], v1.h[0]
+; CHECK-GI-NEXT: mov v0.h[3], v0.h[0]
+; CHECK-GI-NEXT: mov v0.h[3], w8
+; CHECK-GI-NEXT: umov w8, v0.h[0]
+; CHECK-GI-NEXT: umov w9, v0.h[1]
+; CHECK-GI-NEXT: umov w10, v0.h[2]
+; CHECK-GI-NEXT: umov w12, v0.h[3]
+; CHECK-GI-NEXT: sxtb w11, w8
+; CHECK-GI-NEXT: cmp w11, w9, sxtb
+; CHECK-GI-NEXT: sxtb w11, w10
+; CHECK-GI-NEXT: csel w8, w8, w9, gt
+; CHECK-GI-NEXT: cmp w11, w12, sxtb
; CHECK-GI-NEXT: sxtb w9, w8
-; CHECK-GI-NEXT: cmp w9, w2, sxtb
-; CHECK-GI-NEXT: csel w0, w8, w2, gt
+; CHECK-GI-NEXT: csel w10, w10, w12, gt
+; CHECK-GI-NEXT: cmp w9, w10, sxtb
+; CHECK-GI-NEXT: csel w0, w8, w10, gt
; CHECK-GI-NEXT: ret
entry:
%arg1 = call i8 @llvm.vector.reduce.smax.v3i8(<3 x i8> %a)
@@ -1168,12 +1198,27 @@ define i8 @uminv_v3i8(<3 x i8> %a) {
;
; CHECK-GI-LABEL: uminv_v3i8:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: and w8, w0, #0xff
-; CHECK-GI-NEXT: cmp w8, w1, uxtb
-; CHECK-GI-NEXT: csel w8, w0, w1, lo
-; CHECK-GI-NEXT: and w9, w8, #0xff
-; CHECK-GI-NEXT: cmp w9, w2, uxtb
-; CHECK-GI-NEXT: csel w0, w8, w2, lo
+; CHECK-GI-NEXT: fmov s0, w0
+; CHECK-GI-NEXT: fmov s1, w1
+; CHECK-GI-NEXT: mov w8, #65535 // =0xffff
+; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT: fmov s1, w2
+; CHECK-GI-NEXT: mov v0.h[2], v1.h[0]
+; CHECK-GI-NEXT: mov v0.h[3], v0.h[0]
+; CHECK-GI-NEXT: mov v0.h[3], w8
+; CHECK-GI-NEXT: umov w8, v0.h[0]
+; CHECK-GI-NEXT: umov w9, v0.h[1]
+; CHECK-GI-NEXT: umov w10, v0.h[2]
+; CHECK-GI-NEXT: umov w11, v0.h[3]
+; CHECK-GI-NEXT: and w12, w8, #0xff
+; CHECK-GI-NEXT: cmp w12, w9, uxtb
+; CHECK-GI-NEXT: and w12, w10, #0xff
+; CHECK-GI-NEXT: csel w8, w8, w9, lo
+; CHECK-GI-NEXT: cmp w12, w11, uxtb
+; CHECK-GI-NEXT: csel w9, w10, w11, lo
+; CHECK-GI-NEXT: and w10, w8, #0xff
+; CHECK-GI-NEXT: cmp w10, w9, uxtb
+; CHECK-GI-NEXT: csel w0, w8, w9, lo
; CHECK-GI-NEXT: ret
entry:
%arg1 = call i8 @llvm.vector.reduce.umin.v3i8(<3 x i8> %a)
@@ -1509,12 +1554,27 @@ define i8 @umaxv_v3i8(<3 x i8> %a) {
;
; CHECK-GI-LABEL: umaxv_v3i8:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: and w8, w0, #0xff
-; CHECK-GI-NEXT: cmp w8, w1, uxtb
-; CHECK-GI-NEXT: csel w8, w0, w1, hi
-; CHECK-GI-NEXT: and w9, w8, #0xff
-; CHECK-GI-NEXT: cmp w9, w2, uxtb
-; CHECK-GI-NEXT: csel w0, w8, w2, hi
+; CHECK-GI-NEXT: fmov s0, w0
+; CHECK-GI-NEXT: fmov s1, w1
+; CHECK-GI-NEXT: mov w8, #0 // =0x0
+; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT: fmov s1, w2
+; CHECK-GI-NEXT: mov v0.h[2], v1.h[0]
+; CHECK-GI-NEXT: mov v0.h[3], v0.h[0]
+; CHECK-GI-NEXT: mov v0.h[3], w8
+; CHECK-GI-NEXT: umov w8, v0.h[0]
+; CHECK-GI-NEXT: umov w9, v0.h[1]
+; CHECK-GI-NEXT: umov w10, v0.h[2]
+; CHECK-GI-NEXT: umov w11, v0.h[3]
+; CHECK-GI-NEXT: and w12, w8, #0xff
+; CHECK-GI-NEXT: cmp w12, w9, uxtb
+; CHECK-GI-NEXT: and w12, w10, #0xff
+; CHECK-GI-NEXT: csel w8, w8, w9, hi
+; CHECK-GI-NEXT: cmp w12, w11, uxtb
+; CHECK-GI-NEXT: csel w9, w10, w11, hi
+; CHECK-GI-NEXT: and w10, w8, #0xff
+; CHECK-GI-NEXT: cmp w10, w9, uxtb
+; CHECK-GI-NEXT: csel w0, w8, w9, hi
; CHECK-GI-NEXT: ret
entry:
%arg1 = call i8 @llvm.vector.reduce.umax.v3i8(<3 x i8> %a)
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
index 6d848e7b5c7c..22fcf0ecccf4 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
@@ -144,12 +144,27 @@ define i8 @test_v3i8(<3 x i8> %a) nounwind {
;
; CHECK-GI-LABEL: test_v3i8:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: and w8, w0, #0xff
-; CHECK-GI-NEXT: cmp w8, w1, uxtb
-; CHECK-GI-NEXT: csel w8, w0, w1, hi
-; CHECK-GI-NEXT: and w9, w8, #0xff
-; CHECK-GI-NEXT: cmp w9, w2, uxtb
-; CHECK-GI-NEXT: csel w0, w8, w2, hi
+; CHECK-GI-NEXT: fmov s0, w0
+; CHECK-GI-NEXT: fmov s1, w1
+; CHECK-GI-NEXT: mov w8, #0 // =0x0
+; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT: fmov s1, w2
+; CHECK-GI-NEXT: mov v0.h[2], v1.h[0]
+; CHECK-GI-NEXT: mov v0.h[3], v0.h[0]
+; CHECK-GI-NEXT: mov v0.h[3], w8
+; CHECK-GI-NEXT: umov w8, v0.h[0]
+; CHECK-GI-NEXT: umov w9, v0.h[1]
+; CHECK-GI-NEXT: umov w10, v0.h[2]
+; CHECK-GI-NEXT: umov w11, v0.h[3]
+; CHECK-GI-NEXT: and w12, w8, #0xff
+; CHECK-GI-NEXT: cmp w12, w9, uxtb
+; CHECK-GI-NEXT: and w12, w10, #0xff
+; CHECK-GI-NEXT: csel w8, w8, w9, hi
+; CHECK-GI-NEXT: cmp w12, w11, uxtb
+; CHECK-GI-NEXT: csel w9, w10, w11, hi
+; CHECK-GI-NEXT: and w10, w8, #0xff
+; CHECK-GI-NEXT: cmp w10, w9, uxtb
+; CHECK-GI-NEXT: csel w0, w8, w9, hi
; CHECK-GI-NEXT: ret
%b = call i8 @llvm.vector.reduce.umax.v3i8(<3 x i8> %a)
ret i8 %b
@@ -167,33 +182,16 @@ define i8 @test_v9i8(<9 x i8> %a) nounwind {
;
; CHECK-GI-LABEL: test_v9i8:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: mov b1, v0.b[1]
-; CHECK-GI-NEXT: umov w8, v0.b[0]
-; CHECK-GI-NEXT: umov w9, v0.b[1]
-; CHECK-GI-NEXT: umov w10, v0.b[2]
-; CHECK-GI-NEXT: fmov w11, s1
-; CHECK-GI-NEXT: cmp w8, w11, uxtb
-; CHECK-GI-NEXT: umov w11, v0.b[3]
-; CHECK-GI-NEXT: csel w8, w8, w9, hi
-; CHECK-GI-NEXT: umov w9, v0.b[4]
-; CHECK-GI-NEXT: cmp w10, w8, uxtb
-; CHECK-GI-NEXT: csel w8, w8, w10, lo
-; CHECK-GI-NEXT: umov w10, v0.b[5]
-; CHECK-GI-NEXT: cmp w11, w8, uxtb
-; CHECK-GI-NEXT: csel w8, w8, w11, lo
-; CHECK-GI-NEXT: umov w11, v0.b[6]
-; CHECK-GI-NEXT: cmp w9, w8, uxtb
-; CHECK-GI-NEXT: csel w8, w8, w9, lo
-; CHECK-GI-NEXT: umov w9, v0.b[7]
-; CHECK-GI-NEXT: cmp w10, w8, uxtb
-; CHECK-GI-NEXT: csel w8, w8, w10, lo
-; CHECK-GI-NEXT: umov w10, v0.b[8]
-; CHECK-GI-NEXT: cmp w11, w8, uxtb
-; CHECK-GI-NEXT: csel w8, w8, w11, lo
-; CHECK-GI-NEXT: cmp w9, w8, uxtb
-; CHECK-GI-NEXT: csel w8, w8, w9, lo
-; CHECK-GI-NEXT: cmp w10, w8, uxtb
-; CHECK-GI-NEXT: csel w0, w8, w10, lo
+; CHECK-GI-NEXT: mov w8, #0 // =0x0
+; CHECK-GI-NEXT: mov v0.b[9], w8
+; CHECK-GI-NEXT: mov v0.b[10], w8
+; CHECK-GI-NEXT: mov v0.b[11], w8
+; CHECK-GI-NEXT: mov v0.b[12], w8
+; CHECK-GI-NEXT: mov v0.b[13], w8
+; CHECK-GI-NEXT: mov v0.b[14], w8
+; CHECK-GI-NEXT: mov v0.b[15], w8
+; CHECK-GI-NEXT: umaxv b0, v0.16b
+; CHECK-GI-NEXT: fmov w0, s0
; CHECK-GI-NEXT: ret
%b = call i8 @llvm.vector.reduce.umax.v9i8(<9 x i8> %a)
ret i8 %b
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment