-
-
Save dc03-work/3d749a7be0dc893d86d2df0fbc31709a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp | |
index 622a2b9cceb4..9a049d58c4d3 100644 | |
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp | |
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp | |
@@ -1070,13 +1070,14 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) | |
{s16, v8s16}, | |
{s32, v2s32}, | |
{s32, v4s32}}) | |
- .moreElementsIf( | |
- [=](const LegalityQuery &Query) { | |
- return Query.Types[1].isVector() && | |
- Query.Types[1].getElementType() != s8 && | |
- Query.Types[1].getNumElements() & 1; | |
- }, | |
- LegalizeMutations::moreElementsToNextPow2(1)) | |
+ // .moreElementsIf( | |
+ // [=](const LegalityQuery &Query) { | |
+ // return Query.Types[1].isVector() && | |
+ // Query.Types[1].getElementType() != s8 && | |
+ // Query.Types[1].getNumElements() & 1; | |
+ // }, | |
+ // LegalizeMutations::moreElementsToNextPow2(1)) | |
+ .moreElementsToNextPow2(1) | |
.clampMaxNumElements(1, s64, 2) | |
.clampMaxNumElements(1, s32, 4) | |
.clampMaxNumElements(1, s16, 8) | |
diff --git a/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll b/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll | |
index 76790d128d06..cddec99740d9 100644 | |
--- a/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll | |
+++ b/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll | |
@@ -492,12 +492,27 @@ define i8 @sminv_v3i8(<3 x i8> %a) { | |
; | |
; CHECK-GI-LABEL: sminv_v3i8: | |
; CHECK-GI: // %bb.0: // %entry | |
-; CHECK-GI-NEXT: sxtb w8, w0 | |
-; CHECK-GI-NEXT: cmp w8, w1, sxtb | |
-; CHECK-GI-NEXT: csel w8, w0, w1, lt | |
+; CHECK-GI-NEXT: fmov s0, w0 | |
+; CHECK-GI-NEXT: fmov s1, w1 | |
+; CHECK-GI-NEXT: mov w8, #127 // =0x7f | |
+; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] | |
+; CHECK-GI-NEXT: fmov s1, w2 | |
+; CHECK-GI-NEXT: mov v0.h[2], v1.h[0] | |
+; CHECK-GI-NEXT: mov v0.h[3], v0.h[0] | |
+; CHECK-GI-NEXT: mov v0.h[3], w8 | |
+; CHECK-GI-NEXT: umov w8, v0.h[0] | |
+; CHECK-GI-NEXT: umov w9, v0.h[1] | |
+; CHECK-GI-NEXT: umov w10, v0.h[2] | |
+; CHECK-GI-NEXT: umov w12, v0.h[3] | |
+; CHECK-GI-NEXT: sxtb w11, w8 | |
+; CHECK-GI-NEXT: cmp w11, w9, sxtb | |
+; CHECK-GI-NEXT: sxtb w11, w10 | |
+; CHECK-GI-NEXT: csel w8, w8, w9, lt | |
+; CHECK-GI-NEXT: cmp w11, w12, sxtb | |
; CHECK-GI-NEXT: sxtb w9, w8 | |
-; CHECK-GI-NEXT: cmp w9, w2, sxtb | |
-; CHECK-GI-NEXT: csel w0, w8, w2, lt | |
+; CHECK-GI-NEXT: csel w10, w10, w12, lt | |
+; CHECK-GI-NEXT: cmp w9, w10, sxtb | |
+; CHECK-GI-NEXT: csel w0, w8, w10, lt | |
; CHECK-GI-NEXT: ret | |
entry: | |
%arg1 = call i8 @llvm.vector.reduce.smin.v3i8(<3 x i8> %a) | |
@@ -826,12 +841,27 @@ define i8 @smaxv_v3i8(<3 x i8> %a) { | |
; | |
; CHECK-GI-LABEL: smaxv_v3i8: | |
; CHECK-GI: // %bb.0: // %entry | |
-; CHECK-GI-NEXT: sxtb w8, w0 | |
-; CHECK-GI-NEXT: cmp w8, w1, sxtb | |
-; CHECK-GI-NEXT: csel w8, w0, w1, gt | |
+; CHECK-GI-NEXT: fmov s0, w0 | |
+; CHECK-GI-NEXT: fmov s1, w1 | |
+; CHECK-GI-NEXT: mov w8, #65408 // =0xff80 | |
+; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] | |
+; CHECK-GI-NEXT: fmov s1, w2 | |
+; CHECK-GI-NEXT: mov v0.h[2], v1.h[0] | |
+; CHECK-GI-NEXT: mov v0.h[3], v0.h[0] | |
+; CHECK-GI-NEXT: mov v0.h[3], w8 | |
+; CHECK-GI-NEXT: umov w8, v0.h[0] | |
+; CHECK-GI-NEXT: umov w9, v0.h[1] | |
+; CHECK-GI-NEXT: umov w10, v0.h[2] | |
+; CHECK-GI-NEXT: umov w12, v0.h[3] | |
+; CHECK-GI-NEXT: sxtb w11, w8 | |
+; CHECK-GI-NEXT: cmp w11, w9, sxtb | |
+; CHECK-GI-NEXT: sxtb w11, w10 | |
+; CHECK-GI-NEXT: csel w8, w8, w9, gt | |
+; CHECK-GI-NEXT: cmp w11, w12, sxtb | |
; CHECK-GI-NEXT: sxtb w9, w8 | |
-; CHECK-GI-NEXT: cmp w9, w2, sxtb | |
-; CHECK-GI-NEXT: csel w0, w8, w2, gt | |
+; CHECK-GI-NEXT: csel w10, w10, w12, gt | |
+; CHECK-GI-NEXT: cmp w9, w10, sxtb | |
+; CHECK-GI-NEXT: csel w0, w8, w10, gt | |
; CHECK-GI-NEXT: ret | |
entry: | |
%arg1 = call i8 @llvm.vector.reduce.smax.v3i8(<3 x i8> %a) | |
@@ -1168,12 +1198,27 @@ define i8 @uminv_v3i8(<3 x i8> %a) { | |
; | |
; CHECK-GI-LABEL: uminv_v3i8: | |
; CHECK-GI: // %bb.0: // %entry | |
-; CHECK-GI-NEXT: and w8, w0, #0xff | |
-; CHECK-GI-NEXT: cmp w8, w1, uxtb | |
-; CHECK-GI-NEXT: csel w8, w0, w1, lo | |
-; CHECK-GI-NEXT: and w9, w8, #0xff | |
-; CHECK-GI-NEXT: cmp w9, w2, uxtb | |
-; CHECK-GI-NEXT: csel w0, w8, w2, lo | |
+; CHECK-GI-NEXT: fmov s0, w0 | |
+; CHECK-GI-NEXT: fmov s1, w1 | |
+; CHECK-GI-NEXT: mov w8, #65535 // =0xffff | |
+; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] | |
+; CHECK-GI-NEXT: fmov s1, w2 | |
+; CHECK-GI-NEXT: mov v0.h[2], v1.h[0] | |
+; CHECK-GI-NEXT: mov v0.h[3], v0.h[0] | |
+; CHECK-GI-NEXT: mov v0.h[3], w8 | |
+; CHECK-GI-NEXT: umov w8, v0.h[0] | |
+; CHECK-GI-NEXT: umov w9, v0.h[1] | |
+; CHECK-GI-NEXT: umov w10, v0.h[2] | |
+; CHECK-GI-NEXT: umov w11, v0.h[3] | |
+; CHECK-GI-NEXT: and w12, w8, #0xff | |
+; CHECK-GI-NEXT: cmp w12, w9, uxtb | |
+; CHECK-GI-NEXT: and w12, w10, #0xff | |
+; CHECK-GI-NEXT: csel w8, w8, w9, lo | |
+; CHECK-GI-NEXT: cmp w12, w11, uxtb | |
+; CHECK-GI-NEXT: csel w9, w10, w11, lo | |
+; CHECK-GI-NEXT: and w10, w8, #0xff | |
+; CHECK-GI-NEXT: cmp w10, w9, uxtb | |
+; CHECK-GI-NEXT: csel w0, w8, w9, lo | |
; CHECK-GI-NEXT: ret | |
entry: | |
%arg1 = call i8 @llvm.vector.reduce.umin.v3i8(<3 x i8> %a) | |
@@ -1509,12 +1554,27 @@ define i8 @umaxv_v3i8(<3 x i8> %a) { | |
; | |
; CHECK-GI-LABEL: umaxv_v3i8: | |
; CHECK-GI: // %bb.0: // %entry | |
-; CHECK-GI-NEXT: and w8, w0, #0xff | |
-; CHECK-GI-NEXT: cmp w8, w1, uxtb | |
-; CHECK-GI-NEXT: csel w8, w0, w1, hi | |
-; CHECK-GI-NEXT: and w9, w8, #0xff | |
-; CHECK-GI-NEXT: cmp w9, w2, uxtb | |
-; CHECK-GI-NEXT: csel w0, w8, w2, hi | |
+; CHECK-GI-NEXT: fmov s0, w0 | |
+; CHECK-GI-NEXT: fmov s1, w1 | |
+; CHECK-GI-NEXT: mov w8, #0 // =0x0 | |
+; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] | |
+; CHECK-GI-NEXT: fmov s1, w2 | |
+; CHECK-GI-NEXT: mov v0.h[2], v1.h[0] | |
+; CHECK-GI-NEXT: mov v0.h[3], v0.h[0] | |
+; CHECK-GI-NEXT: mov v0.h[3], w8 | |
+; CHECK-GI-NEXT: umov w8, v0.h[0] | |
+; CHECK-GI-NEXT: umov w9, v0.h[1] | |
+; CHECK-GI-NEXT: umov w10, v0.h[2] | |
+; CHECK-GI-NEXT: umov w11, v0.h[3] | |
+; CHECK-GI-NEXT: and w12, w8, #0xff | |
+; CHECK-GI-NEXT: cmp w12, w9, uxtb | |
+; CHECK-GI-NEXT: and w12, w10, #0xff | |
+; CHECK-GI-NEXT: csel w8, w8, w9, hi | |
+; CHECK-GI-NEXT: cmp w12, w11, uxtb | |
+; CHECK-GI-NEXT: csel w9, w10, w11, hi | |
+; CHECK-GI-NEXT: and w10, w8, #0xff | |
+; CHECK-GI-NEXT: cmp w10, w9, uxtb | |
+; CHECK-GI-NEXT: csel w0, w8, w9, hi | |
; CHECK-GI-NEXT: ret | |
entry: | |
%arg1 = call i8 @llvm.vector.reduce.umax.v3i8(<3 x i8> %a) | |
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll | |
index 6d848e7b5c7c..22fcf0ecccf4 100644 | |
--- a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll | |
+++ b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll | |
@@ -144,12 +144,27 @@ define i8 @test_v3i8(<3 x i8> %a) nounwind { | |
; | |
; CHECK-GI-LABEL: test_v3i8: | |
; CHECK-GI: // %bb.0: | |
-; CHECK-GI-NEXT: and w8, w0, #0xff | |
-; CHECK-GI-NEXT: cmp w8, w1, uxtb | |
-; CHECK-GI-NEXT: csel w8, w0, w1, hi | |
-; CHECK-GI-NEXT: and w9, w8, #0xff | |
-; CHECK-GI-NEXT: cmp w9, w2, uxtb | |
-; CHECK-GI-NEXT: csel w0, w8, w2, hi | |
+; CHECK-GI-NEXT: fmov s0, w0 | |
+; CHECK-GI-NEXT: fmov s1, w1 | |
+; CHECK-GI-NEXT: mov w8, #0 // =0x0 | |
+; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] | |
+; CHECK-GI-NEXT: fmov s1, w2 | |
+; CHECK-GI-NEXT: mov v0.h[2], v1.h[0] | |
+; CHECK-GI-NEXT: mov v0.h[3], v0.h[0] | |
+; CHECK-GI-NEXT: mov v0.h[3], w8 | |
+; CHECK-GI-NEXT: umov w8, v0.h[0] | |
+; CHECK-GI-NEXT: umov w9, v0.h[1] | |
+; CHECK-GI-NEXT: umov w10, v0.h[2] | |
+; CHECK-GI-NEXT: umov w11, v0.h[3] | |
+; CHECK-GI-NEXT: and w12, w8, #0xff | |
+; CHECK-GI-NEXT: cmp w12, w9, uxtb | |
+; CHECK-GI-NEXT: and w12, w10, #0xff | |
+; CHECK-GI-NEXT: csel w8, w8, w9, hi | |
+; CHECK-GI-NEXT: cmp w12, w11, uxtb | |
+; CHECK-GI-NEXT: csel w9, w10, w11, hi | |
+; CHECK-GI-NEXT: and w10, w8, #0xff | |
+; CHECK-GI-NEXT: cmp w10, w9, uxtb | |
+; CHECK-GI-NEXT: csel w0, w8, w9, hi | |
; CHECK-GI-NEXT: ret | |
%b = call i8 @llvm.vector.reduce.umax.v3i8(<3 x i8> %a) | |
ret i8 %b | |
@@ -167,33 +182,16 @@ define i8 @test_v9i8(<9 x i8> %a) nounwind { | |
; | |
; CHECK-GI-LABEL: test_v9i8: | |
; CHECK-GI: // %bb.0: | |
-; CHECK-GI-NEXT: mov b1, v0.b[1] | |
-; CHECK-GI-NEXT: umov w8, v0.b[0] | |
-; CHECK-GI-NEXT: umov w9, v0.b[1] | |
-; CHECK-GI-NEXT: umov w10, v0.b[2] | |
-; CHECK-GI-NEXT: fmov w11, s1 | |
-; CHECK-GI-NEXT: cmp w8, w11, uxtb | |
-; CHECK-GI-NEXT: umov w11, v0.b[3] | |
-; CHECK-GI-NEXT: csel w8, w8, w9, hi | |
-; CHECK-GI-NEXT: umov w9, v0.b[4] | |
-; CHECK-GI-NEXT: cmp w10, w8, uxtb | |
-; CHECK-GI-NEXT: csel w8, w8, w10, lo | |
-; CHECK-GI-NEXT: umov w10, v0.b[5] | |
-; CHECK-GI-NEXT: cmp w11, w8, uxtb | |
-; CHECK-GI-NEXT: csel w8, w8, w11, lo | |
-; CHECK-GI-NEXT: umov w11, v0.b[6] | |
-; CHECK-GI-NEXT: cmp w9, w8, uxtb | |
-; CHECK-GI-NEXT: csel w8, w8, w9, lo | |
-; CHECK-GI-NEXT: umov w9, v0.b[7] | |
-; CHECK-GI-NEXT: cmp w10, w8, uxtb | |
-; CHECK-GI-NEXT: csel w8, w8, w10, lo | |
-; CHECK-GI-NEXT: umov w10, v0.b[8] | |
-; CHECK-GI-NEXT: cmp w11, w8, uxtb | |
-; CHECK-GI-NEXT: csel w8, w8, w11, lo | |
-; CHECK-GI-NEXT: cmp w9, w8, uxtb | |
-; CHECK-GI-NEXT: csel w8, w8, w9, lo | |
-; CHECK-GI-NEXT: cmp w10, w8, uxtb | |
-; CHECK-GI-NEXT: csel w0, w8, w10, lo | |
+; CHECK-GI-NEXT: mov w8, #0 // =0x0 | |
+; CHECK-GI-NEXT: mov v0.b[9], w8 | |
+; CHECK-GI-NEXT: mov v0.b[10], w8 | |
+; CHECK-GI-NEXT: mov v0.b[11], w8 | |
+; CHECK-GI-NEXT: mov v0.b[12], w8 | |
+; CHECK-GI-NEXT: mov v0.b[13], w8 | |
+; CHECK-GI-NEXT: mov v0.b[14], w8 | |
+; CHECK-GI-NEXT: mov v0.b[15], w8 | |
+; CHECK-GI-NEXT: umaxv b0, v0.16b | |
+; CHECK-GI-NEXT: fmov w0, s0 | |
; CHECK-GI-NEXT: ret | |
%b = call i8 @llvm.vector.reduce.umax.v9i8(<9 x i8> %a) | |
ret i8 %b |
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.