-
-
Save nikic/5f1d3b0c78f646f74c84aac68a40de76 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
commit dec419097a7ab8f78d6df87fb59792c746a5a7c0 | |
Author: Nikita Popov <npopov@redhat.com> | |
Date: Thu May 25 11:11:15 2023 +0200 | |
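wip: stop copying the original node's flags onto the promoted result in DAGTypeLegalizer::SetPromotedInteger, and update the affected CodeGen test expectations | |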
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | |
index a31e648dfd65..9954421c6690 100644 | |
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | |
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | |
@@ -715,7 +715,7 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { | |
auto &OpIdEntry = PromotedIntegers[getTableId(Op)]; | |
assert((OpIdEntry == 0) && "Node is already promoted!"); | |
OpIdEntry = getTableId(Result); | |
- Result->setFlags(Op->getFlags()); | |
+ //Result->setFlags(Op->getFlags()); | |
DAG.transferDbgValues(Op, Result); | |
} | |
diff --git a/llvm/test/CodeGen/AArch64/arm64-vhadd.ll b/llvm/test/CodeGen/AArch64/arm64-vhadd.ll | |
index d5e4aafd1b2c..0626d9e1147e 100644 | |
--- a/llvm/test/CodeGen/AArch64/arm64-vhadd.ll | |
+++ b/llvm/test/CodeGen/AArch64/arm64-vhadd.ll | |
@@ -873,8 +873,8 @@ define <2 x i16> @hadd8x2_sext_asr(<2 x i8> %src1, <2 x i8> %src2) { | |
; CHECK-NEXT: shl.2s v0, v0, #24 | |
; CHECK-NEXT: shl.2s v1, v1, #24 | |
; CHECK-NEXT: sshr.2s v0, v0, #24 | |
-; CHECK-NEXT: sshr.2s v1, v1, #24 | |
-; CHECK-NEXT: shadd.2s v0, v0, v1 | |
+; CHECK-NEXT: ssra.2s v0, v1, #24 | |
+; CHECK-NEXT: sshr.2s v0, v0, #1 | |
; CHECK-NEXT: ret | |
%zextsrc1 = sext <2 x i8> %src1 to <2 x i16> | |
%zextsrc2 = sext <2 x i8> %src2 to <2 x i16> | |
@@ -889,7 +889,8 @@ define <2 x i16> @hadd8x2_zext_asr(<2 x i8> %src1, <2 x i8> %src2) { | |
; CHECK-NEXT: movi d2, #0x0000ff000000ff | |
; CHECK-NEXT: and.8b v0, v0, v2 | |
; CHECK-NEXT: and.8b v1, v1, v2 | |
-; CHECK-NEXT: uhadd.2s v0, v0, v1 | |
+; CHECK-NEXT: add.2s v0, v0, v1 | |
+; CHECK-NEXT: ushr.2s v0, v0, #1 | |
; CHECK-NEXT: ret | |
%zextsrc1 = zext <2 x i8> %src1 to <2 x i16> | |
%zextsrc2 = zext <2 x i8> %src2 to <2 x i16> | |
@@ -922,7 +923,8 @@ define <2 x i16> @hadd8x2_zext_lsr(<2 x i8> %src1, <2 x i8> %src2) { | |
; CHECK-NEXT: movi d2, #0x0000ff000000ff | |
; CHECK-NEXT: and.8b v0, v0, v2 | |
; CHECK-NEXT: and.8b v1, v1, v2 | |
-; CHECK-NEXT: uhadd.2s v0, v0, v1 | |
+; CHECK-NEXT: add.2s v0, v0, v1 | |
+; CHECK-NEXT: ushr.2s v0, v0, #1 | |
; CHECK-NEXT: ret | |
%zextsrc1 = zext <2 x i8> %src1 to <2 x i16> | |
%zextsrc2 = zext <2 x i8> %src2 to <2 x i16> | |
@@ -1004,7 +1006,9 @@ define <2 x i16> @rhadd8x2_sext_asr(<2 x i8> %src1, <2 x i8> %src2) { | |
; CHECK-NEXT: shl.2s v1, v1, #24 | |
; CHECK-NEXT: sshr.2s v0, v0, #24 | |
; CHECK-NEXT: sshr.2s v1, v1, #24 | |
-; CHECK-NEXT: srhadd.2s v0, v0, v1 | |
+; CHECK-NEXT: mvn.8b v0, v0 | |
+; CHECK-NEXT: sub.2s v0, v1, v0 | |
+; CHECK-NEXT: sshr.2s v0, v0, #1 | |
; CHECK-NEXT: ret | |
%zextsrc1 = sext <2 x i8> %src1 to <2 x i16> | |
%zextsrc2 = sext <2 x i8> %src2 to <2 x i16> | |
@@ -1020,7 +1024,9 @@ define <2 x i16> @rhadd8x2_zext_asr(<2 x i8> %src1, <2 x i8> %src2) { | |
; CHECK-NEXT: movi d2, #0x0000ff000000ff | |
; CHECK-NEXT: and.8b v0, v0, v2 | |
; CHECK-NEXT: and.8b v1, v1, v2 | |
-; CHECK-NEXT: urhadd.2s v0, v0, v1 | |
+; CHECK-NEXT: mvn.8b v0, v0 | |
+; CHECK-NEXT: sub.2s v0, v1, v0 | |
+; CHECK-NEXT: ushr.2s v0, v0, #1 | |
; CHECK-NEXT: ret | |
%zextsrc1 = zext <2 x i8> %src1 to <2 x i16> | |
%zextsrc2 = zext <2 x i8> %src2 to <2 x i16> | |
@@ -1035,12 +1041,12 @@ define <2 x i16> @rhadd8x2_sext_lsr(<2 x i8> %src1, <2 x i8> %src2) { | |
; CHECK: // %bb.0: | |
; CHECK-NEXT: shl.2s v0, v0, #24 | |
; CHECK-NEXT: shl.2s v1, v1, #24 | |
-; CHECK-NEXT: movi.2s v2, #1 | |
+; CHECK-NEXT: movi d2, #0x00ffff0000ffff | |
; CHECK-NEXT: sshr.2s v0, v0, #24 | |
-; CHECK-NEXT: ssra.2s v0, v1, #24 | |
-; CHECK-NEXT: movi d1, #0x00ffff0000ffff | |
-; CHECK-NEXT: add.2s v0, v0, v2 | |
-; CHECK-NEXT: and.8b v0, v0, v1 | |
+; CHECK-NEXT: sshr.2s v1, v1, #24 | |
+; CHECK-NEXT: mvn.8b v0, v0 | |
+; CHECK-NEXT: sub.2s v0, v1, v0 | |
+; CHECK-NEXT: and.8b v0, v0, v2 | |
; CHECK-NEXT: ushr.2s v0, v0, #1 | |
; CHECK-NEXT: ret | |
%zextsrc1 = sext <2 x i8> %src1 to <2 x i16> | |
@@ -1057,7 +1063,9 @@ define <2 x i16> @rhadd8x2_zext_lsr(<2 x i8> %src1, <2 x i8> %src2) { | |
; CHECK-NEXT: movi d2, #0x0000ff000000ff | |
; CHECK-NEXT: and.8b v0, v0, v2 | |
; CHECK-NEXT: and.8b v1, v1, v2 | |
-; CHECK-NEXT: urhadd.2s v0, v0, v1 | |
+; CHECK-NEXT: mvn.8b v0, v0 | |
+; CHECK-NEXT: sub.2s v0, v1, v0 | |
+; CHECK-NEXT: ushr.2s v0, v0, #1 | |
; CHECK-NEXT: ret | |
%zextsrc1 = zext <2 x i8> %src1 to <2 x i16> | |
%zextsrc2 = zext <2 x i8> %src2 to <2 x i16> | |
diff --git a/llvm/test/CodeGen/AArch64/sve-hadd.ll b/llvm/test/CodeGen/AArch64/sve-hadd.ll | |
index c57f9f2a0cc1..28242547b572 100644 | |
--- a/llvm/test/CodeGen/AArch64/sve-hadd.ll | |
+++ b/llvm/test/CodeGen/AArch64/sve-hadd.ll | |
@@ -219,22 +219,14 @@ entry: | |
} | |
define <vscale x 2 x i16> @hadds_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) { | |
-; SVE-LABEL: hadds_v2i16: | |
-; SVE: // %bb.0: // %entry | |
-; SVE-NEXT: ptrue p0.d | |
-; SVE-NEXT: sxth z0.d, p0/m, z0.d | |
-; SVE-NEXT: sxth z1.d, p0/m, z1.d | |
-; SVE-NEXT: add z0.d, z0.d, z1.d | |
-; SVE-NEXT: asr z0.d, z0.d, #1 | |
-; SVE-NEXT: ret | |
-; | |
-; SVE2-LABEL: hadds_v2i16: | |
-; SVE2: // %bb.0: // %entry | |
-; SVE2-NEXT: ptrue p0.d | |
-; SVE2-NEXT: sxth z0.d, p0/m, z0.d | |
-; SVE2-NEXT: sxth z1.d, p0/m, z1.d | |
-; SVE2-NEXT: shadd z0.d, p0/m, z0.d, z1.d | |
-; SVE2-NEXT: ret | |
+; CHECK-LABEL: hadds_v2i16: | |
+; CHECK: // %bb.0: // %entry | |
+; CHECK-NEXT: ptrue p0.d | |
+; CHECK-NEXT: sxth z0.d, p0/m, z0.d | |
+; CHECK-NEXT: sxth z1.d, p0/m, z1.d | |
+; CHECK-NEXT: add z0.d, z0.d, z1.d | |
+; CHECK-NEXT: asr z0.d, z0.d, #1 | |
+; CHECK-NEXT: ret | |
entry: | |
%s0s = sext <vscale x 2 x i16> %s0 to <vscale x 2 x i32> | |
%s1s = sext <vscale x 2 x i16> %s1 to <vscale x 2 x i32> | |
@@ -264,21 +256,13 @@ entry: | |
} | |
define <vscale x 2 x i16> @haddu_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) { | |
-; SVE-LABEL: haddu_v2i16: | |
-; SVE: // %bb.0: // %entry | |
-; SVE-NEXT: and z0.d, z0.d, #0xffff | |
-; SVE-NEXT: and z1.d, z1.d, #0xffff | |
-; SVE-NEXT: add z0.d, z0.d, z1.d | |
-; SVE-NEXT: lsr z0.d, z0.d, #1 | |
-; SVE-NEXT: ret | |
-; | |
-; SVE2-LABEL: haddu_v2i16: | |
-; SVE2: // %bb.0: // %entry | |
-; SVE2-NEXT: ptrue p0.d | |
-; SVE2-NEXT: and z0.d, z0.d, #0xffff | |
-; SVE2-NEXT: and z1.d, z1.d, #0xffff | |
-; SVE2-NEXT: uhadd z0.d, p0/m, z0.d, z1.d | |
-; SVE2-NEXT: ret | |
+; CHECK-LABEL: haddu_v2i16: | |
+; CHECK: // %bb.0: // %entry | |
+; CHECK-NEXT: and z0.d, z0.d, #0xffff | |
+; CHECK-NEXT: and z1.d, z1.d, #0xffff | |
+; CHECK-NEXT: add z0.d, z0.d, z1.d | |
+; CHECK-NEXT: lsr z0.d, z0.d, #1 | |
+; CHECK-NEXT: ret | |
entry: | |
%s0s = zext <vscale x 2 x i16> %s0 to <vscale x 2 x i32> | |
%s1s = zext <vscale x 2 x i16> %s1 to <vscale x 2 x i32> | |
@@ -433,22 +417,14 @@ entry: | |
} | |
define <vscale x 4 x i8> @hadds_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) { | |
-; SVE-LABEL: hadds_v4i8: | |
-; SVE: // %bb.0: // %entry | |
-; SVE-NEXT: ptrue p0.s | |
-; SVE-NEXT: sxtb z0.s, p0/m, z0.s | |
-; SVE-NEXT: sxtb z1.s, p0/m, z1.s | |
-; SVE-NEXT: add z0.s, z0.s, z1.s | |
-; SVE-NEXT: asr z0.s, z0.s, #1 | |
-; SVE-NEXT: ret | |
-; | |
-; SVE2-LABEL: hadds_v4i8: | |
-; SVE2: // %bb.0: // %entry | |
-; SVE2-NEXT: ptrue p0.s | |
-; SVE2-NEXT: sxtb z0.s, p0/m, z0.s | |
-; SVE2-NEXT: sxtb z1.s, p0/m, z1.s | |
-; SVE2-NEXT: shadd z0.s, p0/m, z0.s, z1.s | |
-; SVE2-NEXT: ret | |
+; CHECK-LABEL: hadds_v4i8: | |
+; CHECK: // %bb.0: // %entry | |
+; CHECK-NEXT: ptrue p0.s | |
+; CHECK-NEXT: sxtb z0.s, p0/m, z0.s | |
+; CHECK-NEXT: sxtb z1.s, p0/m, z1.s | |
+; CHECK-NEXT: add z0.s, z0.s, z1.s | |
+; CHECK-NEXT: asr z0.s, z0.s, #1 | |
+; CHECK-NEXT: ret | |
entry: | |
%s0s = sext <vscale x 4 x i8> %s0 to <vscale x 4 x i16> | |
%s1s = sext <vscale x 4 x i8> %s1 to <vscale x 4 x i16> | |
@@ -478,21 +454,13 @@ entry: | |
} | |
define <vscale x 4 x i8> @haddu_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) { | |
-; SVE-LABEL: haddu_v4i8: | |
-; SVE: // %bb.0: // %entry | |
-; SVE-NEXT: and z0.s, z0.s, #0xff | |
-; SVE-NEXT: and z1.s, z1.s, #0xff | |
-; SVE-NEXT: add z0.s, z0.s, z1.s | |
-; SVE-NEXT: lsr z0.s, z0.s, #1 | |
-; SVE-NEXT: ret | |
-; | |
-; SVE2-LABEL: haddu_v4i8: | |
-; SVE2: // %bb.0: // %entry | |
-; SVE2-NEXT: ptrue p0.s | |
-; SVE2-NEXT: and z0.s, z0.s, #0xff | |
-; SVE2-NEXT: and z1.s, z1.s, #0xff | |
-; SVE2-NEXT: uhadd z0.s, p0/m, z0.s, z1.s | |
-; SVE2-NEXT: ret | |
+; CHECK-LABEL: haddu_v4i8: | |
+; CHECK: // %bb.0: // %entry | |
+; CHECK-NEXT: and z0.s, z0.s, #0xff | |
+; CHECK-NEXT: and z1.s, z1.s, #0xff | |
+; CHECK-NEXT: add z0.s, z0.s, z1.s | |
+; CHECK-NEXT: lsr z0.s, z0.s, #1 | |
+; CHECK-NEXT: ret | |
entry: | |
%s0s = zext <vscale x 4 x i8> %s0 to <vscale x 4 x i16> | |
%s1s = zext <vscale x 4 x i8> %s1 to <vscale x 4 x i16> | |
@@ -916,23 +884,15 @@ entry: | |
} | |
define <vscale x 2 x i16> @rhaddu_v2i16(<vscale x 2 x i16> %s0, <vscale x 2 x i16> %s1) { | |
-; SVE-LABEL: rhaddu_v2i16: | |
-; SVE: // %bb.0: // %entry | |
-; SVE-NEXT: mov z2.d, #-1 // =0xffffffffffffffff | |
-; SVE-NEXT: and z0.d, z0.d, #0xffff | |
-; SVE-NEXT: and z1.d, z1.d, #0xffff | |
-; SVE-NEXT: eor z0.d, z0.d, z2.d | |
-; SVE-NEXT: sub z0.d, z1.d, z0.d | |
-; SVE-NEXT: lsr z0.d, z0.d, #1 | |
-; SVE-NEXT: ret | |
-; | |
-; SVE2-LABEL: rhaddu_v2i16: | |
-; SVE2: // %bb.0: // %entry | |
-; SVE2-NEXT: ptrue p0.d | |
-; SVE2-NEXT: and z0.d, z0.d, #0xffff | |
-; SVE2-NEXT: and z1.d, z1.d, #0xffff | |
-; SVE2-NEXT: urhadd z0.d, p0/m, z0.d, z1.d | |
-; SVE2-NEXT: ret | |
+; CHECK-LABEL: rhaddu_v2i16: | |
+; CHECK: // %bb.0: // %entry | |
+; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff | |
+; CHECK-NEXT: and z0.d, z0.d, #0xffff | |
+; CHECK-NEXT: and z1.d, z1.d, #0xffff | |
+; CHECK-NEXT: eor z0.d, z0.d, z2.d | |
+; CHECK-NEXT: sub z0.d, z1.d, z0.d | |
+; CHECK-NEXT: lsr z0.d, z0.d, #1 | |
+; CHECK-NEXT: ret | |
entry: | |
%s0s = zext <vscale x 2 x i16> %s0 to <vscale x 2 x i32> | |
%s1s = zext <vscale x 2 x i16> %s1 to <vscale x 2 x i32> | |
@@ -1135,23 +1095,15 @@ entry: | |
} | |
define <vscale x 4 x i8> @rhaddu_v4i8(<vscale x 4 x i8> %s0, <vscale x 4 x i8> %s1) { | |
-; SVE-LABEL: rhaddu_v4i8: | |
-; SVE: // %bb.0: // %entry | |
-; SVE-NEXT: mov z2.s, #-1 // =0xffffffffffffffff | |
-; SVE-NEXT: and z0.s, z0.s, #0xff | |
-; SVE-NEXT: and z1.s, z1.s, #0xff | |
-; SVE-NEXT: eor z0.d, z0.d, z2.d | |
-; SVE-NEXT: sub z0.s, z1.s, z0.s | |
-; SVE-NEXT: lsr z0.s, z0.s, #1 | |
-; SVE-NEXT: ret | |
-; | |
-; SVE2-LABEL: rhaddu_v4i8: | |
-; SVE2: // %bb.0: // %entry | |
-; SVE2-NEXT: ptrue p0.s | |
-; SVE2-NEXT: and z0.s, z0.s, #0xff | |
-; SVE2-NEXT: and z1.s, z1.s, #0xff | |
-; SVE2-NEXT: urhadd z0.s, p0/m, z0.s, z1.s | |
-; SVE2-NEXT: ret | |
+; CHECK-LABEL: rhaddu_v4i8: | |
+; CHECK: // %bb.0: // %entry | |
+; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff | |
+; CHECK-NEXT: and z0.s, z0.s, #0xff | |
+; CHECK-NEXT: and z1.s, z1.s, #0xff | |
+; CHECK-NEXT: eor z0.d, z0.d, z2.d | |
+; CHECK-NEXT: sub z0.s, z1.s, z0.s | |
+; CHECK-NEXT: lsr z0.s, z0.s, #1 | |
+; CHECK-NEXT: ret | |
entry: | |
%s0s = zext <vscale x 4 x i8> %s0 to <vscale x 4 x i16> | |
%s1s = zext <vscale x 4 x i8> %s1 to <vscale x 4 x i16> | |
diff --git a/llvm/test/CodeGen/Thumb2/mve-vhadd.ll b/llvm/test/CodeGen/Thumb2/mve-vhadd.ll | |
index 82a286627a9e..9b64dde23627 100644 | |
--- a/llvm/test/CodeGen/Thumb2/mve-vhadd.ll | |
+++ b/llvm/test/CodeGen/Thumb2/mve-vhadd.ll | |
@@ -116,7 +116,8 @@ define arm_aapcs_vfpcc <4 x i8> @vhaddu_v4i8(<4 x i8> %s0, <4 x i8> %s1) { | |
; CHECK-NEXT: vmov.i32 q2, #0xff | |
; CHECK-NEXT: vand q1, q1, q2 | |
; CHECK-NEXT: vand q0, q0, q2 | |
-; CHECK-NEXT: vhadd.u32 q0, q0, q1 | |
+; CHECK-NEXT: vadd.i32 q0, q0, q1 | |
+; CHECK-NEXT: vshr.u32 q0, q0, #1 | |
; CHECK-NEXT: bx lr | |
entry: | |
%s0s = zext <4 x i8> %s0 to <4 x i16> | |
@@ -312,9 +313,12 @@ define arm_aapcs_vfpcc <4 x i8> @vrhaddu_v4i8(<4 x i8> %s0, <4 x i8> %s1) { | |
; CHECK-LABEL: vrhaddu_v4i8: | |
; CHECK: @ %bb.0: @ %entry | |
; CHECK-NEXT: vmov.i32 q2, #0xff | |
+; CHECK-NEXT: movs r0, #1 | |
; CHECK-NEXT: vand q1, q1, q2 | |
; CHECK-NEXT: vand q0, q0, q2 | |
-; CHECK-NEXT: vrhadd.u32 q0, q0, q1 | |
+; CHECK-NEXT: vadd.i32 q0, q0, q1 | |
+; CHECK-NEXT: vadd.i32 q0, q0, r0 | |
+; CHECK-NEXT: vshr.u32 q0, q0, #1 | |
; CHECK-NEXT: bx lr | |
entry: | |
%s0s = zext <4 x i8> %s0 to <4 x i16> | |
diff --git a/llvm/test/CodeGen/X86/fmf-propagation.ll b/llvm/test/CodeGen/X86/fmf-propagation.ll | |
index 07982ae17cf9..a07b3e23228e 100644 | |
--- a/llvm/test/CodeGen/X86/fmf-propagation.ll | |
+++ b/llvm/test/CodeGen/X86/fmf-propagation.ll | |
@@ -29,7 +29,7 @@ define float @fmf_transfer(float %x, float %y) { | |
} | |
; CHECK-LABEL: Optimized type-legalized selection DAG: %bb.0 'fmf_setcc:' | |
-; CHECK: t13: i8 = setcc nnan ninf nsz arcp contract afn reassoc t2, ConstantFP:f32<0.000000e+00>, setlt:ch | |
+; CHECK: t13: i8 = setcc t2, ConstantFP:f32<0.000000e+00>, setlt:ch | |
define float @fmf_setcc(float %x, float %y) { | |
%cmp = fcmp fast ult float %x, 0.0 | |
@@ -38,7 +38,7 @@ define float @fmf_setcc(float %x, float %y) { | |
} | |
; CHECK-LABEL: Initial selection DAG: %bb.0 'fmf_setcc_canon:' | |
-; CHECK: t14: i8 = setcc nnan ninf nsz arcp contract afn reassoc t2, ConstantFP:f32<0.000000e+00>, setgt:ch | |
+; CHECK: t14: i8 = setcc t2, ConstantFP:f32<0.000000e+00>, setgt:ch | |
define float @fmf_setcc_canon(float %x, float %y) { | |
%cmp = fcmp fast ult float 0.0, %x | |
%ret = select i1 %cmp, float %x, float %y |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment