-
-
Save nikic/9e59e8e7b635a54f0d75aad89b2632d6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
commit 4d4fb69830b49e729ee6ecf1c2a468c2f05ffffd
Author: Nikita Popov <npopov@redhat.com>
Date:   Tue Apr 5 16:31:36 2022 +0200

    remove entirely
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp | |
index 397c61261e3d..adf689d44cd0 100644 | |
--- a/llvm/lib/Target/X86/X86FastISel.cpp | |
+++ b/llvm/lib/Target/X86/X86FastISel.cpp | |
@@ -159,9 +159,6 @@ private: | |
bool TryEmitSmallMemcpy(X86AddressMode DestAM, | |
X86AddressMode SrcAM, uint64_t Len); | |
- bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I, | |
- const Value *Cond); | |
- | |
const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB, | |
X86AddressMode &AM); | |
@@ -221,70 +218,6 @@ X86FastISel::addFullAddress(const MachineInstrBuilder &MIB, | |
return ::addFullAddress(MIB, AM); | |
} | |
-/// Check if it is possible to fold the condition from the XALU intrinsic | |
-/// into the user. The condition code will only be updated on success. | |
-bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I, | |
- const Value *Cond) { | |
- if (!isa<ExtractValueInst>(Cond)) | |
- return false; | |
- | |
- const auto *EV = cast<ExtractValueInst>(Cond); | |
- if (!isa<IntrinsicInst>(EV->getAggregateOperand())) | |
- return false; | |
- | |
- const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand()); | |
- MVT RetVT; | |
- const Function *Callee = II->getCalledFunction(); | |
- Type *RetTy = | |
- cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U); | |
- if (!isTypeLegal(RetTy, RetVT)) | |
- return false; | |
- | |
- if (RetVT != MVT::i32 && RetVT != MVT::i64) | |
- return false; | |
- | |
- X86::CondCode TmpCC; | |
- switch (II->getIntrinsicID()) { | |
- default: return false; | |
- case Intrinsic::sadd_with_overflow: | |
- case Intrinsic::ssub_with_overflow: | |
- case Intrinsic::smul_with_overflow: | |
- case Intrinsic::umul_with_overflow: TmpCC = X86::COND_O; break; | |
- case Intrinsic::uadd_with_overflow: | |
- case Intrinsic::usub_with_overflow: TmpCC = X86::COND_B; break; | |
- } | |
- | |
- // Check if both instructions are in the same basic block. | |
- if (II->getParent() != I->getParent()) | |
- return false; | |
- | |
- // Make sure nothing is in the way | |
- BasicBlock::const_iterator Start(I); | |
- BasicBlock::const_iterator End(II); | |
- for (auto Itr = std::prev(Start); Itr != End; --Itr) { | |
- // We only expect extractvalue instructions between the intrinsic and the | |
- // instruction to be selected. | |
- if (!isa<ExtractValueInst>(Itr)) | |
- return false; | |
- | |
- // Check that the extractvalue operand comes from the intrinsic. | |
- const auto *EVI = cast<ExtractValueInst>(Itr); | |
- if (EVI->getAggregateOperand() != II) | |
- return false; | |
- } | |
- | |
- // Make sure no potentially eflags clobbering phi moves can be inserted in | |
- // between. | |
- auto HasPhis = [](const BasicBlock *Succ) { | |
- return !llvm::empty(Succ->phis()); | |
- }; | |
- if (I->isTerminator() && llvm::any_of(successors(I), HasPhis)) | |
- return false; | |
- | |
- CC = TmpCC; | |
- return true; | |
-} | |
- | |
bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) { | |
EVT evt = TLI.getValueType(DL, Ty, /*AllowUnknown=*/true); | |
if (evt == MVT::Other || !evt.isSimple()) | |
@@ -1749,17 +1682,6 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { | |
return true; | |
} | |
} | |
- } else if (foldX86XALUIntrinsic(CC, BI, BI->getCondition())) { | |
- // Fake request the condition, otherwise the intrinsic might be completely | |
- // optimized away. | |
- Register TmpReg = getRegForValue(BI->getCondition()); | |
- if (TmpReg == 0) | |
- return false; | |
- | |
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1)) | |
- .addMBB(TrueMBB).addImm(CC); | |
- finishCondBranch(BI->getParent(), TrueMBB, FalseMBB); | |
- return true; | |
} | |
// Otherwise do a clumsy setcc and re-test it. | |
@@ -2092,14 +2014,6 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) { | |
} | |
} | |
NeedTest = false; | |
- } else if (foldX86XALUIntrinsic(CC, I, Cond)) { | |
- // Fake request the condition, otherwise the intrinsic might be completely | |
- // optimized away. | |
- Register TmpReg = getRegForValue(Cond); | |
- if (TmpReg == 0) | |
- return false; | |
- | |
- NeedTest = false; | |
} | |
if (NeedTest) { | |
diff --git a/llvm/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll b/llvm/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll | |
index 94e1db3840b1..b80b5c48064c 100644 | |
--- a/llvm/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll | |
+++ b/llvm/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll | |
@@ -13,6 +13,8 @@ define fastcc i32 @test() nounwind { | |
; FASTISEL: ## %bb.0: ## %entry | |
; FASTISEL-NEXT: movl $1, %eax | |
; FASTISEL-NEXT: addl $0, %eax | |
+; FASTISEL-NEXT: seto %al | |
+; FASTISEL-NEXT: testb $1, %al | |
; FASTISEL-NEXT: xorl %eax, %eax | |
; FASTISEL-NEXT: retq | |
; | |
@@ -21,7 +23,8 @@ define fastcc i32 @test() nounwind { | |
; AVX512F-NEXT: movl $1, %eax | |
; AVX512F-NEXT: addl $0, %eax | |
; AVX512F-NEXT: seto %al | |
-; AVX512F-NEXT: jo LBB0_2 | |
+; AVX512F-NEXT: testb $1, %al | |
+; AVX512F-NEXT: jne LBB0_2 | |
; AVX512F-NEXT: ## %bb.1: ## %BB3 | |
; AVX512F-NEXT: LBB0_2: ## %.backedge | |
; AVX512F-NEXT: xorl %eax, %eax | |
diff --git a/llvm/test/CodeGen/X86/pr54369.ll b/llvm/test/CodeGen/X86/pr54369.ll | |
index 818b4f9d87ea..d0264db13feb 100644 | |
--- a/llvm/test/CodeGen/X86/pr54369.ll | |
+++ b/llvm/test/CodeGen/X86/pr54369.ll | |
@@ -1,16 +1,16 @@ | |
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | |
; RUN: llc -mtriple=x86_64-- -O0 < %s | FileCheck %s | |
-; FIXME: This is currently miscompiled due to an eflags clobber. | |
define i64 @adder(i64 %lhs, i64 %rhs) { | |
; CHECK-LABEL: adder: | |
; CHECK: # %bb.0: | |
; CHECK-NEXT: addq %rsi, %rdi | |
-; CHECK-NEXT: seto %al | |
+; CHECK-NEXT: seto %dl | |
; CHECK-NEXT: xorl %eax, %eax | |
; CHECK-NEXT: # kill: def $rax killed $eax | |
; CHECK-NEXT: movl $148, %ecx | |
-; CHECK-NEXT: cmovoq %rcx, %rax | |
+; CHECK-NEXT: testb $1, %dl | |
+; CHECK-NEXT: cmovneq %rcx, %rax | |
; CHECK-NEXT: retq | |
%res = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %lhs, i64 %rhs) | |
%errorbit = extractvalue { i64, i1 } %res, 1 | |
diff --git a/llvm/test/CodeGen/X86/xaluo.ll b/llvm/test/CodeGen/X86/xaluo.ll | |
index 0de8bdeddd6f..bed76889d7dd 100644 | |
--- a/llvm/test/CodeGen/X86/xaluo.ll | |
+++ b/llvm/test/CodeGen/X86/xaluo.ll | |
@@ -563,7 +563,9 @@ define i32 @saddoselecti32(i32 %v1, i32 %v2) { | |
; FAST-NEXT: movl %esi, %eax | |
; FAST-NEXT: movl %edi, %ecx | |
; FAST-NEXT: addl %esi, %ecx | |
-; FAST-NEXT: cmovol %edi, %eax | |
+; FAST-NEXT: seto %cl | |
+; FAST-NEXT: testb $1, %cl | |
+; FAST-NEXT: cmovnel %edi, %eax | |
; FAST-NEXT: retq | |
%t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2) | |
%obit = extractvalue {i32, i1} %t, 1 | |
@@ -585,7 +587,9 @@ define i64 @saddoselecti64(i64 %v1, i64 %v2) { | |
; FAST-NEXT: movq %rsi, %rax | |
; FAST-NEXT: movq %rdi, %rcx | |
; FAST-NEXT: addq %rsi, %rcx | |
-; FAST-NEXT: cmovoq %rdi, %rax | |
+; FAST-NEXT: seto %cl | |
+; FAST-NEXT: testb $1, %cl | |
+; FAST-NEXT: cmovneq %rdi, %rax | |
; FAST-NEXT: retq | |
%t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2) | |
%obit = extractvalue {i64, i1} %t, 1 | |
@@ -607,7 +611,9 @@ define i32 @uaddoselecti32(i32 %v1, i32 %v2) { | |
; FAST-NEXT: movl %esi, %eax | |
; FAST-NEXT: movl %edi, %ecx | |
; FAST-NEXT: addl %esi, %ecx | |
-; FAST-NEXT: cmovbl %edi, %eax | |
+; FAST-NEXT: setb %cl | |
+; FAST-NEXT: testb $1, %cl | |
+; FAST-NEXT: cmovnel %edi, %eax | |
; FAST-NEXT: retq | |
%t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2) | |
%obit = extractvalue {i32, i1} %t, 1 | |
@@ -629,7 +635,9 @@ define i64 @uaddoselecti64(i64 %v1, i64 %v2) { | |
; FAST-NEXT: movq %rsi, %rax | |
; FAST-NEXT: movq %rdi, %rcx | |
; FAST-NEXT: addq %rsi, %rcx | |
-; FAST-NEXT: cmovbq %rdi, %rax | |
+; FAST-NEXT: setb %cl | |
+; FAST-NEXT: testb $1, %cl | |
+; FAST-NEXT: cmovneq %rdi, %rax | |
; FAST-NEXT: retq | |
%t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2) | |
%obit = extractvalue {i64, i1} %t, 1 | |
@@ -649,7 +657,9 @@ define i32 @ssuboselecti32(i32 %v1, i32 %v2) { | |
; FAST: ## %bb.0: | |
; FAST-NEXT: movl %esi, %eax | |
; FAST-NEXT: cmpl %esi, %edi | |
-; FAST-NEXT: cmovol %edi, %eax | |
+; FAST-NEXT: seto %cl | |
+; FAST-NEXT: testb $1, %cl | |
+; FAST-NEXT: cmovnel %edi, %eax | |
; FAST-NEXT: retq | |
%t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2) | |
%obit = extractvalue {i32, i1} %t, 1 | |
@@ -669,7 +679,9 @@ define i64 @ssuboselecti64(i64 %v1, i64 %v2) { | |
; FAST: ## %bb.0: | |
; FAST-NEXT: movq %rsi, %rax | |
; FAST-NEXT: cmpq %rsi, %rdi | |
-; FAST-NEXT: cmovoq %rdi, %rax | |
+; FAST-NEXT: seto %cl | |
+; FAST-NEXT: testb $1, %cl | |
+; FAST-NEXT: cmovneq %rdi, %rax | |
; FAST-NEXT: retq | |
%t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2) | |
%obit = extractvalue {i64, i1} %t, 1 | |
@@ -689,7 +701,9 @@ define i32 @usuboselecti32(i32 %v1, i32 %v2) { | |
; FAST: ## %bb.0: | |
; FAST-NEXT: movl %esi, %eax | |
; FAST-NEXT: cmpl %esi, %edi | |
-; FAST-NEXT: cmovbl %edi, %eax | |
+; FAST-NEXT: setb %cl | |
+; FAST-NEXT: testb $1, %cl | |
+; FAST-NEXT: cmovnel %edi, %eax | |
; FAST-NEXT: retq | |
%t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2) | |
%obit = extractvalue {i32, i1} %t, 1 | |
@@ -709,7 +723,9 @@ define i64 @usuboselecti64(i64 %v1, i64 %v2) { | |
; FAST: ## %bb.0: | |
; FAST-NEXT: movq %rsi, %rax | |
; FAST-NEXT: cmpq %rsi, %rdi | |
-; FAST-NEXT: cmovbq %rdi, %rax | |
+; FAST-NEXT: setb %cl | |
+; FAST-NEXT: testb $1, %cl | |
+; FAST-NEXT: cmovneq %rdi, %rax | |
; FAST-NEXT: retq | |
%t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2) | |
%obit = extractvalue {i64, i1} %t, 1 | |
@@ -735,7 +751,9 @@ define zeroext i1 @saddobri32(i32 %v1, i32 %v2) { | |
; FAST-LABEL: saddobri32: | |
; FAST: ## %bb.0: | |
; FAST-NEXT: addl %esi, %edi | |
-; FAST-NEXT: jo LBB31_1 | |
+; FAST-NEXT: seto %al | |
+; FAST-NEXT: testb $1, %al | |
+; FAST-NEXT: jne LBB31_1 | |
; FAST-NEXT: ## %bb.2: ## %continue | |
; FAST-NEXT: movb $1, %al | |
; FAST-NEXT: andb $1, %al | |
@@ -773,7 +791,9 @@ define zeroext i1 @saddobri64(i64 %v1, i64 %v2) { | |
; FAST-LABEL: saddobri64: | |
; FAST: ## %bb.0: | |
; FAST-NEXT: addq %rsi, %rdi | |
-; FAST-NEXT: jo LBB32_1 | |
+; FAST-NEXT: seto %al | |
+; FAST-NEXT: testb $1, %al | |
+; FAST-NEXT: jne LBB32_1 | |
; FAST-NEXT: ## %bb.2: ## %continue | |
; FAST-NEXT: movb $1, %al | |
; FAST-NEXT: andb $1, %al | |
@@ -811,7 +831,9 @@ define zeroext i1 @uaddobri32(i32 %v1, i32 %v2) { | |
; FAST-LABEL: uaddobri32: | |
; FAST: ## %bb.0: | |
; FAST-NEXT: addl %esi, %edi | |
-; FAST-NEXT: jb LBB33_1 | |
+; FAST-NEXT: setb %al | |
+; FAST-NEXT: testb $1, %al | |
+; FAST-NEXT: jne LBB33_1 | |
; FAST-NEXT: ## %bb.2: ## %continue | |
; FAST-NEXT: movb $1, %al | |
; FAST-NEXT: andb $1, %al | |
@@ -849,7 +871,9 @@ define zeroext i1 @uaddobri64(i64 %v1, i64 %v2) { | |
; FAST-LABEL: uaddobri64: | |
; FAST: ## %bb.0: | |
; FAST-NEXT: addq %rsi, %rdi | |
-; FAST-NEXT: jb LBB34_1 | |
+; FAST-NEXT: setb %al | |
+; FAST-NEXT: testb $1, %al | |
+; FAST-NEXT: jne LBB34_1 | |
; FAST-NEXT: ## %bb.2: ## %continue | |
; FAST-NEXT: movb $1, %al | |
; FAST-NEXT: andb $1, %al | |
@@ -887,7 +911,9 @@ define zeroext i1 @ssubobri32(i32 %v1, i32 %v2) { | |
; FAST-LABEL: ssubobri32: | |
; FAST: ## %bb.0: | |
; FAST-NEXT: cmpl %esi, %edi | |
-; FAST-NEXT: jo LBB35_1 | |
+; FAST-NEXT: seto %al | |
+; FAST-NEXT: testb $1, %al | |
+; FAST-NEXT: jne LBB35_1 | |
; FAST-NEXT: ## %bb.2: ## %continue | |
; FAST-NEXT: movb $1, %al | |
; FAST-NEXT: andb $1, %al | |
@@ -925,7 +951,9 @@ define zeroext i1 @ssubobri64(i64 %v1, i64 %v2) { | |
; FAST-LABEL: ssubobri64: | |
; FAST: ## %bb.0: | |
; FAST-NEXT: cmpq %rsi, %rdi | |
-; FAST-NEXT: jo LBB36_1 | |
+; FAST-NEXT: seto %al | |
+; FAST-NEXT: testb $1, %al | |
+; FAST-NEXT: jne LBB36_1 | |
; FAST-NEXT: ## %bb.2: ## %continue | |
; FAST-NEXT: movb $1, %al | |
; FAST-NEXT: andb $1, %al | |
@@ -963,7 +991,9 @@ define zeroext i1 @usubobri32(i32 %v1, i32 %v2) { | |
; FAST-LABEL: usubobri32: | |
; FAST: ## %bb.0: | |
; FAST-NEXT: cmpl %esi, %edi | |
-; FAST-NEXT: jb LBB37_1 | |
+; FAST-NEXT: setb %al | |
+; FAST-NEXT: testb $1, %al | |
+; FAST-NEXT: jne LBB37_1 | |
; FAST-NEXT: ## %bb.2: ## %continue | |
; FAST-NEXT: movb $1, %al | |
; FAST-NEXT: andb $1, %al | |
@@ -1001,7 +1031,9 @@ define zeroext i1 @usubobri64(i64 %v1, i64 %v2) { | |
; FAST-LABEL: usubobri64: | |
; FAST: ## %bb.0: | |
; FAST-NEXT: cmpq %rsi, %rdi | |
-; FAST-NEXT: jb LBB38_1 | |
+; FAST-NEXT: setb %al | |
+; FAST-NEXT: testb $1, %al | |
+; FAST-NEXT: jne LBB38_1 | |
; FAST-NEXT: ## %bb.2: ## %continue | |
; FAST-NEXT: movb $1, %al | |
; FAST-NEXT: andb $1, %al | |
@@ -1089,10 +1121,12 @@ define i32 @incovfselectstore(i32 %v1, i32 %v2, i32* %x) { | |
; FAST-LABEL: incovfselectstore: | |
; FAST: ## %bb.0: | |
; FAST-NEXT: movl %esi, %eax | |
-; FAST-NEXT: movl %edi, %ecx | |
-; FAST-NEXT: incl %ecx | |
-; FAST-NEXT: cmovol %edi, %eax | |
-; FAST-NEXT: movl %ecx, (%rdx) | |
+; FAST-NEXT: movl %edi, %esi | |
+; FAST-NEXT: incl %esi | |
+; FAST-NEXT: seto %cl | |
+; FAST-NEXT: testb $1, %cl | |
+; FAST-NEXT: cmovnel %edi, %eax | |
+; FAST-NEXT: movl %esi, (%rdx) | |
; FAST-NEXT: retq | |
%t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 1) | |
%obit = extractvalue {i32, i1} %t, 1 | |
@@ -1116,10 +1150,12 @@ define i32 @decovfselectstore(i32 %v1, i32 %v2, i32* %x) { | |
; FAST-LABEL: decovfselectstore: | |
; FAST: ## %bb.0: | |
; FAST-NEXT: movl %esi, %eax | |
-; FAST-NEXT: movl %edi, %ecx | |
-; FAST-NEXT: decl %ecx | |
-; FAST-NEXT: cmovol %edi, %eax | |
-; FAST-NEXT: movl %ecx, (%rdx) | |
+; FAST-NEXT: movl %edi, %esi | |
+; FAST-NEXT: decl %esi | |
+; FAST-NEXT: seto %cl | |
+; FAST-NEXT: testb $1, %cl | |
+; FAST-NEXT: cmovnel %edi, %eax | |
+; FAST-NEXT: movl %esi, (%rdx) | |
; FAST-NEXT: retq | |
; | |
; KNL-LABEL: decovfselectstore: | |
diff --git a/llvm/test/CodeGen/X86/xmulo.ll b/llvm/test/CodeGen/X86/xmulo.ll | |
index 71d92af0dd94..184c4276d088 100644 | |
--- a/llvm/test/CodeGen/X86/xmulo.ll | |
+++ b/llvm/test/CodeGen/X86/xmulo.ll | |
@@ -516,13 +516,23 @@ define zeroext i1 @umuloi64(i64 %v1, i64 %v2, i64* %res) { | |
; Check the use of the overflow bit in combination with a select instruction. | |
; | |
define i32 @smuloselecti32(i32 %v1, i32 %v2) { | |
-; LINUX-LABEL: smuloselecti32: | |
-; LINUX: # %bb.0: | |
-; LINUX-NEXT: movl %esi, %eax | |
-; LINUX-NEXT: movl %edi, %ecx | |
-; LINUX-NEXT: imull %esi, %ecx | |
-; LINUX-NEXT: cmovol %edi, %eax | |
-; LINUX-NEXT: retq | |
+; SDAG-LABEL: smuloselecti32: | |
+; SDAG: # %bb.0: | |
+; SDAG-NEXT: movl %esi, %eax | |
+; SDAG-NEXT: movl %edi, %ecx | |
+; SDAG-NEXT: imull %esi, %ecx | |
+; SDAG-NEXT: cmovol %edi, %eax | |
+; SDAG-NEXT: retq | |
+; | |
+; FAST-LABEL: smuloselecti32: | |
+; FAST: # %bb.0: | |
+; FAST-NEXT: movl %esi, %eax | |
+; FAST-NEXT: movl %edi, %ecx | |
+; FAST-NEXT: imull %esi, %ecx | |
+; FAST-NEXT: seto %cl | |
+; FAST-NEXT: testb $1, %cl | |
+; FAST-NEXT: cmovnel %edi, %eax | |
+; FAST-NEXT: retq | |
; | |
; WIN64-LABEL: smuloselecti32: | |
; WIN64: # %bb.0: | |
@@ -550,13 +560,23 @@ define i32 @smuloselecti32(i32 %v1, i32 %v2) { | |
} | |
define i64 @smuloselecti64(i64 %v1, i64 %v2) { | |
-; LINUX-LABEL: smuloselecti64: | |
-; LINUX: # %bb.0: | |
-; LINUX-NEXT: movq %rsi, %rax | |
-; LINUX-NEXT: movq %rdi, %rcx | |
-; LINUX-NEXT: imulq %rsi, %rcx | |
-; LINUX-NEXT: cmovoq %rdi, %rax | |
-; LINUX-NEXT: retq | |
+; SDAG-LABEL: smuloselecti64: | |
+; SDAG: # %bb.0: | |
+; SDAG-NEXT: movq %rsi, %rax | |
+; SDAG-NEXT: movq %rdi, %rcx | |
+; SDAG-NEXT: imulq %rsi, %rcx | |
+; SDAG-NEXT: cmovoq %rdi, %rax | |
+; SDAG-NEXT: retq | |
+; | |
+; FAST-LABEL: smuloselecti64: | |
+; FAST: # %bb.0: | |
+; FAST-NEXT: movq %rsi, %rax | |
+; FAST-NEXT: movq %rdi, %rcx | |
+; FAST-NEXT: imulq %rsi, %rcx | |
+; FAST-NEXT: seto %cl | |
+; FAST-NEXT: testb $1, %cl | |
+; FAST-NEXT: cmovneq %rdi, %rax | |
+; FAST-NEXT: retq | |
; | |
; WIN64-LABEL: smuloselecti64: | |
; WIN64: # %bb.0: | |
@@ -648,13 +668,23 @@ define i64 @smuloselecti64(i64 %v1, i64 %v2) { | |
} | |
define i32 @umuloselecti32(i32 %v1, i32 %v2) { | |
-; LINUX-LABEL: umuloselecti32: | |
-; LINUX: # %bb.0: | |
-; LINUX-NEXT: movl %edi, %eax | |
-; LINUX-NEXT: mull %esi | |
-; LINUX-NEXT: cmovol %edi, %esi | |
-; LINUX-NEXT: movl %esi, %eax | |
-; LINUX-NEXT: retq | |
+; SDAG-LABEL: umuloselecti32: | |
+; SDAG: # %bb.0: | |
+; SDAG-NEXT: movl %edi, %eax | |
+; SDAG-NEXT: mull %esi | |
+; SDAG-NEXT: cmovol %edi, %esi | |
+; SDAG-NEXT: movl %esi, %eax | |
+; SDAG-NEXT: retq | |
+; | |
+; FAST-LABEL: umuloselecti32: | |
+; FAST: # %bb.0: | |
+; FAST-NEXT: movl %edi, %eax | |
+; FAST-NEXT: mull %esi | |
+; FAST-NEXT: seto %al | |
+; FAST-NEXT: testb $1, %al | |
+; FAST-NEXT: cmovnel %edi, %esi | |
+; FAST-NEXT: movl %esi, %eax | |
+; FAST-NEXT: retq | |
; | |
; WIN64-LABEL: umuloselecti32: | |
; WIN64: # %bb.0: | |
@@ -686,13 +716,23 @@ define i32 @umuloselecti32(i32 %v1, i32 %v2) { | |
} | |
define i64 @umuloselecti64(i64 %v1, i64 %v2) { | |
-; LINUX-LABEL: umuloselecti64: | |
-; LINUX: # %bb.0: | |
-; LINUX-NEXT: movq %rdi, %rax | |
-; LINUX-NEXT: mulq %rsi | |
-; LINUX-NEXT: cmovoq %rdi, %rsi | |
-; LINUX-NEXT: movq %rsi, %rax | |
-; LINUX-NEXT: retq | |
+; SDAG-LABEL: umuloselecti64: | |
+; SDAG: # %bb.0: | |
+; SDAG-NEXT: movq %rdi, %rax | |
+; SDAG-NEXT: mulq %rsi | |
+; SDAG-NEXT: cmovoq %rdi, %rsi | |
+; SDAG-NEXT: movq %rsi, %rax | |
+; SDAG-NEXT: retq | |
+; | |
+; FAST-LABEL: umuloselecti64: | |
+; FAST: # %bb.0: | |
+; FAST-NEXT: movq %rdi, %rax | |
+; FAST-NEXT: mulq %rsi | |
+; FAST-NEXT: seto %al | |
+; FAST-NEXT: testb $1, %al | |
+; FAST-NEXT: cmovneq %rdi, %rsi | |
+; FAST-NEXT: movq %rsi, %rax | |
+; FAST-NEXT: retq | |
; | |
; WIN64-LABEL: umuloselecti64: | |
; WIN64: # %bb.0: | |
@@ -905,7 +945,9 @@ define zeroext i1 @smulobri32(i32 %v1, i32 %v2) { | |
; FAST-LABEL: smulobri32: | |
; FAST: # %bb.0: | |
; FAST-NEXT: imull %esi, %edi | |
-; FAST-NEXT: jo .LBB17_1 | |
+; FAST-NEXT: seto %al | |
+; FAST-NEXT: testb $1, %al | |
+; FAST-NEXT: jne .LBB17_1 | |
; FAST-NEXT: # %bb.2: # %continue | |
; FAST-NEXT: movb $1, %al | |
; FAST-NEXT: andb $1, %al | |
@@ -966,7 +1008,9 @@ define zeroext i1 @smulobri64(i64 %v1, i64 %v2) { | |
; FAST-LABEL: smulobri64: | |
; FAST: # %bb.0: | |
; FAST-NEXT: imulq %rsi, %rdi | |
-; FAST-NEXT: jo .LBB18_1 | |
+; FAST-NEXT: seto %al | |
+; FAST-NEXT: testb $1, %al | |
+; FAST-NEXT: jne .LBB18_1 | |
; FAST-NEXT: # %bb.2: # %continue | |
; FAST-NEXT: movb $1, %al | |
; FAST-NEXT: andb $1, %al | |
@@ -1228,7 +1272,9 @@ define zeroext i1 @umulobri32(i32 %v1, i32 %v2) { | |
; FAST: # %bb.0: | |
; FAST-NEXT: movl %edi, %eax | |
; FAST-NEXT: mull %esi | |
-; FAST-NEXT: jo .LBB21_1 | |
+; FAST-NEXT: seto %al | |
+; FAST-NEXT: testb $1, %al | |
+; FAST-NEXT: jne .LBB21_1 | |
; FAST-NEXT: # %bb.2: # %continue | |
; FAST-NEXT: movb $1, %al | |
; FAST-NEXT: andb $1, %al | |
@@ -1292,7 +1338,9 @@ define zeroext i1 @umulobri64(i64 %v1, i64 %v2) { | |
; FAST: # %bb.0: | |
; FAST-NEXT: movq %rdi, %rax | |
; FAST-NEXT: mulq %rsi | |
-; FAST-NEXT: jo .LBB22_1 | |
+; FAST-NEXT: seto %al | |
+; FAST-NEXT: testb $1, %al | |
+; FAST-NEXT: jne .LBB22_1 | |
; FAST-NEXT: # %bb.2: # %continue | |
; FAST-NEXT: movb $1, %al | |
; FAST-NEXT: andb $1, %al |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment