Skip to content

Instantly share code, notes, and snippets.

@nikic

nikic/xalu.patch (secret gist)

Created April 5, 2022 14:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nikic/9e59e8e7b635a54f0d75aad89b2632d6 to your computer and use it in GitHub Desktop.
Save nikic/9e59e8e7b635a54f0d75aad89b2632d6 to your computer and use it in GitHub Desktop.
commit 4d4fb69830b49e729ee6ecf1c2a468c2f05ffffd
Author: Nikita Popov <npopov@redhat.com>
Date: Tue Apr 5 16:31:36 2022 +0200
[X86][FastISel] Remove the XALU overflow-intrinsic condition fold entirely
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index 397c61261e3d..adf689d44cd0 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -159,9 +159,6 @@ private:
bool TryEmitSmallMemcpy(X86AddressMode DestAM,
X86AddressMode SrcAM, uint64_t Len);
- bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
- const Value *Cond);
-
const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB,
X86AddressMode &AM);
@@ -221,70 +218,6 @@ X86FastISel::addFullAddress(const MachineInstrBuilder &MIB,
return ::addFullAddress(MIB, AM);
}
-/// Check if it is possible to fold the condition from the XALU intrinsic
-/// into the user. The condition code will only be updated on success.
-bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
- const Value *Cond) {
- if (!isa<ExtractValueInst>(Cond))
- return false;
-
- const auto *EV = cast<ExtractValueInst>(Cond);
- if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
- return false;
-
- const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
- MVT RetVT;
- const Function *Callee = II->getCalledFunction();
- Type *RetTy =
- cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
- if (!isTypeLegal(RetTy, RetVT))
- return false;
-
- if (RetVT != MVT::i32 && RetVT != MVT::i64)
- return false;
-
- X86::CondCode TmpCC;
- switch (II->getIntrinsicID()) {
- default: return false;
- case Intrinsic::sadd_with_overflow:
- case Intrinsic::ssub_with_overflow:
- case Intrinsic::smul_with_overflow:
- case Intrinsic::umul_with_overflow: TmpCC = X86::COND_O; break;
- case Intrinsic::uadd_with_overflow:
- case Intrinsic::usub_with_overflow: TmpCC = X86::COND_B; break;
- }
-
- // Check if both instructions are in the same basic block.
- if (II->getParent() != I->getParent())
- return false;
-
- // Make sure nothing is in the way
- BasicBlock::const_iterator Start(I);
- BasicBlock::const_iterator End(II);
- for (auto Itr = std::prev(Start); Itr != End; --Itr) {
- // We only expect extractvalue instructions between the intrinsic and the
- // instruction to be selected.
- if (!isa<ExtractValueInst>(Itr))
- return false;
-
- // Check that the extractvalue operand comes from the intrinsic.
- const auto *EVI = cast<ExtractValueInst>(Itr);
- if (EVI->getAggregateOperand() != II)
- return false;
- }
-
- // Make sure no potentially eflags clobbering phi moves can be inserted in
- // between.
- auto HasPhis = [](const BasicBlock *Succ) {
- return !llvm::empty(Succ->phis());
- };
- if (I->isTerminator() && llvm::any_of(successors(I), HasPhis))
- return false;
-
- CC = TmpCC;
- return true;
-}
-
bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
EVT evt = TLI.getValueType(DL, Ty, /*AllowUnknown=*/true);
if (evt == MVT::Other || !evt.isSimple())
@@ -1749,17 +1682,6 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
return true;
}
}
- } else if (foldX86XALUIntrinsic(CC, BI, BI->getCondition())) {
- // Fake request the condition, otherwise the intrinsic might be completely
- // optimized away.
- Register TmpReg = getRegForValue(BI->getCondition());
- if (TmpReg == 0)
- return false;
-
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
- .addMBB(TrueMBB).addImm(CC);
- finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
- return true;
}
// Otherwise do a clumsy setcc and re-test it.
@@ -2092,14 +2014,6 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
}
}
NeedTest = false;
- } else if (foldX86XALUIntrinsic(CC, I, Cond)) {
- // Fake request the condition, otherwise the intrinsic might be completely
- // optimized away.
- Register TmpReg = getRegForValue(Cond);
- if (TmpReg == 0)
- return false;
-
- NeedTest = false;
}
if (NeedTest) {
diff --git a/llvm/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll b/llvm/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll
index 94e1db3840b1..b80b5c48064c 100644
--- a/llvm/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll
+++ b/llvm/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll
@@ -13,6 +13,8 @@ define fastcc i32 @test() nounwind {
; FASTISEL: ## %bb.0: ## %entry
; FASTISEL-NEXT: movl $1, %eax
; FASTISEL-NEXT: addl $0, %eax
+; FASTISEL-NEXT: seto %al
+; FASTISEL-NEXT: testb $1, %al
; FASTISEL-NEXT: xorl %eax, %eax
; FASTISEL-NEXT: retq
;
@@ -21,7 +23,8 @@ define fastcc i32 @test() nounwind {
; AVX512F-NEXT: movl $1, %eax
; AVX512F-NEXT: addl $0, %eax
; AVX512F-NEXT: seto %al
-; AVX512F-NEXT: jo LBB0_2
+; AVX512F-NEXT: testb $1, %al
+; AVX512F-NEXT: jne LBB0_2
; AVX512F-NEXT: ## %bb.1: ## %BB3
; AVX512F-NEXT: LBB0_2: ## %.backedge
; AVX512F-NEXT: xorl %eax, %eax
diff --git a/llvm/test/CodeGen/X86/pr54369.ll b/llvm/test/CodeGen/X86/pr54369.ll
index 818b4f9d87ea..d0264db13feb 100644
--- a/llvm/test/CodeGen/X86/pr54369.ll
+++ b/llvm/test/CodeGen/X86/pr54369.ll
@@ -1,16 +1,16 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-- -O0 < %s | FileCheck %s
-; FIXME: This is currently miscompiled due to an eflags clobber.
define i64 @adder(i64 %lhs, i64 %rhs) {
; CHECK-LABEL: adder:
; CHECK: # %bb.0:
; CHECK-NEXT: addq %rsi, %rdi
-; CHECK-NEXT: seto %al
+; CHECK-NEXT: seto %dl
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: # kill: def $rax killed $eax
; CHECK-NEXT: movl $148, %ecx
-; CHECK-NEXT: cmovoq %rcx, %rax
+; CHECK-NEXT: testb $1, %dl
+; CHECK-NEXT: cmovneq %rcx, %rax
; CHECK-NEXT: retq
%res = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %lhs, i64 %rhs)
%errorbit = extractvalue { i64, i1 } %res, 1
diff --git a/llvm/test/CodeGen/X86/xaluo.ll b/llvm/test/CodeGen/X86/xaluo.ll
index 0de8bdeddd6f..bed76889d7dd 100644
--- a/llvm/test/CodeGen/X86/xaluo.ll
+++ b/llvm/test/CodeGen/X86/xaluo.ll
@@ -563,7 +563,9 @@ define i32 @saddoselecti32(i32 %v1, i32 %v2) {
; FAST-NEXT: movl %esi, %eax
; FAST-NEXT: movl %edi, %ecx
; FAST-NEXT: addl %esi, %ecx
-; FAST-NEXT: cmovol %edi, %eax
+; FAST-NEXT: seto %cl
+; FAST-NEXT: testb $1, %cl
+; FAST-NEXT: cmovnel %edi, %eax
; FAST-NEXT: retq
%t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
%obit = extractvalue {i32, i1} %t, 1
@@ -585,7 +587,9 @@ define i64 @saddoselecti64(i64 %v1, i64 %v2) {
; FAST-NEXT: movq %rsi, %rax
; FAST-NEXT: movq %rdi, %rcx
; FAST-NEXT: addq %rsi, %rcx
-; FAST-NEXT: cmovoq %rdi, %rax
+; FAST-NEXT: seto %cl
+; FAST-NEXT: testb $1, %cl
+; FAST-NEXT: cmovneq %rdi, %rax
; FAST-NEXT: retq
%t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2)
%obit = extractvalue {i64, i1} %t, 1
@@ -607,7 +611,9 @@ define i32 @uaddoselecti32(i32 %v1, i32 %v2) {
; FAST-NEXT: movl %esi, %eax
; FAST-NEXT: movl %edi, %ecx
; FAST-NEXT: addl %esi, %ecx
-; FAST-NEXT: cmovbl %edi, %eax
+; FAST-NEXT: setb %cl
+; FAST-NEXT: testb $1, %cl
+; FAST-NEXT: cmovnel %edi, %eax
; FAST-NEXT: retq
%t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
%obit = extractvalue {i32, i1} %t, 1
@@ -629,7 +635,9 @@ define i64 @uaddoselecti64(i64 %v1, i64 %v2) {
; FAST-NEXT: movq %rsi, %rax
; FAST-NEXT: movq %rdi, %rcx
; FAST-NEXT: addq %rsi, %rcx
-; FAST-NEXT: cmovbq %rdi, %rax
+; FAST-NEXT: setb %cl
+; FAST-NEXT: testb $1, %cl
+; FAST-NEXT: cmovneq %rdi, %rax
; FAST-NEXT: retq
%t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2)
%obit = extractvalue {i64, i1} %t, 1
@@ -649,7 +657,9 @@ define i32 @ssuboselecti32(i32 %v1, i32 %v2) {
; FAST: ## %bb.0:
; FAST-NEXT: movl %esi, %eax
; FAST-NEXT: cmpl %esi, %edi
-; FAST-NEXT: cmovol %edi, %eax
+; FAST-NEXT: seto %cl
+; FAST-NEXT: testb $1, %cl
+; FAST-NEXT: cmovnel %edi, %eax
; FAST-NEXT: retq
%t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2)
%obit = extractvalue {i32, i1} %t, 1
@@ -669,7 +679,9 @@ define i64 @ssuboselecti64(i64 %v1, i64 %v2) {
; FAST: ## %bb.0:
; FAST-NEXT: movq %rsi, %rax
; FAST-NEXT: cmpq %rsi, %rdi
-; FAST-NEXT: cmovoq %rdi, %rax
+; FAST-NEXT: seto %cl
+; FAST-NEXT: testb $1, %cl
+; FAST-NEXT: cmovneq %rdi, %rax
; FAST-NEXT: retq
%t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2)
%obit = extractvalue {i64, i1} %t, 1
@@ -689,7 +701,9 @@ define i32 @usuboselecti32(i32 %v1, i32 %v2) {
; FAST: ## %bb.0:
; FAST-NEXT: movl %esi, %eax
; FAST-NEXT: cmpl %esi, %edi
-; FAST-NEXT: cmovbl %edi, %eax
+; FAST-NEXT: setb %cl
+; FAST-NEXT: testb $1, %cl
+; FAST-NEXT: cmovnel %edi, %eax
; FAST-NEXT: retq
%t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2)
%obit = extractvalue {i32, i1} %t, 1
@@ -709,7 +723,9 @@ define i64 @usuboselecti64(i64 %v1, i64 %v2) {
; FAST: ## %bb.0:
; FAST-NEXT: movq %rsi, %rax
; FAST-NEXT: cmpq %rsi, %rdi
-; FAST-NEXT: cmovbq %rdi, %rax
+; FAST-NEXT: setb %cl
+; FAST-NEXT: testb $1, %cl
+; FAST-NEXT: cmovneq %rdi, %rax
; FAST-NEXT: retq
%t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2)
%obit = extractvalue {i64, i1} %t, 1
@@ -735,7 +751,9 @@ define zeroext i1 @saddobri32(i32 %v1, i32 %v2) {
; FAST-LABEL: saddobri32:
; FAST: ## %bb.0:
; FAST-NEXT: addl %esi, %edi
-; FAST-NEXT: jo LBB31_1
+; FAST-NEXT: seto %al
+; FAST-NEXT: testb $1, %al
+; FAST-NEXT: jne LBB31_1
; FAST-NEXT: ## %bb.2: ## %continue
; FAST-NEXT: movb $1, %al
; FAST-NEXT: andb $1, %al
@@ -773,7 +791,9 @@ define zeroext i1 @saddobri64(i64 %v1, i64 %v2) {
; FAST-LABEL: saddobri64:
; FAST: ## %bb.0:
; FAST-NEXT: addq %rsi, %rdi
-; FAST-NEXT: jo LBB32_1
+; FAST-NEXT: seto %al
+; FAST-NEXT: testb $1, %al
+; FAST-NEXT: jne LBB32_1
; FAST-NEXT: ## %bb.2: ## %continue
; FAST-NEXT: movb $1, %al
; FAST-NEXT: andb $1, %al
@@ -811,7 +831,9 @@ define zeroext i1 @uaddobri32(i32 %v1, i32 %v2) {
; FAST-LABEL: uaddobri32:
; FAST: ## %bb.0:
; FAST-NEXT: addl %esi, %edi
-; FAST-NEXT: jb LBB33_1
+; FAST-NEXT: setb %al
+; FAST-NEXT: testb $1, %al
+; FAST-NEXT: jne LBB33_1
; FAST-NEXT: ## %bb.2: ## %continue
; FAST-NEXT: movb $1, %al
; FAST-NEXT: andb $1, %al
@@ -849,7 +871,9 @@ define zeroext i1 @uaddobri64(i64 %v1, i64 %v2) {
; FAST-LABEL: uaddobri64:
; FAST: ## %bb.0:
; FAST-NEXT: addq %rsi, %rdi
-; FAST-NEXT: jb LBB34_1
+; FAST-NEXT: setb %al
+; FAST-NEXT: testb $1, %al
+; FAST-NEXT: jne LBB34_1
; FAST-NEXT: ## %bb.2: ## %continue
; FAST-NEXT: movb $1, %al
; FAST-NEXT: andb $1, %al
@@ -887,7 +911,9 @@ define zeroext i1 @ssubobri32(i32 %v1, i32 %v2) {
; FAST-LABEL: ssubobri32:
; FAST: ## %bb.0:
; FAST-NEXT: cmpl %esi, %edi
-; FAST-NEXT: jo LBB35_1
+; FAST-NEXT: seto %al
+; FAST-NEXT: testb $1, %al
+; FAST-NEXT: jne LBB35_1
; FAST-NEXT: ## %bb.2: ## %continue
; FAST-NEXT: movb $1, %al
; FAST-NEXT: andb $1, %al
@@ -925,7 +951,9 @@ define zeroext i1 @ssubobri64(i64 %v1, i64 %v2) {
; FAST-LABEL: ssubobri64:
; FAST: ## %bb.0:
; FAST-NEXT: cmpq %rsi, %rdi
-; FAST-NEXT: jo LBB36_1
+; FAST-NEXT: seto %al
+; FAST-NEXT: testb $1, %al
+; FAST-NEXT: jne LBB36_1
; FAST-NEXT: ## %bb.2: ## %continue
; FAST-NEXT: movb $1, %al
; FAST-NEXT: andb $1, %al
@@ -963,7 +991,9 @@ define zeroext i1 @usubobri32(i32 %v1, i32 %v2) {
; FAST-LABEL: usubobri32:
; FAST: ## %bb.0:
; FAST-NEXT: cmpl %esi, %edi
-; FAST-NEXT: jb LBB37_1
+; FAST-NEXT: setb %al
+; FAST-NEXT: testb $1, %al
+; FAST-NEXT: jne LBB37_1
; FAST-NEXT: ## %bb.2: ## %continue
; FAST-NEXT: movb $1, %al
; FAST-NEXT: andb $1, %al
@@ -1001,7 +1031,9 @@ define zeroext i1 @usubobri64(i64 %v1, i64 %v2) {
; FAST-LABEL: usubobri64:
; FAST: ## %bb.0:
; FAST-NEXT: cmpq %rsi, %rdi
-; FAST-NEXT: jb LBB38_1
+; FAST-NEXT: setb %al
+; FAST-NEXT: testb $1, %al
+; FAST-NEXT: jne LBB38_1
; FAST-NEXT: ## %bb.2: ## %continue
; FAST-NEXT: movb $1, %al
; FAST-NEXT: andb $1, %al
@@ -1089,10 +1121,12 @@ define i32 @incovfselectstore(i32 %v1, i32 %v2, i32* %x) {
; FAST-LABEL: incovfselectstore:
; FAST: ## %bb.0:
; FAST-NEXT: movl %esi, %eax
-; FAST-NEXT: movl %edi, %ecx
-; FAST-NEXT: incl %ecx
-; FAST-NEXT: cmovol %edi, %eax
-; FAST-NEXT: movl %ecx, (%rdx)
+; FAST-NEXT: movl %edi, %esi
+; FAST-NEXT: incl %esi
+; FAST-NEXT: seto %cl
+; FAST-NEXT: testb $1, %cl
+; FAST-NEXT: cmovnel %edi, %eax
+; FAST-NEXT: movl %esi, (%rdx)
; FAST-NEXT: retq
%t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 1)
%obit = extractvalue {i32, i1} %t, 1
@@ -1116,10 +1150,12 @@ define i32 @decovfselectstore(i32 %v1, i32 %v2, i32* %x) {
; FAST-LABEL: decovfselectstore:
; FAST: ## %bb.0:
; FAST-NEXT: movl %esi, %eax
-; FAST-NEXT: movl %edi, %ecx
-; FAST-NEXT: decl %ecx
-; FAST-NEXT: cmovol %edi, %eax
-; FAST-NEXT: movl %ecx, (%rdx)
+; FAST-NEXT: movl %edi, %esi
+; FAST-NEXT: decl %esi
+; FAST-NEXT: seto %cl
+; FAST-NEXT: testb $1, %cl
+; FAST-NEXT: cmovnel %edi, %eax
+; FAST-NEXT: movl %esi, (%rdx)
; FAST-NEXT: retq
;
; KNL-LABEL: decovfselectstore:
diff --git a/llvm/test/CodeGen/X86/xmulo.ll b/llvm/test/CodeGen/X86/xmulo.ll
index 71d92af0dd94..184c4276d088 100644
--- a/llvm/test/CodeGen/X86/xmulo.ll
+++ b/llvm/test/CodeGen/X86/xmulo.ll
@@ -516,13 +516,23 @@ define zeroext i1 @umuloi64(i64 %v1, i64 %v2, i64* %res) {
; Check the use of the overflow bit in combination with a select instruction.
;
define i32 @smuloselecti32(i32 %v1, i32 %v2) {
-; LINUX-LABEL: smuloselecti32:
-; LINUX: # %bb.0:
-; LINUX-NEXT: movl %esi, %eax
-; LINUX-NEXT: movl %edi, %ecx
-; LINUX-NEXT: imull %esi, %ecx
-; LINUX-NEXT: cmovol %edi, %eax
-; LINUX-NEXT: retq
+; SDAG-LABEL: smuloselecti32:
+; SDAG: # %bb.0:
+; SDAG-NEXT: movl %esi, %eax
+; SDAG-NEXT: movl %edi, %ecx
+; SDAG-NEXT: imull %esi, %ecx
+; SDAG-NEXT: cmovol %edi, %eax
+; SDAG-NEXT: retq
+;
+; FAST-LABEL: smuloselecti32:
+; FAST: # %bb.0:
+; FAST-NEXT: movl %esi, %eax
+; FAST-NEXT: movl %edi, %ecx
+; FAST-NEXT: imull %esi, %ecx
+; FAST-NEXT: seto %cl
+; FAST-NEXT: testb $1, %cl
+; FAST-NEXT: cmovnel %edi, %eax
+; FAST-NEXT: retq
;
; WIN64-LABEL: smuloselecti32:
; WIN64: # %bb.0:
@@ -550,13 +560,23 @@ define i32 @smuloselecti32(i32 %v1, i32 %v2) {
}
define i64 @smuloselecti64(i64 %v1, i64 %v2) {
-; LINUX-LABEL: smuloselecti64:
-; LINUX: # %bb.0:
-; LINUX-NEXT: movq %rsi, %rax
-; LINUX-NEXT: movq %rdi, %rcx
-; LINUX-NEXT: imulq %rsi, %rcx
-; LINUX-NEXT: cmovoq %rdi, %rax
-; LINUX-NEXT: retq
+; SDAG-LABEL: smuloselecti64:
+; SDAG: # %bb.0:
+; SDAG-NEXT: movq %rsi, %rax
+; SDAG-NEXT: movq %rdi, %rcx
+; SDAG-NEXT: imulq %rsi, %rcx
+; SDAG-NEXT: cmovoq %rdi, %rax
+; SDAG-NEXT: retq
+;
+; FAST-LABEL: smuloselecti64:
+; FAST: # %bb.0:
+; FAST-NEXT: movq %rsi, %rax
+; FAST-NEXT: movq %rdi, %rcx
+; FAST-NEXT: imulq %rsi, %rcx
+; FAST-NEXT: seto %cl
+; FAST-NEXT: testb $1, %cl
+; FAST-NEXT: cmovneq %rdi, %rax
+; FAST-NEXT: retq
;
; WIN64-LABEL: smuloselecti64:
; WIN64: # %bb.0:
@@ -648,13 +668,23 @@ define i64 @smuloselecti64(i64 %v1, i64 %v2) {
}
define i32 @umuloselecti32(i32 %v1, i32 %v2) {
-; LINUX-LABEL: umuloselecti32:
-; LINUX: # %bb.0:
-; LINUX-NEXT: movl %edi, %eax
-; LINUX-NEXT: mull %esi
-; LINUX-NEXT: cmovol %edi, %esi
-; LINUX-NEXT: movl %esi, %eax
-; LINUX-NEXT: retq
+; SDAG-LABEL: umuloselecti32:
+; SDAG: # %bb.0:
+; SDAG-NEXT: movl %edi, %eax
+; SDAG-NEXT: mull %esi
+; SDAG-NEXT: cmovol %edi, %esi
+; SDAG-NEXT: movl %esi, %eax
+; SDAG-NEXT: retq
+;
+; FAST-LABEL: umuloselecti32:
+; FAST: # %bb.0:
+; FAST-NEXT: movl %edi, %eax
+; FAST-NEXT: mull %esi
+; FAST-NEXT: seto %al
+; FAST-NEXT: testb $1, %al
+; FAST-NEXT: cmovnel %edi, %esi
+; FAST-NEXT: movl %esi, %eax
+; FAST-NEXT: retq
;
; WIN64-LABEL: umuloselecti32:
; WIN64: # %bb.0:
@@ -686,13 +716,23 @@ define i32 @umuloselecti32(i32 %v1, i32 %v2) {
}
define i64 @umuloselecti64(i64 %v1, i64 %v2) {
-; LINUX-LABEL: umuloselecti64:
-; LINUX: # %bb.0:
-; LINUX-NEXT: movq %rdi, %rax
-; LINUX-NEXT: mulq %rsi
-; LINUX-NEXT: cmovoq %rdi, %rsi
-; LINUX-NEXT: movq %rsi, %rax
-; LINUX-NEXT: retq
+; SDAG-LABEL: umuloselecti64:
+; SDAG: # %bb.0:
+; SDAG-NEXT: movq %rdi, %rax
+; SDAG-NEXT: mulq %rsi
+; SDAG-NEXT: cmovoq %rdi, %rsi
+; SDAG-NEXT: movq %rsi, %rax
+; SDAG-NEXT: retq
+;
+; FAST-LABEL: umuloselecti64:
+; FAST: # %bb.0:
+; FAST-NEXT: movq %rdi, %rax
+; FAST-NEXT: mulq %rsi
+; FAST-NEXT: seto %al
+; FAST-NEXT: testb $1, %al
+; FAST-NEXT: cmovneq %rdi, %rsi
+; FAST-NEXT: movq %rsi, %rax
+; FAST-NEXT: retq
;
; WIN64-LABEL: umuloselecti64:
; WIN64: # %bb.0:
@@ -905,7 +945,9 @@ define zeroext i1 @smulobri32(i32 %v1, i32 %v2) {
; FAST-LABEL: smulobri32:
; FAST: # %bb.0:
; FAST-NEXT: imull %esi, %edi
-; FAST-NEXT: jo .LBB17_1
+; FAST-NEXT: seto %al
+; FAST-NEXT: testb $1, %al
+; FAST-NEXT: jne .LBB17_1
; FAST-NEXT: # %bb.2: # %continue
; FAST-NEXT: movb $1, %al
; FAST-NEXT: andb $1, %al
@@ -966,7 +1008,9 @@ define zeroext i1 @smulobri64(i64 %v1, i64 %v2) {
; FAST-LABEL: smulobri64:
; FAST: # %bb.0:
; FAST-NEXT: imulq %rsi, %rdi
-; FAST-NEXT: jo .LBB18_1
+; FAST-NEXT: seto %al
+; FAST-NEXT: testb $1, %al
+; FAST-NEXT: jne .LBB18_1
; FAST-NEXT: # %bb.2: # %continue
; FAST-NEXT: movb $1, %al
; FAST-NEXT: andb $1, %al
@@ -1228,7 +1272,9 @@ define zeroext i1 @umulobri32(i32 %v1, i32 %v2) {
; FAST: # %bb.0:
; FAST-NEXT: movl %edi, %eax
; FAST-NEXT: mull %esi
-; FAST-NEXT: jo .LBB21_1
+; FAST-NEXT: seto %al
+; FAST-NEXT: testb $1, %al
+; FAST-NEXT: jne .LBB21_1
; FAST-NEXT: # %bb.2: # %continue
; FAST-NEXT: movb $1, %al
; FAST-NEXT: andb $1, %al
@@ -1292,7 +1338,9 @@ define zeroext i1 @umulobri64(i64 %v1, i64 %v2) {
; FAST: # %bb.0:
; FAST-NEXT: movq %rdi, %rax
; FAST-NEXT: mulq %rsi
-; FAST-NEXT: jo .LBB22_1
+; FAST-NEXT: seto %al
+; FAST-NEXT: testb $1, %al
+; FAST-NEXT: jne .LBB22_1
; FAST-NEXT: # %bb.2: # %continue
; FAST-NEXT: movb $1, %al
; FAST-NEXT: andb $1, %al
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment