Skip to content

Instantly share code, notes, and snippets.

@nikic

nikic/xalu.patch (secret gist)

Created April 5, 2022 14:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nikic/9e59e8e7b635a54f0d75aad89b2632d6 to your computer and use it in GitHub Desktop.
Save nikic/9e59e8e7b635a54f0d75aad89b2632d6 to your computer and use it in GitHub Desktop.
commit 4d4fb69830b49e729ee6ecf1c2a468c2f05ffffd
Author: Nikita Popov <npopov@redhat.com>
Date: Tue Apr 5 16:31:36 2022 +0200
[X86][FastISel] Remove the XALU overflow-intrinsic condition fold entirely
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index 397c61261e3d..adf689d44cd0 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -159,9 +159,6 @@ private:
bool TryEmitSmallMemcpy(X86AddressMode DestAM,
X86AddressMode SrcAM, uint64_t Len);
- bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
- const Value *Cond);
-
const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB,
X86AddressMode &AM);
@@ -221,70 +218,6 @@ X86FastISel::addFullAddress(const MachineInstrBuilder &MIB,
return ::addFullAddress(MIB, AM);
}
-/// Check if it is possible to fold the condition from the XALU intrinsic
-/// into the user. The condition code will only be updated on success.
-bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
- const Value *Cond) {
- if (!isa<ExtractValueInst>(Cond))
- return false;
-
- const auto *EV = cast<ExtractValueInst>(Cond);
- if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
- return false;
-
- const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
- MVT RetVT;
- const Function *Callee = II->getCalledFunction();
- Type *RetTy =
- cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
- if (!isTypeLegal(RetTy, RetVT))
- return false;
-
- if (RetVT != MVT::i32 && RetVT != MVT::i64)
- return false;
-
- X86::CondCode TmpCC;
- switch (II->getIntrinsicID()) {
- default: return false;
- case Intrinsic::sadd_with_overflow:
- case Intrinsic::ssub_with_overflow:
- case Intrinsic::smul_with_overflow:
- case Intrinsic::umul_with_overflow: TmpCC = X86::COND_O; break;
- case Intrinsic::uadd_with_overflow:
- case Intrinsic::usub_with_overflow: TmpCC = X86::COND_B; break;
- }
-
- // Check if both instructions are in the same basic block.
- if (II->getParent() != I->getParent())
- return false;
-
- // Make sure nothing is in the way
- BasicBlock::const_iterator Start(I);
- BasicBlock::const_iterator End(II);
- for (auto Itr = std::prev(Start); Itr != End; --Itr) {
- // We only expect extractvalue instructions between the intrinsic and the
- // instruction to be selected.
- if (!isa<ExtractValueInst>(Itr))
- return false;
-
- // Check that the extractvalue operand comes from the intrinsic.
- const auto *EVI = cast<ExtractValueInst>(Itr);
- if (EVI->getAggregateOperand() != II)
- return false;
- }
-
- // Make sure no potentially eflags clobbering phi moves can be inserted in
- // between.
- auto HasPhis = [](const BasicBlock *Succ) {
- return !llvm::empty(Succ->phis());
- };
- if (I->isTerminator() && llvm::any_of(successors(I), HasPhis))
- return false;
-
- CC = TmpCC;
- return true;
-}
-
bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
EVT evt = TLI.getValueType(DL, Ty, /*AllowUnknown=*/true);
if (evt == MVT::Other || !evt.isSimple())
@@ -1749,17 +1682,6 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
return true;
}
}
- } else if (foldX86XALUIntrinsic(CC, BI, BI->getCondition())) {
- // Fake request the condition, otherwise the intrinsic might be completely
- // optimized away.
- Register TmpReg = getRegForValue(BI->getCondition());
- if (TmpReg == 0)
- return false;
-
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JCC_1))
- .addMBB(TrueMBB).addImm(CC);
- finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
- return true;
}
// Otherwise do a clumsy setcc and re-test it.
@@ -2092,14 +2014,6 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
}
}
NeedTest = false;
- } else if (foldX86XALUIntrinsic(CC, I, Cond)) {
- // Fake request the condition, otherwise the intrinsic might be completely
- // optimized away.
- Register TmpReg = getRegForValue(Cond);
- if (TmpReg == 0)
- return false;
-
- NeedTest = false;
}
if (NeedTest) {
diff --git a/llvm/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll b/llvm/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll
index 94e1db3840b1..b80b5c48064c 100644
--- a/llvm/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll
+++ b/llvm/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll
@@ -13,6 +13,8 @@ define fastcc i32 @test() nounwind {
; FASTISEL: ## %bb.0: ## %entry
; FASTISEL-NEXT: movl $1, %eax
; FASTISEL-NEXT: addl $0, %eax
+; FASTISEL-NEXT: seto %al
+; FASTISEL-NEXT: testb $1, %al
; FASTISEL-NEXT: xorl %eax, %eax
; FASTISEL-NEXT: retq
;
@@ -21,7 +23,8 @@ define fastcc i32 @test() nounwind {
; AVX512F-NEXT: movl $1, %eax
; AVX512F-NEXT: addl $0, %eax
; AVX512F-NEXT: seto %al
-; AVX512F-NEXT: jo LBB0_2
+; AVX512F-NEXT: testb $1, %al
+; AVX512F-NEXT: jne LBB0_2
; AVX512F-NEXT: ## %bb.1: ## %BB3
; AVX512F-NEXT: LBB0_2: ## %.backedge
; AVX512F-NEXT: xorl %eax, %eax
diff --git a/llvm/test/CodeGen/X86/pr54369.ll b/llvm/test/CodeGen/X86/pr54369.ll
index 818b4f9d87ea..d0264db13feb 100644
--- a/llvm/test/CodeGen/X86/pr54369.ll
+++ b/llvm/test/CodeGen/X86/pr54369.ll
@@ -1,16 +1,16 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-- -O0 < %s | FileCheck %s
-; FIXME: This is currently miscompiled due to an eflags clobber.
define i64 @adder(i64 %lhs, i64 %rhs) {
; CHECK-LABEL: adder:
; CHECK: # %bb.0:
; CHECK-NEXT: addq %rsi, %rdi
-; CHECK-NEXT: seto %al
+; CHECK-NEXT: seto %dl
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: # kill: def $rax killed $eax
; CHECK-NEXT: movl $148, %ecx
-; CHECK-NEXT: cmovoq %rcx, %rax
+; CHECK-NEXT: testb $1, %dl
+; CHECK-NEXT: cmovneq %rcx, %rax
; CHECK-NEXT: retq
%res = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %lhs, i64 %rhs)
%errorbit = extractvalue { i64, i1 } %res, 1
diff --git a/llvm/test/CodeGen/X86/xaluo.ll b/llvm/test/CodeGen/X86/xaluo.ll
index 0de8bdeddd6f..bed76889d7dd 100644
--- a/llvm/test/CodeGen/X86/xaluo.ll
+++ b/llvm/test/CodeGen/X86/xaluo.ll
@@ -563,7 +563,9 @@ define i32 @saddoselecti32(i32 %v1, i32 %v2) {
; FAST-NEXT: movl %esi, %eax
; FAST-NEXT: movl %edi, %ecx
; FAST-NEXT: addl %esi, %ecx
-; FAST-NEXT: cmovol %edi, %eax
+; FAST-NEXT: seto %cl
+; FAST-NEXT: testb $1, %cl
+; FAST-NEXT: cmovnel %edi, %eax
; FAST-NEXT: retq
%t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
%obit = extractvalue {i32, i1} %t, 1
@@ -585,7 +587,9 @@ define i64 @saddoselecti64(i64 %v1, i64 %v2) {
; FAST-NEXT: movq %rsi, %rax
; FAST-NEXT: movq %rdi, %rcx
; FAST-NEXT: addq %rsi, %rcx
-; FAST-NEXT: cmovoq %rdi, %rax
+; FAST-NEXT: seto %cl
+; FAST-NEXT: testb $1, %cl
+; FAST-NEXT: cmovneq %rdi, %rax
; FAST-NEXT: retq
%t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2)
%obit = extractvalue {i64, i1} %t, 1
@@ -607,7 +611,9 @@ define i32 @uaddoselecti32(i32 %v1, i32 %v2) {
; FAST-NEXT: movl %esi, %eax
; FAST-NEXT: movl %edi, %ecx
; FAST-NEXT: addl %esi, %ecx
-; FAST-NEXT: cmovbl %edi, %eax
+; FAST-NEXT: setb %cl
+; FAST-NEXT: testb $1, %cl
+; FAST-NEXT: cmovnel %edi, %eax
; FAST-NEXT: retq
%t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
%obit = extractvalue {i32, i1} %t, 1
@@ -629,7 +635,9 @@ define i64 @uaddoselecti64(i64 %v1, i64 %v2) {
; FAST-NEXT: movq %rsi, %rax
; FAST-NEXT: movq %rdi, %rcx
; FAST-NEXT: addq %rsi, %rcx
-; FAST-NEXT: cmovbq %rdi, %rax
+; FAST-NEXT: setb %cl
+; FAST-NEXT: testb $1, %cl
+; FAST-NEXT: cmovneq %rdi, %rax
; FAST-NEXT: retq
%t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2)
%obit = extractvalue {i64, i1} %t, 1
@@ -649,7 +657,9 @@ define i32 @ssuboselecti32(i32 %v1, i32 %v2) {
; FAST: ## %bb.0:
; FAST-NEXT: movl %esi, %eax
; FAST-NEXT: cmpl %esi, %edi
-; FAST-NEXT: cmovol %edi, %eax
+; FAST-NEXT: seto %cl
+; FAST-NEXT: testb $1, %cl
+; FAST-NEXT: cmovnel %edi, %eax
; FAST-NEXT: retq
%t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2)
%obit = extractvalue {i32, i1} %t, 1
@@ -669,7 +679,9 @@ define i64 @ssuboselecti64(i64 %v1, i64 %v2) {
; FAST: ## %bb.0:
; FAST-NEXT: movq %rsi, %rax
; FAST-NEXT: cmpq %rsi, %rdi
-; FAST-NEXT: cmovoq %rdi, %rax
+; FAST-NEXT: seto %cl
+; FAST-NEXT: testb $1, %cl
+; FAST-NEXT: cmovneq %rdi, %rax
; FAST-NEXT: retq
%t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2)
%obit = extractvalue {i64, i1} %t, 1
@@ -689,7 +701,9 @@ define i32 @usuboselecti32(i32 %v1, i32 %v2) {
; FAST: ## %bb.0:
; FAST-NEXT: movl %esi, %eax
; FAST-NEXT: cmpl %esi, %edi
-; FAST-NEXT: cmovbl %edi, %eax
+; FAST-NEXT: setb %cl
+; FAST-NEXT: testb $1, %cl
+; FAST-NEXT: cmovnel %edi, %eax
; FAST-NEXT: retq
%t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2)
%obit = extractvalue {i32, i1} %t, 1
@@ -709,7 +723,9 @@ define i64 @usuboselecti64(i64 %v1, i64 %v2) {
; FAST: ## %bb.0:
; FAST-NEXT: movq %rsi, %rax
; FAST-NEXT: cmpq %rsi, %rdi
-; FAST-NEXT: cmovbq %rdi, %rax
+; FAST-NEXT: setb %cl
+; FAST-NEXT: testb $1, %cl
+; FAST-NEXT: cmovneq %rdi, %rax
; FAST-NEXT: retq
%t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2)
%obit = extractvalue {i64, i1} %t, 1
@@ -735,7 +751,9 @@ define zeroext i1 @saddobri32(i32 %v1, i32 %v2) {
; FAST-LABEL: saddobri32:
; FAST: ## %bb.0:
; FAST-NEXT: addl %esi, %edi
-; FAST-NEXT: jo LBB31_1
+; FAST-NEXT: seto %al
+; FAST-NEXT: testb $1, %al
+; FAST-NEXT: jne LBB31_1
; FAST-NEXT: ## %bb.2: ## %continue
; FAST-NEXT: movb $1, %al
; FAST-NEXT: andb $1, %al
@@ -773,7 +791,9 @@ define zeroext i1 @saddobri64(i64 %v1, i64 %v2) {
; FAST-LABEL: saddobri64:
; FAST: ## %bb.0:
; FAST-NEXT: addq %rsi, %rdi
-; FAST-NEXT: jo LBB32_1
+; FAST-NEXT: seto %al
+; FAST-NEXT: testb $1, %al
+; FAST-NEXT: jne LBB32_1
; FAST-NEXT: ## %bb.2: ## %continue
; FAST-NEXT: movb $1, %al
; FAST-NEXT: andb $1, %al
@@ -811,7 +831,9 @@ define zeroext i1 @uaddobri32(i32 %v1, i32 %v2) {
; FAST-LABEL: uaddobri32:
; FAST: ## %bb.0:
; FAST-NEXT: addl %esi, %edi
-; FAST-NEXT: jb LBB33_1
+; FAST-NEXT: setb %al
+; FAST-NEXT: testb $1, %al
+; FAST-NEXT: jne LBB33_1
; FAST-NEXT: ## %bb.2: ## %continue
; FAST-NEXT: movb $1, %al
; FAST-NEXT: andb $1, %al
@@ -849,7 +871,9 @@ define zeroext i1 @uaddobri64(i64 %v1, i64 %v2) {
; FAST-LABEL: uaddobri64:
; FAST: ## %bb.0:
; FAST-NEXT: addq %rsi, %rdi
-; FAST-NEXT: jb LBB34_1
+; FAST-NEXT: setb %al
+; FAST-NEXT: testb $1, %al
+; FAST-NEXT: jne LBB34_1
; FAST-NEXT: ## %bb.2: ## %continue
; FAST-NEXT: movb $1, %al
; FAST-NEXT: andb $1, %al
@@ -887,7 +911,9 @@ define zeroext i1 @ssubobri32(i32 %v1, i32 %v2) {
; FAST-LABEL: ssubobri32:
; FAST: ## %bb.0:
; FAST-NEXT: cmpl %esi, %edi
-; FAST-NEXT: jo LBB35_1
+; FAST-NEXT: seto %al
+; FAST-NEXT: testb $1, %al
+; FAST-NEXT: jne LBB35_1
; FAST-NEXT: ## %bb.2: ## %continue
; FAST-NEXT: movb $1, %al
; FAST-NEXT: andb $1, %al
@@ -925,7 +951,9 @@ define zeroext i1 @ssubobri64(i64 %v1, i64 %v2) {
; FAST-LABEL: ssubobri64:
; FAST: ## %bb.0:
; FAST-NEXT: cmpq %rsi, %rdi
-; FAST-NEXT: jo LBB36_1
+; FAST-NEXT: seto %al
+; FAST-NEXT: testb $1, %al
+; FAST-NEXT: jne LBB36_1
; FAST-NEXT: ## %bb.2: ## %continue
; FAST-NEXT: movb $1, %al
; FAST-NEXT: andb $1, %al
@@ -963,7 +991,9 @@ define zeroext i1 @usubobri32(i32 %v1, i32 %v2) {
; FAST-LABEL: usubobri32:
; FAST: ## %bb.0:
; FAST-NEXT: cmpl %esi, %edi
-; FAST-NEXT: jb LBB37_1
+; FAST-NEXT: setb %al
+; FAST-NEXT: testb $1, %al
+; FAST-NEXT: jne LBB37_1
; FAST-NEXT: ## %bb.2: ## %continue
; FAST-NEXT: movb $1, %al
; FAST-NEXT: andb $1, %al
@@ -1001,7 +1031,9 @@ define zeroext i1 @usubobri64(i64 %v1, i64 %v2) {
; FAST-LABEL: usubobri64:
; FAST: ## %bb.0:
; FAST-NEXT: cmpq %rsi, %rdi
-; FAST-NEXT: jb LBB38_1
+; FAST-NEXT: setb %al
+; FAST-NEXT: testb $1, %al
+; FAST-NEXT: jne LBB38_1
; FAST-NEXT: ## %bb.2: ## %continue
; FAST-NEXT: movb $1, %al
; FAST-NEXT: andb $1, %al
@@ -1089,10 +1121,12 @@ define i32 @incovfselectstore(i32 %v1, i32 %v2, i32* %x) {
; FAST-LABEL: incovfselectstore:
; FAST: ## %bb.0:
; FAST-NEXT: movl %esi, %eax
-; FAST-NEXT: movl %edi, %ecx
-; FAST-NEXT: incl %ecx
-; FAST-NEXT: cmovol %edi, %eax
-; FAST-NEXT: movl %ecx, (%rdx)
+; FAST-NEXT: movl %edi, %esi
+; FAST-NEXT: incl %esi
+; FAST-NEXT: seto %cl
+; FAST-NEXT: testb $1, %cl
+; FAST-NEXT: cmovnel %edi, %eax
+; FAST-NEXT: movl %esi, (%rdx)
; FAST-NEXT: retq
%t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 1)
%obit = extractvalue {i32, i1} %t, 1
@@ -1116,10 +1150,12 @@ define i32 @decovfselectstore(i32 %v1, i32 %v2, i32* %x) {
; FAST-LABEL: decovfselectstore:
; FAST: ## %bb.0:
; FAST-NEXT: movl %esi, %eax
-; FAST-NEXT: movl %edi, %ecx
-; FAST-NEXT: decl %ecx
-; FAST-NEXT: cmovol %edi, %eax
-; FAST-NEXT: movl %ecx, (%rdx)
+; FAST-NEXT: movl %edi, %esi
+; FAST-NEXT: decl %esi
+; FAST-NEXT: seto %cl
+; FAST-NEXT: testb $1, %cl
+; FAST-NEXT: cmovnel %edi, %eax
+; FAST-NEXT: movl %esi, (%rdx)
; FAST-NEXT: retq
;
; KNL-LABEL: decovfselectstore:
diff --git a/llvm/test/CodeGen/X86/xmulo.ll b/llvm/test/CodeGen/X86/xmulo.ll
index 71d92af0dd94..184c4276d088 100644
--- a/llvm/test/CodeGen/X86/xmulo.ll
+++ b/llvm/test/CodeGen/X86/xmulo.ll
@@ -516,13 +516,23 @@ define zeroext i1 @umuloi64(i64 %v1, i64 %v2, i64* %res) {
; Check the use of the overflow bit in combination with a select instruction.
;
define i32 @smuloselecti32(i32 %v1, i32 %v2) {
-; LINUX-LABEL: smuloselecti32:
-; LINUX: # %bb.0:
-; LINUX-NEXT: movl %esi, %eax
-; LINUX-NEXT: movl %edi, %ecx
-; LINUX-NEXT: imull %esi, %ecx
-; LINUX-NEXT: cmovol %edi, %eax
-; LINUX-NEXT: retq
+; SDAG-LABEL: smuloselecti32:
+; SDAG: # %bb.0:
+; SDAG-NEXT: movl %esi, %eax
+; SDAG-NEXT: movl %edi, %ecx
+; SDAG-NEXT: imull %esi, %ecx
+; SDAG-NEXT: cmovol %edi, %eax
+; SDAG-NEXT: retq
+;
+; FAST-LABEL: smuloselecti32:
+; FAST: # %bb.0:
+; FAST-NEXT: movl %esi, %eax
+; FAST-NEXT: movl %edi, %ecx
+; FAST-NEXT: imull %esi, %ecx
+; FAST-NEXT: seto %cl
+; FAST-NEXT: testb $1, %cl
+; FAST-NEXT: cmovnel %edi, %eax
+; FAST-NEXT: retq
;
; WIN64-LABEL: smuloselecti32:
; WIN64: # %bb.0:
@@ -550,13 +560,23 @@ define i32 @smuloselecti32(i32 %v1, i32 %v2) {
}
define i64 @smuloselecti64(i64 %v1, i64 %v2) {
-; LINUX-LABEL: smuloselecti64:
-; LINUX: # %bb.0:
-; LINUX-NEXT: movq %rsi, %rax
-; LINUX-NEXT: movq %rdi, %rcx
-; LINUX-NEXT: imulq %rsi, %rcx
-; LINUX-NEXT: cmovoq %rdi, %rax
-; LINUX-NEXT: retq
+; SDAG-LABEL: smuloselecti64:
+; SDAG: # %bb.0:
+; SDAG-NEXT: movq %rsi, %rax
+; SDAG-NEXT: movq %rdi, %rcx
+; SDAG-NEXT: imulq %rsi, %rcx
+; SDAG-NEXT: cmovoq %rdi, %rax
+; SDAG-NEXT: retq
+;
+; FAST-LABEL: smuloselecti64:
+; FAST: # %bb.0:
+; FAST-NEXT: movq %rsi, %rax
+; FAST-NEXT: movq %rdi, %rcx
+; FAST-NEXT: imulq %rsi, %rcx
+; FAST-NEXT: seto %cl
+; FAST-NEXT: testb $1, %cl
+; FAST-NEXT: cmovneq %rdi, %rax
+; FAST-NEXT: retq
;
; WIN64-LABEL: smuloselecti64:
; WIN64: # %bb.0:
@@ -648,13 +668,23 @@ define i64 @smuloselecti64(i64 %v1, i64 %v2) {
}
define i32 @umuloselecti32(i32 %v1, i32 %v2) {
-; LINUX-LABEL: umuloselecti32:
-; LINUX: # %bb.0:
-; LINUX-NEXT: movl %edi, %eax
-; LINUX-NEXT: mull %esi
-; LINUX-NEXT: cmovol %edi, %esi
-; LINUX-NEXT: movl %esi, %eax
-; LINUX-NEXT: retq
+; SDAG-LABEL: umuloselecti32:
+; SDAG: # %bb.0:
+; SDAG-NEXT: movl %edi, %eax
+; SDAG-NEXT: mull %esi
+; SDAG-NEXT: cmovol %edi, %esi
+; SDAG-NEXT: movl %esi, %eax
+; SDAG-NEXT: retq
+;
+; FAST-LABEL: umuloselecti32:
+; FAST: # %bb.0:
+; FAST-NEXT: movl %edi, %eax
+; FAST-NEXT: mull %esi
+; FAST-NEXT: seto %al
+; FAST-NEXT: testb $1, %al
+; FAST-NEXT: cmovnel %edi, %esi
+; FAST-NEXT: movl %esi, %eax
+; FAST-NEXT: retq
;
; WIN64-LABEL: umuloselecti32:
; WIN64: # %bb.0:
@@ -686,13 +716,23 @@ define i32 @umuloselecti32(i32 %v1, i32 %v2) {
}
define i64 @umuloselecti64(i64 %v1, i64 %v2) {
-; LINUX-LABEL: umuloselecti64:
-; LINUX: # %bb.0:
-; LINUX-NEXT: movq %rdi, %rax
-; LINUX-NEXT: mulq %rsi
-; LINUX-NEXT: cmovoq %rdi, %rsi
-; LINUX-NEXT: movq %rsi, %rax
-; LINUX-NEXT: retq
+; SDAG-LABEL: umuloselecti64:
+; SDAG: # %bb.0:
+; SDAG-NEXT: movq %rdi, %rax
+; SDAG-NEXT: mulq %rsi
+; SDAG-NEXT: cmovoq %rdi, %rsi
+; SDAG-NEXT: movq %rsi, %rax
+; SDAG-NEXT: retq
+;
+; FAST-LABEL: umuloselecti64:
+; FAST: # %bb.0:
+; FAST-NEXT: movq %rdi, %rax
+; FAST-NEXT: mulq %rsi
+; FAST-NEXT: seto %al
+; FAST-NEXT: testb $1, %al
+; FAST-NEXT: cmovneq %rdi, %rsi
+; FAST-NEXT: movq %rsi, %rax
+; FAST-NEXT: retq
;
; WIN64-LABEL: umuloselecti64:
; WIN64: # %bb.0:
@@ -905,7 +945,9 @@ define zeroext i1 @smulobri32(i32 %v1, i32 %v2) {
; FAST-LABEL: smulobri32:
; FAST: # %bb.0:
; FAST-NEXT: imull %esi, %edi
-; FAST-NEXT: jo .LBB17_1
+; FAST-NEXT: seto %al
+; FAST-NEXT: testb $1, %al
+; FAST-NEXT: jne .LBB17_1
; FAST-NEXT: # %bb.2: # %continue
; FAST-NEXT: movb $1, %al
; FAST-NEXT: andb $1, %al
@@ -966,7 +1008,9 @@ define zeroext i1 @smulobri64(i64 %v1, i64 %v2) {
; FAST-LABEL: smulobri64:
; FAST: # %bb.0:
; FAST-NEXT: imulq %rsi, %rdi
-; FAST-NEXT: jo .LBB18_1
+; FAST-NEXT: seto %al
+; FAST-NEXT: testb $1, %al
+; FAST-NEXT: jne .LBB18_1
; FAST-NEXT: # %bb.2: # %continue
; FAST-NEXT: movb $1, %al
; FAST-NEXT: andb $1, %al
@@ -1228,7 +1272,9 @@ define zeroext i1 @umulobri32(i32 %v1, i32 %v2) {
; FAST: # %bb.0:
; FAST-NEXT: movl %edi, %eax
; FAST-NEXT: mull %esi
-; FAST-NEXT: jo .LBB21_1
+; FAST-NEXT: seto %al
+; FAST-NEXT: testb $1, %al
+; FAST-NEXT: jne .LBB21_1
; FAST-NEXT: # %bb.2: # %continue
; FAST-NEXT: movb $1, %al
; FAST-NEXT: andb $1, %al
@@ -1292,7 +1338,9 @@ define zeroext i1 @umulobri64(i64 %v1, i64 %v2) {
; FAST: # %bb.0:
; FAST-NEXT: movq %rdi, %rax
; FAST-NEXT: mulq %rsi
-; FAST-NEXT: jo .LBB22_1
+; FAST-NEXT: seto %al
+; FAST-NEXT: testb $1, %al
+; FAST-NEXT: jne .LBB22_1
; FAST-NEXT: # %bb.2: # %continue
; FAST-NEXT: movb $1, %al
; FAST-NEXT: andb $1, %al
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment