Skip to content

Instantly share code, notes, and snippets.

@nikic

nikic/xalu.patch Secret

Created April 1, 2022 10:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nikic/78da6b1abff77031bed2a948474983c6 to your computer and use it in GitHub Desktop.
Save nikic/78da6b1abff77031bed2a948474983c6 to your computer and use it in GitHub Desktop.
commit e5cc95d8ac679e706bb7ebe1361b5c4f94eec627
Author: Nikita Popov <npopov@redhat.com>
Date: Fri Apr 1 12:44:04 2022 +0200
[X86][FastISel] Remove the with.overflow + select fold from X86FastEmitCMoveSelect
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index 397c61261e3d..f490856856b2 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -2092,14 +2092,6 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
}
}
NeedTest = false;
- } else if (foldX86XALUIntrinsic(CC, I, Cond)) {
- // Fake request the condition, otherwise the intrinsic might be completely
- // optimized away.
- Register TmpReg = getRegForValue(Cond);
- if (TmpReg == 0)
- return false;
-
- NeedTest = false;
}
if (NeedTest) {
diff --git a/llvm/test/CodeGen/X86/pr54369.ll b/llvm/test/CodeGen/X86/pr54369.ll
index 818b4f9d87ea..468eaa47e8dd 100644
--- a/llvm/test/CodeGen/X86/pr54369.ll
+++ b/llvm/test/CodeGen/X86/pr54369.ll
@@ -6,11 +6,12 @@ define i64 @adder(i64 %lhs, i64 %rhs) {
; CHECK-LABEL: adder:
; CHECK: # %bb.0:
; CHECK-NEXT: addq %rsi, %rdi
-; CHECK-NEXT: seto %al
+; CHECK-NEXT: seto %dl
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: # kill: def $rax killed $eax
; CHECK-NEXT: movl $148, %ecx
-; CHECK-NEXT: cmovoq %rcx, %rax
+; CHECK-NEXT: testb $1, %dl
+; CHECK-NEXT: cmovneq %rcx, %rax
; CHECK-NEXT: retq
%res = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %lhs, i64 %rhs)
%errorbit = extractvalue { i64, i1 } %res, 1
diff --git a/llvm/test/CodeGen/X86/xaluo.ll b/llvm/test/CodeGen/X86/xaluo.ll
index 0de8bdeddd6f..a65540191591 100644
--- a/llvm/test/CodeGen/X86/xaluo.ll
+++ b/llvm/test/CodeGen/X86/xaluo.ll
@@ -563,7 +563,9 @@ define i32 @saddoselecti32(i32 %v1, i32 %v2) {
; FAST-NEXT: movl %esi, %eax
; FAST-NEXT: movl %edi, %ecx
; FAST-NEXT: addl %esi, %ecx
-; FAST-NEXT: cmovol %edi, %eax
+; FAST-NEXT: seto %cl
+; FAST-NEXT: testb $1, %cl
+; FAST-NEXT: cmovnel %edi, %eax
; FAST-NEXT: retq
%t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
%obit = extractvalue {i32, i1} %t, 1
@@ -585,7 +587,9 @@ define i64 @saddoselecti64(i64 %v1, i64 %v2) {
; FAST-NEXT: movq %rsi, %rax
; FAST-NEXT: movq %rdi, %rcx
; FAST-NEXT: addq %rsi, %rcx
-; FAST-NEXT: cmovoq %rdi, %rax
+; FAST-NEXT: seto %cl
+; FAST-NEXT: testb $1, %cl
+; FAST-NEXT: cmovneq %rdi, %rax
; FAST-NEXT: retq
%t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2)
%obit = extractvalue {i64, i1} %t, 1
@@ -607,7 +611,9 @@ define i32 @uaddoselecti32(i32 %v1, i32 %v2) {
; FAST-NEXT: movl %esi, %eax
; FAST-NEXT: movl %edi, %ecx
; FAST-NEXT: addl %esi, %ecx
-; FAST-NEXT: cmovbl %edi, %eax
+; FAST-NEXT: setb %cl
+; FAST-NEXT: testb $1, %cl
+; FAST-NEXT: cmovnel %edi, %eax
; FAST-NEXT: retq
%t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
%obit = extractvalue {i32, i1} %t, 1
@@ -629,7 +635,9 @@ define i64 @uaddoselecti64(i64 %v1, i64 %v2) {
; FAST-NEXT: movq %rsi, %rax
; FAST-NEXT: movq %rdi, %rcx
; FAST-NEXT: addq %rsi, %rcx
-; FAST-NEXT: cmovbq %rdi, %rax
+; FAST-NEXT: setb %cl
+; FAST-NEXT: testb $1, %cl
+; FAST-NEXT: cmovneq %rdi, %rax
; FAST-NEXT: retq
%t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2)
%obit = extractvalue {i64, i1} %t, 1
@@ -649,7 +657,9 @@ define i32 @ssuboselecti32(i32 %v1, i32 %v2) {
; FAST: ## %bb.0:
; FAST-NEXT: movl %esi, %eax
; FAST-NEXT: cmpl %esi, %edi
-; FAST-NEXT: cmovol %edi, %eax
+; FAST-NEXT: seto %cl
+; FAST-NEXT: testb $1, %cl
+; FAST-NEXT: cmovnel %edi, %eax
; FAST-NEXT: retq
%t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2)
%obit = extractvalue {i32, i1} %t, 1
@@ -669,7 +679,9 @@ define i64 @ssuboselecti64(i64 %v1, i64 %v2) {
; FAST: ## %bb.0:
; FAST-NEXT: movq %rsi, %rax
; FAST-NEXT: cmpq %rsi, %rdi
-; FAST-NEXT: cmovoq %rdi, %rax
+; FAST-NEXT: seto %cl
+; FAST-NEXT: testb $1, %cl
+; FAST-NEXT: cmovneq %rdi, %rax
; FAST-NEXT: retq
%t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2)
%obit = extractvalue {i64, i1} %t, 1
@@ -689,7 +701,9 @@ define i32 @usuboselecti32(i32 %v1, i32 %v2) {
; FAST: ## %bb.0:
; FAST-NEXT: movl %esi, %eax
; FAST-NEXT: cmpl %esi, %edi
-; FAST-NEXT: cmovbl %edi, %eax
+; FAST-NEXT: setb %cl
+; FAST-NEXT: testb $1, %cl
+; FAST-NEXT: cmovnel %edi, %eax
; FAST-NEXT: retq
%t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2)
%obit = extractvalue {i32, i1} %t, 1
@@ -709,7 +723,9 @@ define i64 @usuboselecti64(i64 %v1, i64 %v2) {
; FAST: ## %bb.0:
; FAST-NEXT: movq %rsi, %rax
; FAST-NEXT: cmpq %rsi, %rdi
-; FAST-NEXT: cmovbq %rdi, %rax
+; FAST-NEXT: setb %cl
+; FAST-NEXT: testb $1, %cl
+; FAST-NEXT: cmovneq %rdi, %rax
; FAST-NEXT: retq
%t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2)
%obit = extractvalue {i64, i1} %t, 1
@@ -1089,10 +1105,12 @@ define i32 @incovfselectstore(i32 %v1, i32 %v2, i32* %x) {
; FAST-LABEL: incovfselectstore:
; FAST: ## %bb.0:
; FAST-NEXT: movl %esi, %eax
-; FAST-NEXT: movl %edi, %ecx
-; FAST-NEXT: incl %ecx
-; FAST-NEXT: cmovol %edi, %eax
-; FAST-NEXT: movl %ecx, (%rdx)
+; FAST-NEXT: movl %edi, %esi
+; FAST-NEXT: incl %esi
+; FAST-NEXT: seto %cl
+; FAST-NEXT: testb $1, %cl
+; FAST-NEXT: cmovnel %edi, %eax
+; FAST-NEXT: movl %esi, (%rdx)
; FAST-NEXT: retq
%t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 1)
%obit = extractvalue {i32, i1} %t, 1
@@ -1116,10 +1134,12 @@ define i32 @decovfselectstore(i32 %v1, i32 %v2, i32* %x) {
; FAST-LABEL: decovfselectstore:
; FAST: ## %bb.0:
; FAST-NEXT: movl %esi, %eax
-; FAST-NEXT: movl %edi, %ecx
-; FAST-NEXT: decl %ecx
-; FAST-NEXT: cmovol %edi, %eax
-; FAST-NEXT: movl %ecx, (%rdx)
+; FAST-NEXT: movl %edi, %esi
+; FAST-NEXT: decl %esi
+; FAST-NEXT: seto %cl
+; FAST-NEXT: testb $1, %cl
+; FAST-NEXT: cmovnel %edi, %eax
+; FAST-NEXT: movl %esi, (%rdx)
; FAST-NEXT: retq
;
; KNL-LABEL: decovfselectstore:
diff --git a/llvm/test/CodeGen/X86/xmulo.ll b/llvm/test/CodeGen/X86/xmulo.ll
index 71d92af0dd94..380b3d4a3521 100644
--- a/llvm/test/CodeGen/X86/xmulo.ll
+++ b/llvm/test/CodeGen/X86/xmulo.ll
@@ -516,13 +516,23 @@ define zeroext i1 @umuloi64(i64 %v1, i64 %v2, i64* %res) {
; Check the use of the overflow bit in combination with a select instruction.
;
define i32 @smuloselecti32(i32 %v1, i32 %v2) {
-; LINUX-LABEL: smuloselecti32:
-; LINUX: # %bb.0:
-; LINUX-NEXT: movl %esi, %eax
-; LINUX-NEXT: movl %edi, %ecx
-; LINUX-NEXT: imull %esi, %ecx
-; LINUX-NEXT: cmovol %edi, %eax
-; LINUX-NEXT: retq
+; SDAG-LABEL: smuloselecti32:
+; SDAG: # %bb.0:
+; SDAG-NEXT: movl %esi, %eax
+; SDAG-NEXT: movl %edi, %ecx
+; SDAG-NEXT: imull %esi, %ecx
+; SDAG-NEXT: cmovol %edi, %eax
+; SDAG-NEXT: retq
+;
+; FAST-LABEL: smuloselecti32:
+; FAST: # %bb.0:
+; FAST-NEXT: movl %esi, %eax
+; FAST-NEXT: movl %edi, %ecx
+; FAST-NEXT: imull %esi, %ecx
+; FAST-NEXT: seto %cl
+; FAST-NEXT: testb $1, %cl
+; FAST-NEXT: cmovnel %edi, %eax
+; FAST-NEXT: retq
;
; WIN64-LABEL: smuloselecti32:
; WIN64: # %bb.0:
@@ -550,13 +560,23 @@ define i32 @smuloselecti32(i32 %v1, i32 %v2) {
}
define i64 @smuloselecti64(i64 %v1, i64 %v2) {
-; LINUX-LABEL: smuloselecti64:
-; LINUX: # %bb.0:
-; LINUX-NEXT: movq %rsi, %rax
-; LINUX-NEXT: movq %rdi, %rcx
-; LINUX-NEXT: imulq %rsi, %rcx
-; LINUX-NEXT: cmovoq %rdi, %rax
-; LINUX-NEXT: retq
+; SDAG-LABEL: smuloselecti64:
+; SDAG: # %bb.0:
+; SDAG-NEXT: movq %rsi, %rax
+; SDAG-NEXT: movq %rdi, %rcx
+; SDAG-NEXT: imulq %rsi, %rcx
+; SDAG-NEXT: cmovoq %rdi, %rax
+; SDAG-NEXT: retq
+;
+; FAST-LABEL: smuloselecti64:
+; FAST: # %bb.0:
+; FAST-NEXT: movq %rsi, %rax
+; FAST-NEXT: movq %rdi, %rcx
+; FAST-NEXT: imulq %rsi, %rcx
+; FAST-NEXT: seto %cl
+; FAST-NEXT: testb $1, %cl
+; FAST-NEXT: cmovneq %rdi, %rax
+; FAST-NEXT: retq
;
; WIN64-LABEL: smuloselecti64:
; WIN64: # %bb.0:
@@ -648,13 +668,23 @@ define i64 @smuloselecti64(i64 %v1, i64 %v2) {
}
define i32 @umuloselecti32(i32 %v1, i32 %v2) {
-; LINUX-LABEL: umuloselecti32:
-; LINUX: # %bb.0:
-; LINUX-NEXT: movl %edi, %eax
-; LINUX-NEXT: mull %esi
-; LINUX-NEXT: cmovol %edi, %esi
-; LINUX-NEXT: movl %esi, %eax
-; LINUX-NEXT: retq
+; SDAG-LABEL: umuloselecti32:
+; SDAG: # %bb.0:
+; SDAG-NEXT: movl %edi, %eax
+; SDAG-NEXT: mull %esi
+; SDAG-NEXT: cmovol %edi, %esi
+; SDAG-NEXT: movl %esi, %eax
+; SDAG-NEXT: retq
+;
+; FAST-LABEL: umuloselecti32:
+; FAST: # %bb.0:
+; FAST-NEXT: movl %edi, %eax
+; FAST-NEXT: mull %esi
+; FAST-NEXT: seto %al
+; FAST-NEXT: testb $1, %al
+; FAST-NEXT: cmovnel %edi, %esi
+; FAST-NEXT: movl %esi, %eax
+; FAST-NEXT: retq
;
; WIN64-LABEL: umuloselecti32:
; WIN64: # %bb.0:
@@ -686,13 +716,23 @@ define i32 @umuloselecti32(i32 %v1, i32 %v2) {
}
define i64 @umuloselecti64(i64 %v1, i64 %v2) {
-; LINUX-LABEL: umuloselecti64:
-; LINUX: # %bb.0:
-; LINUX-NEXT: movq %rdi, %rax
-; LINUX-NEXT: mulq %rsi
-; LINUX-NEXT: cmovoq %rdi, %rsi
-; LINUX-NEXT: movq %rsi, %rax
-; LINUX-NEXT: retq
+; SDAG-LABEL: umuloselecti64:
+; SDAG: # %bb.0:
+; SDAG-NEXT: movq %rdi, %rax
+; SDAG-NEXT: mulq %rsi
+; SDAG-NEXT: cmovoq %rdi, %rsi
+; SDAG-NEXT: movq %rsi, %rax
+; SDAG-NEXT: retq
+;
+; FAST-LABEL: umuloselecti64:
+; FAST: # %bb.0:
+; FAST-NEXT: movq %rdi, %rax
+; FAST-NEXT: mulq %rsi
+; FAST-NEXT: seto %al
+; FAST-NEXT: testb $1, %al
+; FAST-NEXT: cmovneq %rdi, %rsi
+; FAST-NEXT: movq %rsi, %rax
+; FAST-NEXT: retq
;
; WIN64-LABEL: umuloselecti64:
; WIN64: # %bb.0:
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment