xry111/gcc-lbt.patch

## gcc-lbt.patch
From 5f9783382bbe039d1fd00fe43ff5adce3405398e Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Tue, 11 Jun 2024 18:37:27 +0800
Subject: [PATCH] [NOT FOR UPSTREAM] LBT

Note that the result is *slower*, so this is just a toy project.  Maybe
we can use it with "-mlbt -Os", but I don't think it's valuable enough
for upstreaming.
---
 gcc/config/loongarch/lbt.md              | 242 +++++++++++++++++++++++
 gcc/config/loongarch/loongarch-modes.def |   3 +
 gcc/config/loongarch/loongarch.cc        |  74 ++++++-
 gcc/config/loongarch/loongarch.h         |  19 +-
 gcc/config/loongarch/loongarch.md        |   2 +
 5 files changed, 333 insertions(+), 7 deletions(-)
 create mode 100644 gcc/config/loongarch/lbt.md

diff --git a/gcc/config/loongarch/lbt.md b/gcc/config/loongarch/lbt.md
new file mode 100644
index 00000000000..ac8d1a96487
--- /dev/null
+++ b/gcc/config/loongarch/lbt.md
@@ -0,0 +1,242 @@
+(define_constants
+  [(LBT_X86_CF_REGNUM	74)
+   (LBT_X86_OF_REGNUM	75)])
+
+(define_mode_attr WIDEMODE [(QI "HI") (HI "SI") (SI "DI") (DI "TI")])
+
+(define_insn "lbt_clear_flags"  ; zero the flags selected by mask operand 0
+  [(set (reg:LBTCC LBT_X86_CF_REGNUM)  ; CF: cleared iff mask bit 0x1 is set
+	(if_then_else (eq (and (match_operand 0 "const_uimm6_operand" "i")
+			       (const_int 1))
+			  (const_int 0))
+		      (reg:LBTCC LBT_X86_CF_REGNUM)
+		      (const_int 0)))
+   (set (reg:LBTCC LBT_X86_OF_REGNUM)  ; OF: cleared iff mask bit 0x20 is set
+	(if_then_else (eq (and (match_dup 0) (const_int 32))
+			  (const_int 0))
+		      (reg:LBTCC LBT_X86_OF_REGNUM)
+		      (const_int 0)))]
+  ""
+  "x86mtflag\t$r0,%0")  ; the flag source register is always $r0
+
+;; x86mul.{b/h/w/d} sets CF if a *signed* overflow happens (like x86 imul)
+(define_code_attr lbt_x86_cf_extend_mode
+  [(plus	"zero_extend")
+   (minus	"zero_extend")
+   (mult	"sign_extend")])
+
+(define_insn "lbt_x86_flag_for_<optab>_<mode>"  ; writes CF and OF only
+  [(set (reg:LBTCC LBT_X86_CF_REGNUM)  ; CF: narrow result != widened result
+	(ne:LBTCC (<lbt_x86_cf_extend_mode>:<WIDEMODE>
+		    (addsubmul:QHWD
+		      (match_operand:QHWD 0 "register_operand" "r")
+		      (match_operand:QHWD 1 "register_operand" "r")))
+		  (addsubmul:<WIDEMODE>
+		    (<lbt_x86_cf_extend_mode>:<WIDEMODE> (match_dup 0))
+		    (<lbt_x86_cf_extend_mode>:<WIDEMODE> (match_dup 1)))))
+   (set (reg:LBTCC LBT_X86_OF_REGNUM)  ; OF: same test, always sign-extended
+	(ne:LBTCC (sign_extend:<WIDEMODE>
+		    (addsubmul:QHWD (match_dup 0) (match_dup 1)))
+		  (addsubmul:<WIDEMODE>
+		    (sign_extend:<WIDEMODE> (match_dup 0))
+		    (sign_extend:<WIDEMODE> (match_dup 1)))))]
+   ""
+   "x86<optab>.<size>\t%0,%1"  ; both operands are inputs; no GPR is written
+   [(set_attr "mode" "<MODE>")])
+
+(define_insn "lbt_x86_flag_for_umul_<mode>"  ; unsigned multiply flags
+  [(set (reg:LBTCC LBT_X86_CF_REGNUM)  ; CF: product does not fit in <MODE>
+	(ne:LBTCC (zero_extend:<WIDEMODE>
+		    (mult:GPR
+		      (match_operand:GPR 0 "register_operand" "r")
+		      (match_operand:GPR 1 "register_operand" "r")))
+		  (mult:<WIDEMODE>
+		    (zero_extend:<WIDEMODE> (match_dup 0))
+		    (zero_extend:<WIDEMODE> (match_dup 1)))))
+   (set (reg:LBTCC LBT_X86_OF_REGNUM)  ; OF: same expression as CF
+	(ne:LBTCC (zero_extend:<WIDEMODE>
+		    (mult:GPR (match_dup 0) (match_dup 1)))
+		  (mult:<WIDEMODE>
+		    (zero_extend:<WIDEMODE> (match_dup 0))
+		    (zero_extend:<WIDEMODE> (match_dup 1)))))]
+   ""
+   "x86mul.<size>u\t%0,%1"  ; both operands are inputs; no GPR is written
+   [(set_attr "mode" "<MODE>")])
+
+(define_insn "lbt_x86_setj_of_<mode>"  ; OP0 = (OF != 0) ? 1 : 0
+  [(set (match_operand:X 0 "register_operand" "=r")
+	(if_then_else:X (ne (reg:LBTCC LBT_X86_OF_REGNUM) (const_int 0))
+			(const_int 1)
+			(const_int 0)))]
+  ""
+  "setx86j\t%0,12"  ; NOTE(review): 12 presumably selects "OF set" - confirm
+  [(set_attr "mode" "<MODE>")])
+
+(define_expand "<optab>v<mode>4"  ; OP0 = OP1 <op> OP2; jump to OP3 on OF
+  [(set (match_operand:QHWD 0 "register_operand")
+	(addsubmul:QHWD (match_operand:QHWD 1 "register_operand")
+			(match_operand:QHWD 2 "register_operand")))
+   (match_operand 3)]
+  ""
+  {
+    const auto mul_optab ATTRIBUTE_UNUSED = smul_optab;
+    /* NOTE(review): mul_optab presumably makes <optab>_optab valid for mult.  */
+    emit_move_insn (operands[0],
+		    expand_binop (<MODE>mode, <optab>_optab,
+				  operands[1], operands[2], NULL_RTX,
+				  false, OPTAB_WIDEN));
+    /* Result is in operand 0; now read OF for the same inputs into REG.  */
+    machine_mode mode = TARGET_64BIT ? DImode : SImode;
+    rtx reg = gen_reg_rtx (mode);
+
+    emit_insn (
+      gen_lbt_x86_flag_for_<optab>_<mode> (operands[1], operands[2]));
+    emit_insn (TARGET_64BIT ? gen_lbt_x86_setj_of_di (reg)
+			    : gen_lbt_x86_setj_of_si (reg));
+    /* Branch to operand 3 when REG is nonzero.  */
+    rtx test = gen_rtx_NE (VOIDmode, reg, const0_rtx);
+    emit_jump_insn (
+      TARGET_64BIT ? gen_cbranchdi4 (test, reg, const0_rtx, operands[3])
+		   : gen_cbranchsi4 (test, reg, const0_rtx, operands[3]));
+
+    DONE;
+  })
+
+;; We don't customize uaddvM4/usubvM4 because using LBT for them doesn't
+;; have an advantage over the default expansion.  However for umulvM4
+;; using LBT seems better, but only SI and DI are supported.
+(define_expand "umulv<mode>4"
+  [(match_operand:GPR 0 "register_operand")
+   (match_operand:GPR 1 "register_operand")
+   (match_operand:GPR 2 "register_operand")
+   (match_operand 3)]
+  ""
+  {
+    emit_insn (gen_mul<mode>3 (operands[0], operands[1], operands[2]));
+    /* The product is in operand 0; OF for the same inputs goes into REG.  */
+    machine_mode mode = TARGET_64BIT ? DImode : SImode;
+    rtx reg = gen_reg_rtx (mode);
+
+    emit_insn (
+      gen_lbt_x86_flag_for_umul_<mode> (operands[1], operands[2]));
+    emit_insn (TARGET_64BIT ? gen_lbt_x86_setj_of_di (reg)
+			    : gen_lbt_x86_setj_of_si (reg));
+    /* Branch to operand 3 when REG is nonzero.  */
+    rtx test = gen_rtx_NE (VOIDmode, reg, const0_rtx);
+    emit_jump_insn (
+      TARGET_64BIT ? gen_cbranchdi4 (test, reg, const0_rtx, operands[3])
+		   : gen_cbranchsi4 (test, reg, const0_rtx, operands[3]));
+
+    DONE;
+  })
+
+;; CF = bit 0 of OP0, written with x86mtflag (mask 1); %z prints 0 as $r0.
+(define_insn "lbt_x86_set_cf_from_<mode>"
+  [(set (reg:LBTCC LBT_X86_CF_REGNUM)
+	(ne:LBTCC (and:GPR (match_operand:GPR 0 "reg_or_0_operand" "rJ")
+			   (const_int 1)) (const_int 0)))]
+  ""
+  "x86mtflag\t%z0,1")
+
+(define_code_attr optab_c [(plus "adc") (minus "sbc")])
+;; OP0 = (OP1 +/- OP2) +/- CF.  %z prints a constant-zero OP2 as $r0.
+(define_insn "lbt_<optab>c_<mode>"
+  [(set (match_operand:GPR 0 "register_operand" "=r")
+	(addsub:GPR
+	  (addsub:GPR (match_operand:GPR 1 "register_operand" "r")
+		      (match_operand:GPR 2 "reg_or_0_operand" "rJ"))
+	  (if_then_else:GPR (ne (reg:LBTCC LBT_X86_CF_REGNUM)
+				(const_int 0))
+			    (const_int 1)
+			    (const_int 0))))]
+  ""
+  "<optab_c>.<size>\t%0,%1,%z2"
+   [(set_attr "mode" "<MODE>")])
+
+;; Compute CF and OF for OP0 +/- OP1 +/- CF; %z prints a zero OP1 as $r0.
+(define_insn "lbt_x86_<optab>c_<mode>"
+  [(set (reg:LBTCC LBT_X86_CF_REGNUM)
+	(ne:LBTCC
+	  (zero_extend:<WIDEMODE>
+	    (addsub:GPR
+	      (addsub:GPR (match_operand:GPR 0 "register_operand" "r")
+			  (match_operand:GPR 1 "reg_or_0_operand" "rJ"))
+	      (if_then_else:GPR (ne (reg:LBTCC LBT_X86_CF_REGNUM)
+				     (const_int 0))
+				(const_int 1)
+				(const_int 0))))
+	  (addsub:<WIDEMODE>
+	    (addsub:<WIDEMODE> (zero_extend:<WIDEMODE> (match_dup 0))
+			       (zero_extend:<WIDEMODE> (match_dup 1)))
+	    (if_then_else:<WIDEMODE> (ne (reg:LBTCC LBT_X86_CF_REGNUM)
+					 (const_int 0))
+				     (const_int 1)
+				     (const_int 0)))))
+   (set (reg:LBTCC LBT_X86_OF_REGNUM)
+	(ne:LBTCC
+	  (sign_extend:<WIDEMODE>
+	    (addsub:GPR
+	      (addsub:GPR (match_dup 0) (match_dup 1))
+	      (if_then_else:GPR (ne (reg:LBTCC LBT_X86_CF_REGNUM)
+				    (const_int 0))
+				(const_int 1)
+				(const_int 0))))
+	  (addsub:<WIDEMODE>
+	    (addsub:<WIDEMODE> (sign_extend:<WIDEMODE> (match_dup 0))
+			       (sign_extend:<WIDEMODE> (match_dup 1)))
+	    (if_then_else:<WIDEMODE> (ne (reg:LBTCC LBT_X86_CF_REGNUM)
+					 (const_int 0))
+				     (const_int 1) (const_int 0)))))]
+  ""
+  "x86<optab_c>.<size>\t%0,%z1"
+  [(set_attr "mode" "<MODE>")])
+
+(define_insn "lbt_x86_setj_cf_<mode>"  ; OP0 = (CF != 0) ? 1 : 0
+  [(set (match_operand:GPR 0 "register_operand" "=r")
+	(if_then_else:GPR (ne (reg:LBTCC LBT_X86_CF_REGNUM) (const_int 0))
+			      (const_int 1)
+			      (const_int 0)))]
+  ""
+  "setx86j\t%0,2"  ; NOTE(review): 2 presumably selects "CF set" - confirm
+  [(set_attr "mode" "<MODE>")])
+
+(define_expand "u<optab>c<mode>5"
+  [(set (match_operand:GPR 0 "register_operand")
+	(addsub:GPR (addsub:GPR (match_operand:GPR 2 "register_operand")
+				(match_operand:GPR 3 "reg_or_0_operand"))
+		    (match_operand:GPR 4 "reg_or_0_operand")))
+   (match_operand:GPR 1 "register_operand")]
+  ""
+  {
+    if (operands[4] == const0_rtx)
+      {
+	/* No carry-in; the flag insn below needs OP3 in a register.  */
+	operands[3] = force_reg (<MODE>mode, operands[3]);
+	emit_insn (gen_<optab><mode>3 (operands[0], operands[2], operands[3]));
+	emit_insn (gen_lbt_x86_flag_for_<optab>_<mode> (operands[2],
+							operands[3]));
+      }
+    else
+      {
+	emit_insn (gen_lbt_x86_set_cf_from_<mode> (operands[4]));
+	emit_insn (gen_lbt_<optab>c_<mode> (operands[0],
+				       operands[2], operands[3]));
+	emit_insn (gen_lbt_x86_<optab>c_<mode> (operands[2], operands[3]));
+      }
+    emit_insn (gen_lbt_x86_setj_cf_<mode> (operands[1]));
+    DONE;
+  })
+
+(define_insn_and_split "*lbt_x86_remove_copying_carry_to_gpr_<mode>"
+  [(set (reg:LBTCC LBT_X86_CF_REGNUM)  ; CF = ((CF != 0 ? 1 : 0) != 0)
+	(ne:LBTCC (if_then_else:GPR
+		    (ne (reg:LBTCC LBT_X86_CF_REGNUM)
+			(const_int 0))
+		    (const_int 1)
+		    (const_int 0))
+		  (const_int 0)))]
+  "loongarch_pre_reload_split ()"
+  "#"  ; no assembly of its own; the insn is always split
+  "&& 1"
+  [(const_int 0)]
+  "emit_note (NOTE_INSN_DELETED); DONE;")  ; split into a deleted-insn note
diff --git a/gcc/config/loongarch/loongarch-modes.def b/gcc/config/loongarch/loongarch-modes.def
index 64caa8d6698..ba5f441cf5f 100644
--- a/gcc/config/loongarch/loongarch-modes.def
+++ b/gcc/config/loongarch/loongarch-modes.def
@@ -24,6 +24,9 @@ FLOAT_MODE (TF, 16, ieee_quad_format);
 /* For floating point conditions in FCC registers.  */
 CC_MODE (FCC);

+/* LBT flags.  */
+CC_MODE (LBTCC);
+
 /* Vector modes.  */
 VECTOR_MODES (INT, 4);	      /* V4QI  V2HI      */
 VECTOR_MODES (INT, 8);	      /* V8QI  V4HI V2SI */
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 6ec3ee62502..1fe26ec92c5 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -205,7 +205,7 @@ const enum reg_class loongarch_regno_to_class[FIRST_PSEUDO_REGISTER] = {
     FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
     FCC_REGS,	FCC_REGS,	FCC_REGS,	FCC_REGS,
     FCC_REGS,	FCC_REGS,	FCC_REGS,	FCC_REGS,
-    FRAME_REGS,	FRAME_REGS
+    FRAME_REGS,	FRAME_REGS,	LBT_FLAG_REGS,	LBT_FLAG_REGS,
 };

 /* Information about a single argument.  */
@@ -3854,6 +3854,27 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code,
     case UNGT:
     case UNLE:
     case UNLT:
+      /* Special case for *lbt_x86_remove_copying_carry_to_gpr_<mode> in
+	 lbt.md.  TODO: it'd be better to handle this in TARGET_INSN_COST
+	 once PR113325 is resolved.  */
+      if (mode == LBTCCmode && code == NE)
+	{
+	  rtx inner = XEXP (x, 0);
+	  if (GET_CODE (inner) == IF_THEN_ELSE
+	      && XEXP (inner, 1) == const1_rtx
+	      && XEXP (inner, 2) == const0_rtx)
+	    {
+	      inner = XEXP (inner, 0);
+	      if (GET_CODE (inner) == NE
+		  && GET_MODE (XEXP (inner, 0)) == mode
+		  && XEXP (inner, 1) == const0_rtx)
+		{
+		  *total = 0;
+		  return true;
+		}
+	    }
+	}
+
       /* Branch comparisons have VOIDmode, so use the first operand's
 	 mode instead.  */
       mode = GET_MODE (XEXP (x, 0));
@@ -6685,6 +6706,9 @@ loongarch_hard_regno_mode_ok_uncached (unsigned int regno, machine_mode mode)
   if (mode == FCCmode)
     return FCC_REG_P (regno) || GP_REG_P (regno) || FP_REG_P (regno);

+  if (mode == LBTCCmode)
+    return regno == LBT_X86_CF_REGNUM || regno == LBT_X86_OF_REGNUM;
+
   size = GET_MODE_SIZE (mode);
   mclass = GET_MODE_CLASS (mode);

@@ -10977,6 +11001,46 @@ loongarch_optab_supported_p (int op, machine_mode, machine_mode,
     }
 }

+/* Implement the TARGET_MIN_ARITHMETIC_PRECISION hook.  */
+static unsigned int
+loongarch_min_arithmetic_precision (void)
+{
+  /* Note that this target hook is designed to provide info about the
+     condition codes from arithmetic.  On LoongArch the condition codes
+     are generated by LBT, so we return 8 here because we have "x86add.b"
+     etc. even though we don't have "add.b".  */
+  return 8;
+}
+
+/* Implement TARGET_ZERO_CALL_USED_REGS: clear the requested LBT flags
+   (x86mtflag mask 0x1 = CF, 0x20 = OF), then defer to the default.  */
+static HARD_REG_SET
+loongarch_zero_call_used_regs (HARD_REG_SET regs)
+{
+  int lbt_mask = 0;
+  HARD_REG_SET zeroed;
+  CLEAR_HARD_REG_SET (zeroed);
+
+  static CONSTEXPR const std::pair<int, int> lbt[] = {
+    {LBT_X86_CF_REGNUM, 0x1},
+    {LBT_X86_OF_REGNUM, 0x20},
+  };
+
+  for (auto p: lbt)
+    if (TEST_HARD_REG_BIT (regs, p.first))
+      {
+	SET_HARD_REG_BIT (zeroed, p.first);
+	CLEAR_HARD_REG_BIT (regs, p.first);
+	lbt_mask |= p.second;
+      }
+
+  /* Don't emit an LBT instruction when no LBT flag was requested.  */
+  if (lbt_mask)
+    emit_insn (gen_lbt_clear_flags (GEN_INT (lbt_mask)));
+
+  return zeroed | default_zero_call_used_regs (regs);
+}
+
 /* If -fverbose-asm, dump some info for debugging.  */
 static void
 loongarch_asm_code_end (void)
@@ -11263,6 +11327,14 @@ loongarch_asm_code_end (void)
 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
   loongarch_builtin_support_vector_misalignment

+#undef TARGET_MIN_ARITHMETIC_PRECISION
+#define TARGET_MIN_ARITHMETIC_PRECISION \
+  loongarch_min_arithmetic_precision
+
+#undef TARGET_ZERO_CALL_USED_REGS
+#define TARGET_ZERO_CALL_USED_REGS \
+  loongarch_zero_call_used_regs
+
 struct gcc_target targetm = TARGET_INITIALIZER;

 #include "gt-loongarch.h"
diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
index f7fe950f333..4985d5dd35e 100644
--- a/gcc/config/loongarch/loongarch.h
+++ b/gcc/config/loongarch/loongarch.h
@@ -305,9 +305,10 @@ along with GCC; see the file COPYING3.  If not see
    - 2 fake registers:
 	- ARG_POINTER_REGNUM
 	- FRAME_POINTER_REGNUM
+   - 2 LBT status registers (x86-style CF and OF; others not supported yet)
 */

-#define FIRST_PSEUDO_REGISTER 74
+#define FIRST_PSEUDO_REGISTER 76

 /* zero, tp, sp and x are fixed.  */
 #define FIXED_REGISTERS							\
@@ -318,7 +319,7 @@ along with GCC; see the file COPYING3.  If not see
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,			\
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,			\
   /* Others.  */							\
-  0, 0, 0, 0, 0, 0, 0, 0, 1, 1}
+  0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0}

 /* The call RTLs themselves clobber ra.  */
 #define CALL_USED_REGISTERS						\
@@ -329,7 +330,7 @@ along with GCC; see the file COPYING3.  If not see
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,			\
   1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,			\
   /* Others.  */							\
-  1, 1, 1, 1, 1, 1, 1, 1, 1, 1}
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}

 /* Internal macros to classify a register number as to whether it's a
    general purpose register, a floating point register, or a status
@@ -387,6 +388,9 @@ along with GCC; see the file COPYING3.  If not see
 #define ARG_POINTER_REGNUM 72
 #define FRAME_POINTER_REGNUM 73

+#define LBT_X86_CF_REGNUM 74
+#define LBT_X86_OF_REGNUM 75
+
 #define HARD_FRAME_POINTER_REGNUM (GP_REG_FIRST + 22)

 #define HARD_FRAME_POINTER_IS_FRAME_POINTER 0
@@ -449,6 +453,7 @@ enum reg_class
   FP_REGS,	  /* floating point registers  */
   FCC_REGS,	  /* status registers (fp status)  */
   FRAME_REGS,	  /* arg pointer and frame pointer  */
+  LBT_FLAG_REGS,  /* LBT flag registers */
   ALL_REGS,	  /* all registers  */
   LIM_REG_CLASSES /* max value + 1  */
 };
@@ -471,6 +476,7 @@ enum reg_class
   "FP_REGS",								\
   "FCC_REGS",								\
   "FRAME_REGS",								\
+  "LBT_FLAG_REGS",							\
   "ALL_REGS"								\
 }

@@ -495,7 +501,8 @@ enum reg_class
   { 0x00000000, 0xffffffff, 0x00000000 },	/* FP_REGS  */		\
   { 0x00000000, 0x00000000, 0x000000ff },	/* FCC_REGS  */		\
   { 0x00000000, 0x00000000, 0x00000300 },	/* FRAME_REGS  */	\
-  { 0xffffffff, 0xffffffff, 0x000003ff }	/* ALL_REGS  */		\
+  { 0x00000000, 0x00000000, 0x00000c00 },	/* LBT_FLAG_REGS */	\
+  { 0xffffffff, 0xffffffff, 0x00000fff }	/* ALL_REGS  */		\
 }

 /* A C expression whose value is a register class containing hard
@@ -535,7 +542,7 @@ enum reg_class
   56, 57, 58, 59, 60, 61, 62, 63,					\
   /* None of the remaining classes have defined call-saved		\
      registers.  */							\
-  64, 65, 66, 67, 68, 69, 70, 71, 72, 73}
+  64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75}

 #define IMM_BITS 12
 #define IMM_REACH (HOST_WIDE_INT_1 << IMM_BITS)
@@ -908,7 +915,7 @@ typedef struct {
   "$f16", "$f17", "$f18", "$f19", "$f20", "$f21", "$f22", "$f23",	  \
   "$f24", "$f25", "$f26", "$f27", "$f28", "$f29", "$f30", "$f31",	  \
   "$fcc0","$fcc1","$fcc2","$fcc3","$fcc4","$fcc5","$fcc6","$fcc7",	  \
-  "$arg", "$frame"}
+  "$arg", "$frame", "$cf", "$of"}

 /* This macro defines additional names for hard registers.  */

diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index 25c1d323ba0..be48f208d76 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -4413,6 +4413,8 @@ (define_insn_and_rewrite "simple_store<mode>"
 ; The LoongArch SIMD Instructions.
 (include "simd.md")

+(include "lbt.md")
+
 (define_c_enum "unspec" [
   UNSPEC_ADDRESS_FIRST
 ])
--
2.45.2
	From 5f9783382bbe039d1fd00fe43ff5adce3405398e Mon Sep 17 00:00:00 2001
	From: Xi Ruoyao <xry111@xry111.site>
	Date: Tue, 11 Jun 2024 18:37:27 +0800
	Subject: [PATCH] [NOT FOR UPSTREAM] LBT

	Note that the result is slower so this is just a toy project. Maybe
	we can use it if "-mlbt -Os" but I don't think it's valuable enough for
	upstreaming.
	---
	gcc/config/loongarch/lbt.md \| 242 +++++++++++++++++++++++
	gcc/config/loongarch/loongarch-modes.def \| 3 +
	gcc/config/loongarch/loongarch.cc \| 74 ++++++-
	gcc/config/loongarch/loongarch.h \| 19 +-
	gcc/config/loongarch/loongarch.md \| 2 +
	5 files changed, 333 insertions(+), 7 deletions(-)
	create mode 100644 gcc/config/loongarch/lbt.md

	diff --git a/gcc/config/loongarch/lbt.md b/gcc/config/loongarch/lbt.md
	new file mode 100644
	index 00000000000..ac8d1a96487
	--- /dev/null
	+++ b/gcc/config/loongarch/lbt.md
	@@ -0,0 +1,242 @@
	+(define_constants
	+ [(LBT_X86_CF_REGNUM 74)
	+ (LBT_X86_OF_REGNUM 75)])
	+
	+(define_mode_attr WIDEMODE [(QI "HI") (HI "SI") (SI "DI") (DI "TI")])
	+
	+(define_insn "lbt_clear_flags"
	+ [(set (reg:LBTCC LBT_X86_CF_REGNUM)
	+ (if_then_else (eq (and (match_operand 0 "const_uimm6_operand" "i")
	+ (const_int 1))
	+ (const_int 0))
	+ (reg:LBTCC LBT_X86_CF_REGNUM)
	+ (const_int 0)))
	+ (set (reg:LBTCC LBT_X86_OF_REGNUM)
	+ (if_then_else (eq (and (match_dup 0) (const_int 32))
	+ (const_int 0))
	+ (reg:LBTCC LBT_X86_OF_REGNUM)
	+ (const_int 0)))]
	+ ""
	+ "x86mtflag\t$r0,%0")
	+
	+;; x86mul.{b/h/w/d} sets CF if a signed overflow happens (like x86 imul)
	+(define_code_attr lbt_x86_cf_extend_mode
	+ [(plus "zero_extend")
	+ (minus "zero_extend")
	+ (mult "sign_extend")])
	+
	+(define_insn "lbt_x86_flag_for_<optab>_<mode>"
	+ [(set (reg:LBTCC LBT_X86_CF_REGNUM)
	+ (ne:LBTCC (<lbt_x86_cf_extend_mode>:<WIDEMODE>
	+ (addsubmul:QHWD
	+ (match_operand:QHWD 0 "register_operand" "r")
	+ (match_operand:QHWD 1 "register_operand" "r")))
	+ (addsubmul:<WIDEMODE>
	+ (<lbt_x86_cf_extend_mode>:<WIDEMODE> (match_dup 0))
	+ (<lbt_x86_cf_extend_mode>:<WIDEMODE> (match_dup 1)))))
	+ (set (reg:LBTCC LBT_X86_OF_REGNUM)
	+ (ne:LBTCC (sign_extend:<WIDEMODE>
	+ (addsubmul:QHWD (match_dup 0) (match_dup 1)))
	+ (addsubmul:<WIDEMODE>
	+ (sign_extend:<WIDEMODE> (match_dup 0))
	+ (sign_extend:<WIDEMODE> (match_dup 1)))))]
	+ ""
	+ "x86<optab>.<size>\t%0,%1"
	+ [(set_attr "mode" "<MODE>")])
	+
	+(define_insn "lbt_x86_flag_for_umul_<mode>"
	+ [(set (reg:LBTCC LBT_X86_CF_REGNUM)
	+ (ne:LBTCC (zero_extend:<WIDEMODE>
	+ (mult:GPR
	+ (match_operand:GPR 0 "register_operand" "r")
	+ (match_operand:GPR 1 "register_operand" "r")))
	+ (mult:<WIDEMODE>
	+ (zero_extend:<WIDEMODE> (match_dup 0))
	+ (zero_extend:<WIDEMODE> (match_dup 1)))))
	+ (set (reg:LBTCC LBT_X86_OF_REGNUM)
	+ (ne:LBTCC (zero_extend:<WIDEMODE>
	+ (mult:GPR (match_dup 0) (match_dup 1)))
	+ (mult:<WIDEMODE>
	+ (zero_extend:<WIDEMODE> (match_dup 0))
	+ (zero_extend:<WIDEMODE> (match_dup 1)))))]
	+ ""
	+ "x86mul.<size>u\t%0,%1"
	+ [(set_attr "mode" "<MODE>")])
	+
	+(define_insn "lbt_x86_setj_of_<mode>"
	+ [(set (match_operand:X 0 "register_operand" "=r")
	+ (if_then_else:X (ne (reg:LBTCC LBT_X86_OF_REGNUM) (const_int 0))
	+ (const_int 1)
	+ (const_int 0)))]
	+ ""
	+ "setx86j\t%0,12"
	+ [(set_attr "mode" "<MODE>")])
	+
	+(define_expand "<optab>v<mode>4"
	+ [(set (match_operand:QHWD 0 "register_operand")
	+ (addsubmul:QHWD (match_operand:QHWD 1 "register_operand")
	+ (match_operand:QHWD 2 "register_operand")))
	+ (match_operand 3)]
	+ ""
	+ {
	+ const auto mul_optab ATTRIBUTE_UNUSED = smul_optab;
	+
	+ emit_move_insn (operands[0],
	+ expand_binop (<MODE>mode, <optab>_optab,
	+ operands[1], operands[2], NULL_RTX,
	+ false, OPTAB_WIDEN));
	+
	+ machine_mode mode = TARGET_64BIT ? DImode : SImode;
	+ rtx reg = gen_reg_rtx (mode);
	+
	+ emit_insn (
	+ gen_lbt_x86_flag_for_<optab>_<mode> (operands[1], operands[2]));
	+ emit_insn (TARGET_64BIT ? gen_lbt_x86_setj_of_di (reg)
	+ : gen_lbt_x86_setj_of_si (reg));
	+
	+ rtx test = gen_rtx_NE (VOIDmode, reg, const0_rtx);
	+ emit_jump_insn (
	+ TARGET_64BIT ? gen_cbranchdi4 (test, reg, const0_rtx, operands[3])
	+ : gen_cbranchsi4 (test, reg, const0_rtx, operands[3]));
	+
	+ DONE;
	+ })
	+
	+;; We don't customize uaddvM4/usubvM4 because using LBT for them doesn't
	+;; have an advantage over the default expansion. However for umulvM4
	+;; using LBT seems better, but only SI and DI are supported.
	+(define_expand "umulv<mode>4"
	+ [(match_operand:GPR 0 "register_operand")
	+ (match_operand:GPR 1 "register_operand")
	+ (match_operand:GPR 2 "register_operand")
	+ (match_operand 3)]
	+ ""
	+ {
	+ emit_insn (gen_mul<mode>3 (operands[0], operands[1], operands[2]));
	+
	+ machine_mode mode = TARGET_64BIT ? DImode : SImode;
	+ rtx reg = gen_reg_rtx (mode);
	+
	+ emit_insn (
	+ gen_lbt_x86_flag_for_umul_<mode> (operands[1], operands[2]));
	+ emit_insn (TARGET_64BIT ? gen_lbt_x86_setj_of_di (reg)
	+ : gen_lbt_x86_setj_of_si (reg));
	+
	+ rtx test = gen_rtx_NE (VOIDmode, reg, const0_rtx);
	+ emit_jump_insn (
	+ TARGET_64BIT ? gen_cbranchdi4 (test, reg, const0_rtx, operands[3])
	+ : gen_cbranchsi4 (test, reg, const0_rtx, operands[3]));
	+
	+ DONE;
	+ })
	+
	+(define_insn "lbt_x86_set_cf_from_<mode>"
	+ [(set (reg:LBTCC LBT_X86_CF_REGNUM)
	+ (ne:LBTCC (and:GPR (match_operand:GPR 0 "reg_or_0_operand" "rJ")
	+ (const_int 1))
	+ (const_int 0)))]
	+ ""
	+ "x86mfflag\t%0,1")
	+
	+(define_code_attr optab_c [(plus "adc") (minus "sbc")])
	+(define_insn "lbt_<optab>c_<mode>"
	+ [(set (match_operand:GPR 0 "register_operand" "=r")
	+ (addsub:GPR
	+ (addsub:GPR
	+ (match_operand:GPR 1 "register_operand" "r")
	+ (match_operand:GPR 2 "reg_or_0_operand" "rJ"))
	+ (if_then_else:GPR (ne (reg:LBTCC LBT_X86_CF_REGNUM)
	+ (const_int 0))
	+ (const_int 1)
	+ (const_int 0))))]
	+ ""
	+ "<optab_c>.<size>\t%0,%1,%2"
	+ [(set_attr "mode" "<MODE>")])
	+
	+(define_insn "lbt_x86_<optab>c_<mode>"
	+ [(set (reg:LBTCC LBT_X86_CF_REGNUM)
	+ (ne:LBTCC
	+ (zero_extend:<WIDEMODE>
	+ (addsub:GPR
	+ (addsub:GPR (match_operand:GPR 0 "register_operand" "r")
	+ (match_operand:GPR 1 "reg_or_0_operand" "rJ"))
	+ (if_then_else:GPR (ne (reg:LBTCC LBT_X86_CF_REGNUM)
	+ (const_int 0))
	+ (const_int 1)
	+ (const_int 0))))
	+ (addsub:<WIDEMODE>
	+ (addsub:<WIDEMODE> (zero_extend:<WIDEMODE> (match_dup 0))
	+ (zero_extend:<WIDEMODE> (match_dup 1)))
	+ (if_then_else:<WIDEMODE> (ne (reg:LBTCC LBT_X86_CF_REGNUM)
	+ (const_int 0))
	+ (const_int 1)
	+ (const_int 0)))))
	+ (set (reg:LBTCC LBT_X86_OF_REGNUM)
	+ (ne:LBTCC
	+ (sign_extend:<WIDEMODE>
	+ (addsub:GPR
	+ (addsub:GPR (match_dup 0) (match_dup 1))
	+ (if_then_else:GPR (ne (reg:LBTCC LBT_X86_CF_REGNUM)
	+ (const_int 0))
	+ (const_int 1)
	+ (const_int 0))))
	+ (addsub:<WIDEMODE>
	+ (addsub:<WIDEMODE> (sign_extend:<WIDEMODE> (match_dup 0))
	+ (sign_extend:<WIDEMODE> (match_dup 1)))
	+ (if_then_else:<WIDEMODE> (ne (reg:LBTCC LBT_X86_CF_REGNUM)
	+ (const_int 0))
	+ (const_int 1)
	+ (const_int 0)))))]
	+ ""
	+ "x86<optab_c>.<size>\t%0,%1"
	+ [(set_attr "mode" "<MODE>")])
	+
	+(define_insn "lbt_x86_setj_cf_<mode>"
	+ [(set (match_operand:GPR 0 "register_operand" "=r")
	+ (if_then_else:GPR (ne (reg:LBTCC LBT_X86_CF_REGNUM) (const_int 0))
	+ (const_int 1)
	+ (const_int 0)))]
	+ ""
	+ "setx86j\t%0,2"
	+ [(set_attr "mode" "<MODE>")])
	+
	+(define_expand "u<optab>c<mode>5"
	+ [(set (match_operand:GPR 0 "register_operand")
	+ (addsub:GPR (addsub:GPR (match_operand:GPR 2 "register_operand")
	+ (match_operand:GPR 3 "reg_or_0_operand"))
	+ (match_operand:GPR 4 "reg_or_0_operand")))
	+ (match_operand:GPR 1 "register_operand")]
	+ ""
	+ {
	+ if (operands[4] == const0_rtx)
	+ {
	+ emit_insn (gen_<optab><mode>3 (operands[0],
	+ operands[2], operands[3]));
	+ emit_insn (gen_lbt_x86_flag_for_<optab>_<mode> (operands[2],
	+ operands[3]));
	+ }
	+ else
	+ {
	+ emit_insn (gen_lbt_x86_set_cf_from_<mode> (operands[4]));
	+ emit_insn (gen_lbt_<optab>c_<mode> (operands[0],
	+ operands[2], operands[3]));
	+ emit_insn (gen_lbt_x86_<optab>c_<mode> (operands[2], operands[3]));
	+ }
	+
	+ emit_insn (gen_lbt_x86_setj_cf_<mode> (operands[1]));
	+ DONE;
	+ })
	+
	+(define_insn_and_split "*lbt_x86_remove_copying_carry_to_gpr_<mode>"
	+ [(set (reg:LBTCC LBT_X86_CF_REGNUM)
	+ (ne:LBTCC (if_then_else:GPR
	+ (ne (reg:LBTCC LBT_X86_CF_REGNUM)
	+ (const_int 0))
	+ (const_int 1)
	+ (const_int 0))
	+ (const_int 0)))]
	+ "loongarch_pre_reload_split ()"
	+ "#"
	+ "&& 1"
	+ [(const_int 0)]
	+ "emit_note (NOTE_INSN_DELETED); DONE;")
	diff --git a/gcc/config/loongarch/loongarch-modes.def b/gcc/config/loongarch/loongarch-modes.def
	index 64caa8d6698..ba5f441cf5f 100644
	--- a/gcc/config/loongarch/loongarch-modes.def
	+++ b/gcc/config/loongarch/loongarch-modes.def
	@@ -24,6 +24,9 @@ FLOAT_MODE (TF, 16, ieee_quad_format);
	/* For floating point conditions in FCC registers. */
	CC_MODE (FCC);

	+/* LBT flags. */
	+CC_MODE (LBTCC);
	+
	/* Vector modes. */
	VECTOR_MODES (INT, 4); /* V4QI V2HI */
	VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */
	diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
	index 6ec3ee62502..1fe26ec92c5 100644
	--- a/gcc/config/loongarch/loongarch.cc
	+++ b/gcc/config/loongarch/loongarch.cc
	@@ -205,7 +205,7 @@ const enum reg_class loongarch_regno_to_class[FIRST_PSEUDO_REGISTER] = {
	FP_REGS, FP_REGS, FP_REGS, FP_REGS,
	FCC_REGS, FCC_REGS, FCC_REGS, FCC_REGS,
	FCC_REGS, FCC_REGS, FCC_REGS, FCC_REGS,
	- FRAME_REGS, FRAME_REGS
	+ FRAME_REGS, FRAME_REGS, LBT_FLAG_REGS, LBT_FLAG_REGS,
	};

	/* Information about a single argument. */
	@@ -3854,6 +3854,27 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code,
	case UNGT:
	case UNLE:
	case UNLT:
	+ /* Special case for lbt_x86_redundant_cf_to_gpr in lbt.md.
	+ TODO it'd be better to handle this in TARGET_INSN_COST once
	+ PR113325 is resolved. */
	+ if (mode == LBTCCmode && code == NE)
	+ {
	+ rtx inner = XEXP (x, 0);
	+ if (GET_CODE (inner) == IF_THEN_ELSE
	+ && XEXP (inner, 1) == const1_rtx
	+ && XEXP (inner, 2) == const0_rtx)
	+ {
	+ inner = XEXP (inner, 0);
	+ if (GET_CODE (inner) == NE
	+ && GET_MODE (XEXP (inner, 0)) == mode
	+ && XEXP (inner, 1) == const0_rtx)
	+ {
	+ *total = 0;
	+ return true;
	+ }
	+ }
	+ }
	+
	/* Branch comparisons have VOIDmode, so use the first operand's
	mode instead. */
	mode = GET_MODE (XEXP (x, 0));
	@@ -6685,6 +6706,9 @@ loongarch_hard_regno_mode_ok_uncached (unsigned int regno, machine_mode mode)
	if (mode == FCCmode)
	return FCC_REG_P (regno) \|\| GP_REG_P (regno) \|\| FP_REG_P (regno);

	+ if (mode == LBTCCmode)
	+ return regno == LBT_X86_CF_REGNUM \|\| regno == LBT_X86_OF_REGNUM;
	+
	size = GET_MODE_SIZE (mode);
	mclass = GET_MODE_CLASS (mode);

	@@ -10977,6 +11001,46 @@ loongarch_optab_supported_p (int op, machine_mode, machine_mode,
	}
	}

	+/* Implement the TARGET_MIN_ARITHMETIC_PRECISION hook. */
	+static unsigned int
	+loongarch_min_arithmetic_precision (void)
	+{
	+ /* Note that this target hook is designed to provide info about the
	+ condition codes from arithmetic. On LoongArch the condition codes
	+ are generated by LBT, so we return 8 here because we have "x86add.q"
	+ etc. despite we don't have "add.q". */
	+ return 8;
	+}
	+
	+static HARD_REG_SET
	+loongarch_zero_call_used_regs (HARD_REG_SET regs)
	+{
	+ int lbt_mask = 0;
	+ HARD_REG_SET zeroed;
	+
	+ CLEAR_HARD_REG_SET (zeroed);
	+
	+ static CONSTEXPR const std::pair<int, int> lbt[] = {
	+ {LBT_X86_CF_REGNUM, 0x1},
	+ {LBT_X86_OF_REGNUM, 0x20},
	+ };
	+
	+ for (auto p: lbt)
	+ {
	+ int regno = p.first;
	+ if (TEST_HARD_REG_BIT (regs, regno))
	+ {
	+ SET_HARD_REG_BIT (zeroed, regno);
	+ CLEAR_HARD_REG_BIT (regs, regno);
	+ lbt_mask \|= p.second;
	+ }
	+ }
	+
	+ emit_insn (gen_lbt_clear_flags (GEN_INT (lbt_mask)));
	+
	+ return zeroed \| default_zero_call_used_regs (regs);
	+}
	+
	/* If -fverbose-asm, dump some info for debugging. */
	static void
	loongarch_asm_code_end (void)
	@@ -11263,6 +11327,14 @@ loongarch_asm_code_end (void)
	#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
	loongarch_builtin_support_vector_misalignment

	+#undef TARGET_MIN_ARITHMETIC_PRECISION
	+#define TARGET_MIN_ARITHMETIC_PRECISION \
	+ loongarch_min_arithmetic_precision
	+
	+#undef TARGET_ZERO_CALL_USED_REGS
	+#define TARGET_ZERO_CALL_USED_REGS \
	+ loongarch_zero_call_used_regs
	+
	struct gcc_target targetm = TARGET_INITIALIZER;

	#include "gt-loongarch.h"
	diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
	index f7fe950f333..4985d5dd35e 100644
	--- a/gcc/config/loongarch/loongarch.h
	+++ b/gcc/config/loongarch/loongarch.h
	@@ -305,9 +305,10 @@ along with GCC; see the file COPYING3. If not see
	- 2 fake registers:
	- ARG_POINTER_REGNUM
	- FRAME_POINTER_REGNUM
	+ - 2 LBT status registers (x86-style CF and OF; others not supported yet)
	*/

	-#define FIRST_PSEUDO_REGISTER 74
	+#define FIRST_PSEUDO_REGISTER 76

	/* zero, tp, sp and x are fixed. */
	#define FIXED_REGISTERS \
	@@ -318,7 +319,7 @@ along with GCC; see the file COPYING3. If not see
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
	/* Others. */ \
	- 0, 0, 0, 0, 0, 0, 0, 0, 1, 1}
	+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0}

	/* The call RTLs themselves clobber ra. */
	#define CALL_USED_REGISTERS \
	@@ -329,7 +330,7 @@ along with GCC; see the file COPYING3. If not see
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
	1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, \
	/* Others. */ \
	- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}
	+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}

	/* Internal macros to classify a register number as to whether it's a
	general purpose register, a floating point register, or a status
	@@ -387,6 +388,9 @@ along with GCC; see the file COPYING3. If not see
	#define ARG_POINTER_REGNUM 72
	#define FRAME_POINTER_REGNUM 73

	+#define LBT_X86_CF_REGNUM 74
	+#define LBT_X86_OF_REGNUM 75
	+
	#define HARD_FRAME_POINTER_REGNUM (GP_REG_FIRST + 22)

	#define HARD_FRAME_POINTER_IS_FRAME_POINTER 0
	@@ -449,6 +453,7 @@ enum reg_class
	FP_REGS, /* floating point registers */
	FCC_REGS, /* status registers (fp status) */
	FRAME_REGS, /* arg pointer and frame pointer */
	+ LBT_FLAG_REGS, /* LBT flag registers */
	ALL_REGS, /* all registers */
	LIM_REG_CLASSES /* max value + 1 */
	};
	@@ -471,6 +476,7 @@ enum reg_class
	"FP_REGS", \
	"FCC_REGS", \
	"FRAME_REGS", \
	+ "LBT_FLAG_REGS", \
	"ALL_REGS" \
	}

	@@ -495,7 +501,8 @@ enum reg_class
	{ 0x00000000, 0xffffffff, 0x00000000 }, /* FP_REGS */ \
	{ 0x00000000, 0x00000000, 0x000000ff }, /* FCC_REGS */ \
	{ 0x00000000, 0x00000000, 0x00000300 }, /* FRAME_REGS */ \
	- { 0xffffffff, 0xffffffff, 0x000003ff } /* ALL_REGS */ \
	+ { 0x00000000, 0x00000000, 0x00000c00 }, /* LBT_FLAG_REGS */ \
	+ { 0xffffffff, 0xffffffff, 0x00000fff } /* ALL_REGS */ \
	}

	/* A C expression whose value is a register class containing hard
	@@ -535,7 +542,7 @@ enum reg_class
	56, 57, 58, 59, 60, 61, 62, 63, \
	/* None of the remaining classes have defined call-saved \
	registers. */ \
	- 64, 65, 66, 67, 68, 69, 70, 71, 72, 73}
	+ 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75}

	#define IMM_BITS 12
	#define IMM_REACH (HOST_WIDE_INT_1 << IMM_BITS)
	@@ -908,7 +915,7 @@ typedef struct {
	"$f16", "$f17", "$f18", "$f19", "$f20", "$f21", "$f22", "$f23", \
	"$f24", "$f25", "$f26", "$f27", "$f28", "$f29", "$f30", "$f31", \
	"$fcc0","$fcc1","$fcc2","$fcc3","$fcc4","$fcc5","$fcc6","$fcc7", \
	- "$arg", "$frame"}
	+ "$arg", "$frame", "$cf", "$of"}

	/* This macro defines additional names for hard registers. */

	diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
	index 25c1d323ba0..be48f208d76 100644
	--- a/gcc/config/loongarch/loongarch.md
	+++ b/gcc/config/loongarch/loongarch.md
	@@ -4413,6 +4413,8 @@ (define_insn_and_rewrite "simple_store<mode>"
	; The LoongArch SIMD Instructions.
	(include "simd.md")

	+(include "lbt.md")
	+
	(define_c_enum "unspec" [
	UNSPEC_ADDRESS_FIRST
	])
	--
	2.45.2