Skip to content

Instantly share code, notes, and snippets.

@xry111
Created July 22, 2024 10:14
Show Gist options
  • Save xry111/88fa001fa0ac0a2e78f2cbf3b9f0839a to your computer and use it in GitHub Desktop.
Save xry111/88fa001fa0ac0a2e78f2cbf3b9f0839a to your computer and use it in GitHub Desktop.
Useless patch to make GCC generate LBT instructions
From 5f9783382bbe039d1fd00fe43ff5adce3405398e Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Tue, 11 Jun 2024 18:37:27 +0800
Subject: [PATCH] [NOT FOR UPSTREAM] LBT
Note that the generated code is *slower*, so this is just a toy project.
Maybe it could be enabled for "-mlbt -Os", but I don't think it's
valuable enough for upstreaming.
---
gcc/config/loongarch/lbt.md | 242 +++++++++++++++++++++++
gcc/config/loongarch/loongarch-modes.def | 3 +
gcc/config/loongarch/loongarch.cc | 74 ++++++-
gcc/config/loongarch/loongarch.h | 19 +-
gcc/config/loongarch/loongarch.md | 2 +
5 files changed, 333 insertions(+), 7 deletions(-)
create mode 100644 gcc/config/loongarch/lbt.md
diff --git a/gcc/config/loongarch/lbt.md b/gcc/config/loongarch/lbt.md
new file mode 100644
index 00000000000..ac8d1a96487
--- /dev/null
+++ b/gcc/config/loongarch/lbt.md
@@ -0,0 +1,242 @@
+;; Hard register numbers of the LBT x86-style carry (CF) and overflow (OF)
+;; flags. They mirror LBT_X86_CF_REGNUM / LBT_X86_OF_REGNUM in loongarch.h.
+(define_constants
+ [(LBT_X86_CF_REGNUM 74)
+ (LBT_X86_OF_REGNUM 75)])
+
+;; Maps an integer mode to the integer mode of twice its width.
+(define_mode_attr WIDEMODE [(QI "HI") (HI "SI") (SI "DI") (DI "TI")])
+
+;; Selectively zero the CF and OF flag registers. Operand 0 is an immediate
+;; mask: CF is zeroed when its bit 0 (value 1) is set and OF is zeroed when
+;; its bit 5 (value 32) is set; a flag whose mask bit is clear is unchanged.
+;; The flags are written from $r0 (the zero register) under that mask.
+(define_insn "lbt_clear_flags"
+ [(set (reg:LBTCC LBT_X86_CF_REGNUM)
+ (if_then_else (eq (and (match_operand 0 "const_uimm6_operand" "i")
+ (const_int 1))
+ (const_int 0))
+ (reg:LBTCC LBT_X86_CF_REGNUM)
+ (const_int 0)))
+ (set (reg:LBTCC LBT_X86_OF_REGNUM)
+ (if_then_else (eq (and (match_dup 0) (const_int 32))
+ (const_int 0))
+ (reg:LBTCC LBT_X86_OF_REGNUM)
+ (const_int 0)))]
+ ""
+ "x86mtflag\t$r0,%0")
+
+;; Extension used when modelling CF as "the extended narrow result differs
+;; from the same operation done in the double-width mode".
+;; x86mul.{b/h/w/d} sets CF if a *signed* overflow happens (like x86 imul),
+;; so mult uses sign_extend while plus and minus use zero_extend.
+(define_code_attr lbt_x86_cf_extend_mode
+ [(plus "zero_extend")
+ (minus "zero_extend")
+ (mult "sign_extend")])
+
+;; Compute CF and OF for "operand 0 <op> operand 1" (add, subtract or
+;; multiply) without producing the arithmetic result itself.
+;; Each flag is modelled as: the <MODE> result, extended to <WIDEMODE>,
+;; differs from the operation performed on the extended inputs.
+;; CF uses the extension chosen by lbt_x86_cf_extend_mode; OF always uses
+;; sign extension.
+(define_insn "lbt_x86_flag_for_<optab>_<mode>"
+ [(set (reg:LBTCC LBT_X86_CF_REGNUM)
+ (ne:LBTCC (<lbt_x86_cf_extend_mode>:<WIDEMODE>
+ (addsubmul:QHWD
+ (match_operand:QHWD 0 "register_operand" "r")
+ (match_operand:QHWD 1 "register_operand" "r")))
+ (addsubmul:<WIDEMODE>
+ (<lbt_x86_cf_extend_mode>:<WIDEMODE> (match_dup 0))
+ (<lbt_x86_cf_extend_mode>:<WIDEMODE> (match_dup 1)))))
+ (set (reg:LBTCC LBT_X86_OF_REGNUM)
+ (ne:LBTCC (sign_extend:<WIDEMODE>
+ (addsubmul:QHWD (match_dup 0) (match_dup 1)))
+ (addsubmul:<WIDEMODE>
+ (sign_extend:<WIDEMODE> (match_dup 0))
+ (sign_extend:<WIDEMODE> (match_dup 1)))))]
+ ""
+ "x86<optab>.<size>\t%0,%1"
+ [(set_attr "mode" "<MODE>")])
+
+;; Flags for an unsigned multiply of operand 0 by operand 1 (SI/DI only).
+;; CF and OF get the same value: set when the zero-extended <MODE> product
+;; differs from the product of the zero-extended inputs in <WIDEMODE>,
+;; i.e. when the unsigned product does not fit in <MODE>.
+(define_insn "lbt_x86_flag_for_umul_<mode>"
+ [(set (reg:LBTCC LBT_X86_CF_REGNUM)
+ (ne:LBTCC (zero_extend:<WIDEMODE>
+ (mult:GPR
+ (match_operand:GPR 0 "register_operand" "r")
+ (match_operand:GPR 1 "register_operand" "r")))
+ (mult:<WIDEMODE>
+ (zero_extend:<WIDEMODE> (match_dup 0))
+ (zero_extend:<WIDEMODE> (match_dup 1)))))
+ (set (reg:LBTCC LBT_X86_OF_REGNUM)
+ (ne:LBTCC (zero_extend:<WIDEMODE>
+ (mult:GPR (match_dup 0) (match_dup 1)))
+ (mult:<WIDEMODE>
+ (zero_extend:<WIDEMODE> (match_dup 0))
+ (zero_extend:<WIDEMODE> (match_dup 1)))))]
+ ""
+ "x86mul.<size>u\t%0,%1"
+ [(set_attr "mode" "<MODE>")])
+
+;; Materialize OF in a GPR: operand 0 becomes 1 if OF is set, else 0.
+;; NOTE(review): the immediate 12 is presumably the setx86j condition
+;; number for "overflow" -- confirm against the LBT instruction reference.
+(define_insn "lbt_x86_setj_of_<mode>"
+ [(set (match_operand:X 0 "register_operand" "=r")
+ (if_then_else:X (ne (reg:LBTCC LBT_X86_OF_REGNUM) (const_int 0))
+ (const_int 1)
+ (const_int 0)))]
+ ""
+ "setx86j\t%0,12"
+ [(set_attr "mode" "<MODE>")])
+
+;; Overflow-checking add/subtract/multiply. Operand 0 receives
+;; operand 1 <op> operand 2, and control jumps to the label in operand 3
+;; when the LBT flag insn reports OF for that operation.
+(define_expand "<optab>v<mode>4"
+ [(set (match_operand:QHWD 0 "register_operand")
+ (addsubmul:QHWD (match_operand:QHWD 1 "register_operand")
+ (match_operand:QHWD 2 "register_operand")))
+ (match_operand 3)]
+ ""
+ {
+ /* "<optab>_optab" below is spelled "mul_optab" for mult; provide that
+ name locally as an alias of smul_optab (it is unused for plus and
+ minus, hence ATTRIBUTE_UNUSED). */
+ const auto mul_optab ATTRIBUTE_UNUSED = smul_optab;
+
+ /* Compute the arithmetic result with the ordinary expansion; the LBT
+ insn emitted afterwards only produces the flags. */
+ emit_move_insn (operands[0],
+ expand_binop (<MODE>mode, <optab>_optab,
+ operands[1], operands[2], NULL_RTX,
+ false, OPTAB_WIDEN));
+
+ /* The 0/1 copy of OF lives in a register of DImode or SImode depending
+ on TARGET_64BIT, regardless of <MODE>. */
+ machine_mode mode = TARGET_64BIT ? DImode : SImode;
+ rtx reg = gen_reg_rtx (mode);
+
+ /* NOTE(review): the flags are computed from operands[1] and operands[2]
+ after operands[0] has been written; this assumes operands[0] does not
+ overlap either input -- confirm against the callers of this expander. */
+ emit_insn (
+ gen_lbt_x86_flag_for_<optab>_<mode> (operands[1], operands[2]));
+ emit_insn (TARGET_64BIT ? gen_lbt_x86_setj_of_di (reg)
+ : gen_lbt_x86_setj_of_si (reg));
+
+ /* Branch to the overflow label when the copied OF is nonzero. */
+ rtx test = gen_rtx_NE (VOIDmode, reg, const0_rtx);
+ emit_jump_insn (
+ TARGET_64BIT ? gen_cbranchdi4 (test, reg, const0_rtx, operands[3])
+ : gen_cbranchsi4 (test, reg, const0_rtx, operands[3]));
+
+ DONE;
+ })
+
+;; We don't customize uaddvM4/usubvM4 because using LBT for them doesn't
+;; have an advantage over the default expansion. However for umulvM4
+;; using LBT seems better, but only SI and DI are supported.
+;;
+;; Operand 0 receives operand 1 * operand 2; control jumps to the label in
+;; operand 3 when lbt_x86_flag_for_umul_<mode> sets OF, i.e. when the
+;; unsigned product does not fit in <MODE>.
+(define_expand "umulv<mode>4"
+ [(match_operand:GPR 0 "register_operand")
+ (match_operand:GPR 1 "register_operand")
+ (match_operand:GPR 2 "register_operand")
+ (match_operand 3)]
+ ""
+ {
+ /* The product itself comes from the ordinary multiply pattern. */
+ emit_insn (gen_mul<mode>3 (operands[0], operands[1], operands[2]));
+
+ /* The 0/1 copy of OF lives in a register of DImode or SImode depending
+ on TARGET_64BIT, regardless of <MODE>. */
+ machine_mode mode = TARGET_64BIT ? DImode : SImode;
+ rtx reg = gen_reg_rtx (mode);
+
+ /* NOTE(review): the flags are computed from operands[1] and operands[2]
+ after operands[0] has been written; this assumes operands[0] does not
+ overlap either input -- confirm against the callers of this expander. */
+ emit_insn (
+ gen_lbt_x86_flag_for_umul_<mode> (operands[1], operands[2]));
+ emit_insn (TARGET_64BIT ? gen_lbt_x86_setj_of_di (reg)
+ : gen_lbt_x86_setj_of_si (reg));
+
+ /* Branch to the overflow label when the copied OF is nonzero. */
+ rtx test = gen_rtx_NE (VOIDmode, reg, const0_rtx);
+ emit_jump_insn (
+ TARGET_64BIT ? gen_cbranchdi4 (test, reg, const0_rtx, operands[3])
+ : gen_cbranchsi4 (test, reg, const0_rtx, operands[3]));
+
+ DONE;
+ })
+
+;; Load CF from bit 0 of operand 0 (a GPR, or constant zero to clear CF).
+;; This writes a flag from a GPR, so the "move to flag" instruction
+;; x86mtflag is used, exactly as in lbt_clear_flags; x86mfflag goes the
+;; other way (flags to GPR). The mask 1 selects CF only.
+;; %z0 is used so that the constant-zero ("J") alternative prints the
+;; zero register rather than a literal 0.
+(define_insn "lbt_x86_set_cf_from_<mode>"
+ [(set (reg:LBTCC LBT_X86_CF_REGNUM)
+ (ne:LBTCC (and:GPR (match_operand:GPR 0 "reg_or_0_operand" "rJ")
+ (const_int 1))
+ (const_int 0)))]
+ ""
+ "x86mtflag\t%z0,1")
+
+;; Mnemonic stem of the add-with-carry / subtract-with-carry instructions.
+(define_code_attr optab_c [(plus "adc") (minus "sbc")])
+;; Add/subtract with carry: operand 0 = (operand 1 <op> operand 2) <op> CF,
+;; where CF contributes 1 when set and 0 otherwise. CF itself is only read.
+;; Operand 2 may be constant zero ("J"), so it is printed with %z2 to get
+;; the zero register instead of a literal 0.
+(define_insn "lbt_<optab>c_<mode>"
+ [(set (match_operand:GPR 0 "register_operand" "=r")
+ (addsub:GPR
+ (addsub:GPR
+ (match_operand:GPR 1 "register_operand" "r")
+ (match_operand:GPR 2 "reg_or_0_operand" "rJ"))
+ (if_then_else:GPR (ne (reg:LBTCC LBT_X86_CF_REGNUM)
+ (const_int 0))
+ (const_int 1)
+ (const_int 0))))]
+ ""
+ "<optab_c>.<size>\t%0,%1,%z2"
+ [(set_attr "mode" "<MODE>")])
+
+;; Flags of an add/subtract with carry: recompute CF and OF for
+;; (operand 0 <op> operand 1) <op> CF without producing the result.
+;; CF: the zero-extended <MODE> result differs from the same computation
+;; on zero-extended inputs in <WIDEMODE>; OF: likewise with sign extension.
+;; The incoming CF is both an input and (for CF) the output.
+;; Operand 1 may be constant zero ("J"), so it is printed with %z1 to get
+;; the zero register instead of a literal 0.
+(define_insn "lbt_x86_<optab>c_<mode>"
+ [(set (reg:LBTCC LBT_X86_CF_REGNUM)
+ (ne:LBTCC
+ (zero_extend:<WIDEMODE>
+ (addsub:GPR
+ (addsub:GPR (match_operand:GPR 0 "register_operand" "r")
+ (match_operand:GPR 1 "reg_or_0_operand" "rJ"))
+ (if_then_else:GPR (ne (reg:LBTCC LBT_X86_CF_REGNUM)
+ (const_int 0))
+ (const_int 1)
+ (const_int 0))))
+ (addsub:<WIDEMODE>
+ (addsub:<WIDEMODE> (zero_extend:<WIDEMODE> (match_dup 0))
+ (zero_extend:<WIDEMODE> (match_dup 1)))
+ (if_then_else:<WIDEMODE> (ne (reg:LBTCC LBT_X86_CF_REGNUM)
+ (const_int 0))
+ (const_int 1)
+ (const_int 0)))))
+ (set (reg:LBTCC LBT_X86_OF_REGNUM)
+ (ne:LBTCC
+ (sign_extend:<WIDEMODE>
+ (addsub:GPR
+ (addsub:GPR (match_dup 0) (match_dup 1))
+ (if_then_else:GPR (ne (reg:LBTCC LBT_X86_CF_REGNUM)
+ (const_int 0))
+ (const_int 1)
+ (const_int 0))))
+ (addsub:<WIDEMODE>
+ (addsub:<WIDEMODE> (sign_extend:<WIDEMODE> (match_dup 0))
+ (sign_extend:<WIDEMODE> (match_dup 1)))
+ (if_then_else:<WIDEMODE> (ne (reg:LBTCC LBT_X86_CF_REGNUM)
+ (const_int 0))
+ (const_int 1)
+ (const_int 0)))))]
+ ""
+ "x86<optab_c>.<size>\t%0,%z1"
+ [(set_attr "mode" "<MODE>")])
+
+;; Materialize CF in a GPR: operand 0 becomes 1 if CF is set, else 0.
+;; NOTE(review): the immediate 2 is presumably the setx86j condition
+;; number for "carry set" -- confirm against the LBT instruction reference.
+(define_insn "lbt_x86_setj_cf_<mode>"
+ [(set (match_operand:GPR 0 "register_operand" "=r")
+ (if_then_else:GPR (ne (reg:LBTCC LBT_X86_CF_REGNUM) (const_int 0))
+ (const_int 1)
+ (const_int 0)))]
+ ""
+ "setx86j\t%0,2"
+ [(set_attr "mode" "<MODE>")])
+
+;; Unsigned add/subtract with carry (or borrow) in and out.
+;; Operand 0 receives (operand 2 <op> operand 3) <op> operand 4, and
+;; operand 1 receives the resulting CF as 0 or 1.
+(define_expand "u<optab>c<mode>5"
+ [(set (match_operand:GPR 0 "register_operand")
+ (addsub:GPR (addsub:GPR (match_operand:GPR 2 "register_operand")
+ (match_operand:GPR 3 "reg_or_0_operand"))
+ (match_operand:GPR 4 "reg_or_0_operand")))
+ (match_operand:GPR 1 "register_operand")]
+ ""
+ {
+ if (operands[4] == const0_rtx)
+ {
+ /* No carry in: a plain add/sub plus the flag-only insn suffices.
+ lbt_x86_flag_for_<optab>_<mode> only accepts registers, but
+ operand 3 may be constant zero here, so force it into one
+ (force_reg returns an existing register unchanged). */
+ operands[3] = force_reg (<MODE>mode, operands[3]);
+
+ emit_insn (gen_<optab><mode>3 (operands[0],
+ operands[2], operands[3]));
+ emit_insn (gen_lbt_x86_flag_for_<optab>_<mode> (operands[2],
+ operands[3]));
+ }
+ else
+ {
+ /* Load CF from the carry in, compute the result with adc/sbc (which
+ only reads CF), and only then recompute CF with the flag-only
+ insn, which overwrites it. */
+ emit_insn (gen_lbt_x86_set_cf_from_<mode> (operands[4]));
+ emit_insn (gen_lbt_<optab>c_<mode> (operands[0],
+ operands[2], operands[3]));
+ emit_insn (gen_lbt_x86_<optab>c_<mode> (operands[2], operands[3]));
+ }
+
+ /* Copy the carry out into operand 1 as 0 or 1. */
+ emit_insn (gen_lbt_x86_setj_cf_<mode> (operands[1]));
+ DONE;
+ })
+
+;; CF := ((CF != 0 ? 1 : 0) != 0) leaves CF unchanged, so this insn is
+;; split into no instructions at all (only a deleted-insn note is emitted).
+;; Presumably such an insn is formed when a carry copied out to a GPR
+;; (lbt_x86_setj_cf_<mode>) is fed straight back into CF
+;; (lbt_x86_set_cf_from_<mode>), e.g. between links of a carry chain.
+;; NOTE(review): loongarch_pre_reload_split () presumably limits matching
+;; to before register allocation -- confirm in loongarch.cc.
+(define_insn_and_split "*lbt_x86_remove_copying_carry_to_gpr_<mode>"
+ [(set (reg:LBTCC LBT_X86_CF_REGNUM)
+ (ne:LBTCC (if_then_else:GPR
+ (ne (reg:LBTCC LBT_X86_CF_REGNUM)
+ (const_int 0))
+ (const_int 1)
+ (const_int 0))
+ (const_int 0)))]
+ "loongarch_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ "emit_note (NOTE_INSN_DELETED); DONE;")
diff --git a/gcc/config/loongarch/loongarch-modes.def b/gcc/config/loongarch/loongarch-modes.def
index 64caa8d6698..ba5f441cf5f 100644
--- a/gcc/config/loongarch/loongarch-modes.def
+++ b/gcc/config/loongarch/loongarch-modes.def
@@ -24,6 +24,9 @@ FLOAT_MODE (TF, 16, ieee_quad_format);
/* For floating point conditions in FCC registers. */
CC_MODE (FCC);
+/* LBT flags. */
+CC_MODE (LBTCC);
+
/* Vector modes. */
VECTOR_MODES (INT, 4); /* V4QI V2HI */
VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 6ec3ee62502..1fe26ec92c5 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -205,7 +205,7 @@ const enum reg_class loongarch_regno_to_class[FIRST_PSEUDO_REGISTER] = {
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FCC_REGS, FCC_REGS, FCC_REGS, FCC_REGS,
FCC_REGS, FCC_REGS, FCC_REGS, FCC_REGS,
- FRAME_REGS, FRAME_REGS
+ FRAME_REGS, FRAME_REGS, LBT_FLAG_REGS, LBT_FLAG_REGS,
};
/* Information about a single argument. */
@@ -3854,6 +3854,27 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code,
case UNGT:
case UNLE:
case UNLT:
+ /* Special case for *lbt_x86_remove_copying_carry_to_gpr_<mode> in
+ lbt.md. TODO it'd be better to handle this in TARGET_INSN_COST once
+ PR113325 is resolved. */
+ if (mode == LBTCCmode && code == NE)
+ {
+ rtx inner = XEXP (x, 0);
+ if (GET_CODE (inner) == IF_THEN_ELSE
+ && XEXP (inner, 1) == const1_rtx
+ && XEXP (inner, 2) == const0_rtx)
+ {
+ inner = XEXP (inner, 0);
+ if (GET_CODE (inner) == NE
+ && GET_MODE (XEXP (inner, 0)) == mode
+ && XEXP (inner, 1) == const0_rtx)
+ {
+ *total = 0;
+ return true;
+ }
+ }
+ }
+
/* Branch comparisons have VOIDmode, so use the first operand's
mode instead. */
mode = GET_MODE (XEXP (x, 0));
@@ -6685,6 +6706,9 @@ loongarch_hard_regno_mode_ok_uncached (unsigned int regno, machine_mode mode)
if (mode == FCCmode)
return FCC_REG_P (regno) || GP_REG_P (regno) || FP_REG_P (regno);
+ if (mode == LBTCCmode)
+ return regno == LBT_X86_CF_REGNUM || regno == LBT_X86_OF_REGNUM;
+
size = GET_MODE_SIZE (mode);
mclass = GET_MODE_CLASS (mode);
@@ -10977,6 +11001,46 @@ loongarch_optab_supported_p (int op, machine_mode, machine_mode,
}
}
+/* Implement the TARGET_MIN_ARITHMETIC_PRECISION hook. */
+static unsigned int
+loongarch_min_arithmetic_precision (void)
+{
+ /* Note that this target hook is designed to provide info about the
+ condition codes from arithmetic. On LoongArch the condition codes
+ are generated by LBT, so we return 8 here because we have "x86add.b"
+ etc. even though we don't have "add.b". */
+ return 8;
+}
+
+/* Implement TARGET_ZERO_CALL_USED_REGS. The LBT flags requested in REGS
+ are zeroed with a single lbt_clear_flags insn; all remaining registers
+ are left to default_zero_call_used_regs. Returns the set of registers
+ actually zeroed. */
+static HARD_REG_SET
+loongarch_zero_call_used_regs (HARD_REG_SET regs)
+{
+ int lbt_mask = 0;
+ HARD_REG_SET zeroed;
+
+ CLEAR_HARD_REG_SET (zeroed);
+
+ /* Each LBT flag register paired with its bit in the lbt_clear_flags
+ mask. */
+ static CONSTEXPR const std::pair<int, int> lbt[] = {
+ {LBT_X86_CF_REGNUM, 0x1},
+ {LBT_X86_OF_REGNUM, 0x20},
+ };
+
+ for (auto p: lbt)
+ {
+ int regno = p.first;
+ if (TEST_HARD_REG_BIT (regs, regno))
+ {
+ /* Handled here; hide it from the default implementation. */
+ SET_HARD_REG_BIT (zeroed, regno);
+ CLEAR_HARD_REG_BIT (regs, regno);
+ lbt_mask |= p.second;
+ }
+ }
+
+ /* Do not emit an LBT instruction when no LBT flag was requested: with
+ an empty mask the insn changes nothing. */
+ if (lbt_mask != 0)
+ emit_insn (gen_lbt_clear_flags (GEN_INT (lbt_mask)));
+
+ return zeroed | default_zero_call_used_regs (regs);
+}
+
/* If -fverbose-asm, dump some info for debugging. */
static void
loongarch_asm_code_end (void)
@@ -11263,6 +11327,14 @@ loongarch_asm_code_end (void)
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
loongarch_builtin_support_vector_misalignment
+#undef TARGET_MIN_ARITHMETIC_PRECISION
+#define TARGET_MIN_ARITHMETIC_PRECISION \
+ loongarch_min_arithmetic_precision
+
+#undef TARGET_ZERO_CALL_USED_REGS
+#define TARGET_ZERO_CALL_USED_REGS \
+ loongarch_zero_call_used_regs
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-loongarch.h"
diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
index f7fe950f333..4985d5dd35e 100644
--- a/gcc/config/loongarch/loongarch.h
+++ b/gcc/config/loongarch/loongarch.h
@@ -305,9 +305,10 @@ along with GCC; see the file COPYING3. If not see
- 2 fake registers:
- ARG_POINTER_REGNUM
- FRAME_POINTER_REGNUM
+ - 2 LBT status registers (x86-style CF and OF; others not supported yet)
*/
-#define FIRST_PSEUDO_REGISTER 74
+#define FIRST_PSEUDO_REGISTER 76
/* zero, tp, sp and x are fixed. */
#define FIXED_REGISTERS \
@@ -318,7 +319,7 @@ along with GCC; see the file COPYING3. If not see
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
/* Others. */ \
- 0, 0, 0, 0, 0, 0, 0, 0, 1, 1}
+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0}
/* The call RTLs themselves clobber ra. */
#define CALL_USED_REGISTERS \
@@ -329,7 +330,7 @@ along with GCC; see the file COPYING3. If not see
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, \
/* Others. */ \
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}
/* Internal macros to classify a register number as to whether it's a
general purpose register, a floating point register, or a status
@@ -387,6 +388,9 @@ along with GCC; see the file COPYING3. If not see
#define ARG_POINTER_REGNUM 72
#define FRAME_POINTER_REGNUM 73
+#define LBT_X86_CF_REGNUM 74
+#define LBT_X86_OF_REGNUM 75
+
#define HARD_FRAME_POINTER_REGNUM (GP_REG_FIRST + 22)
#define HARD_FRAME_POINTER_IS_FRAME_POINTER 0
@@ -449,6 +453,7 @@ enum reg_class
FP_REGS, /* floating point registers */
FCC_REGS, /* status registers (fp status) */
FRAME_REGS, /* arg pointer and frame pointer */
+ LBT_FLAG_REGS, /* LBT flag registers */
ALL_REGS, /* all registers */
LIM_REG_CLASSES /* max value + 1 */
};
@@ -471,6 +476,7 @@ enum reg_class
"FP_REGS", \
"FCC_REGS", \
"FRAME_REGS", \
+ "LBT_FLAG_REGS", \
"ALL_REGS" \
}
@@ -495,7 +501,8 @@ enum reg_class
{ 0x00000000, 0xffffffff, 0x00000000 }, /* FP_REGS */ \
{ 0x00000000, 0x00000000, 0x000000ff }, /* FCC_REGS */ \
{ 0x00000000, 0x00000000, 0x00000300 }, /* FRAME_REGS */ \
- { 0xffffffff, 0xffffffff, 0x000003ff } /* ALL_REGS */ \
+ { 0x00000000, 0x00000000, 0x00000c00 }, /* LBT_FLAG_REGS */ \
+ { 0xffffffff, 0xffffffff, 0x00000fff } /* ALL_REGS */ \
}
/* A C expression whose value is a register class containing hard
@@ -535,7 +542,7 @@ enum reg_class
56, 57, 58, 59, 60, 61, 62, 63, \
/* None of the remaining classes have defined call-saved \
registers. */ \
- 64, 65, 66, 67, 68, 69, 70, 71, 72, 73}
+ 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75}
#define IMM_BITS 12
#define IMM_REACH (HOST_WIDE_INT_1 << IMM_BITS)
@@ -908,7 +915,7 @@ typedef struct {
"$f16", "$f17", "$f18", "$f19", "$f20", "$f21", "$f22", "$f23", \
"$f24", "$f25", "$f26", "$f27", "$f28", "$f29", "$f30", "$f31", \
"$fcc0","$fcc1","$fcc2","$fcc3","$fcc4","$fcc5","$fcc6","$fcc7", \
- "$arg", "$frame"}
+ "$arg", "$frame", "$cf", "$of"}
/* This macro defines additional names for hard registers. */
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index 25c1d323ba0..be48f208d76 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -4413,6 +4413,8 @@ (define_insn_and_rewrite "simple_store<mode>"
; The LoongArch SIMD Instructions.
(include "simd.md")
+(include "lbt.md")
+
(define_c_enum "unspec" [
UNSPEC_ADDRESS_FIRST
])
--
2.45.2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment