Skip to content

Instantly share code, notes, and snippets.

@luismarques
Last active October 6, 2020 08:16
Show Gist options
  • Save luismarques/b3cad6e0178a6b1058eca98876b49ef2 to your computer and use it in GitHub Desktop.
Save luismarques/b3cad6e0178a6b1058eca98876b49ef2 to your computer and use it in GitHub Desktop.
RISC-V code quality issues (GCC vs Clang)

Examples of poor code generation

Constant materialisation

C

/* Returns the constant 0x94BFE000 (bit 31 set) as a long.
   The GCC listing below builds it as 0x4A5FF000 shifted left by 1. */
long a() { return 0x94BFE000; }

GCC RV64GC

        li      a0,0x4A5FF000  # lui only
        slli    a0,a0,1
        ret

Clang RV64GC

        lui     a0, 74
        addiw   a0, a0, 1535
        slli    a0, a0, 13
        ret

Sign extensions and word-wise operations

C

/* Increments b, narrows the result to int, and returns that int
   widened back to long. */
long a(long b) {
  int c = ++b;
  return c;
}

GCC

	addiw	a0,a0,1
	ret

Clang

	slli	a0, a0, 32
	addi	a1, zero, 1
	slli	a1, a1, 32
	add	a0, a0, a1
	srai	a0, a0, 32
	ret

C

/* Returns 1 when (b * b) % b is zero, otherwise 0.
   All intermediate values are int. */
int a(int b) {
  int c = b * b, d = c % b, e = !d;
  return e;
}

GCC

        mulw    a5,a0,a0
        remw    a0,a5,a0
        seqz    a0,a0
        ret

Clang

        mul     a1, a0, a0
        remw    a0, a1, a0
        slli    a0, a0, 32
        srli    a0, a0, 32
        seqz    a0, a0
        ret

C

/* Returns the int constant 6472 shifted right by b bits. */
int a(int b) {
  int c = 6472 >> b;
  return c;
}

GCC

	li	a5,8192
	addiw	a5,a5,-1720
	sraw	a0,a5,a0
	ret

Clang

	lui	a1, 2
	addiw	a1, a1, -1720
	srlw	a0, a1, a0
	slli	a0, a0, 32
	srli	a0, a0, 32
	ret

C

/* Returns the int quotient 8 / b. */
int a(int b) {
  int c = 8 / b;
  return c;
}

GCC

	li	a5,8
	divw	a0,a5,a0
	ret

Clang

	addi	a1, zero, 8
	div	a0, a1, a0
	sext.w	a0, a0
	ret

C

/* Returns the negative int constant -57 shifted left by b bits. */
int a(int b) {
  int c = -57, d = c << b;
  return d;
}

GCC

	li	a5,-57
	sllw	a0,a5,a0
	ret

Clang

	addi	a1, zero, 1
	slli	a1, a1, 32
	addi	a1, a1, -57
	sllw	a0, a1, a0
	ret

Maybe it's just a sign extension issue, maybe it's more:

C

/* Returns the bitwise complement of b, minus one (~b - 1). */
int a(int b) {
  int c = ~b;
  --c;
  return c;
}

GCC

        not     a0,a0
        addiw   a0,a0,-1
        ret

Clang

        addi    a1, zero, 1
        slli    a1, a1, 32
        addi    a1, a1, -2
        subw    a0, a1, a0
        ret

C

/* Negates b and narrows to short, negates that and narrows to char,
   then returns the char widened to int. The GCC listing below shows
   the whole chain reducing to a mask of the low 8 bits of b. */
int a(int b) {
  short c = -b;
  char d = -c;
  return d;
}

GCC

        andi    a0,a0,0xff
        ret

Clang

        slli    a0, a0, 16
        neg     a0, a0
        srli    a0, a0, 16
        neg     a0, a0
        andi    a0, a0, 255
        ret

C

/* Negates b, narrows the result to int, and returns that int
   widened back to long. */
long a(long b) {
  int c = -b;
  return c;
}

GCC RV64GC

        negw    a0,a0
        ret

Clang

        slli    a0, a0, 32
        neg     a0, a0
        srai    a0, a0, 32
        ret

Comparisons

C

/* Returns -1 when b <= 0 and 0 otherwise: c is the 0/1 result of
   the comparison and d is its negation. */
int a(int b) {
  int c = 0 >= b, d = -c;
  return d;
}

GCC

        slti    a0,a0,1
        subw    a0,zero,a0
        ret

Clang

        add     a1, zero, a0
        addi    a2, zero, 1
        addi    a0, zero, -1
        blt     a1, a2, .LBB0_2
        mv      a0, zero
.LBB0_2:
        ret

C

/* Stores the 0/1 result of (b <= 7) in a double, increments it, and
   converts back to int: returns 2 when b <= 7, otherwise 1. */
int a(short b) {
  double c = 7 >= b;
  ++c;
  return c;
}

GCC RV64GC

        slti    a0,a0,8
        addi    a0,a0,1
        ret

Clang RV64GC

        add     a1, zero, a0
        addi    a2, zero, 8
        addi    a0, zero, 2
        blt     a1, a2, .LBB0_2
# %bb.1:
        addi    a0, zero, 1
.LBB0_2:
        ret

Unnecessary seqz.

C

/* Returns c when c is non-zero, otherwise b, converted to float.
   Uses the conditional operator with the middle operand omitted
   (GNU C extension); the int and double arms are combined as double
   before the narrowing to float. */
float a(double b, int c) {
  float d = c ?: b;
  return d;
}

GCC RV64GC

	beq	a0,zero,.L2
	fcvt.d.w	fa0,a0
.L2:
	fcvt.s.d	fa0,fa0
	ret

Clang RV64GC

	seqz	a1, a0
	bnez	a1, .LBB0_2
# %bb.1:
	fcvt.d.l	fa0, a0
.LBB0_2:
	fcvt.s.d	fa0, fa0
	ret

C

/* Returns 1 when b is non-negative, otherwise 0. The comparison
   result is stored in a short before being returned as int. */
int a(short b) {
  short c = 0 <= b;
  return c;
}

GCC RV32GC

        not     a0,a0
        srli    a0,a0,31
        ret

Clang RV32GC

        not     a0, a0
        lui     a1, 8
        and     a0, a0, a1
        srli    a0, a0, 15
        ret

Math optimisations

C

/* c is ~b using the value of b before the post-decrement; d then
   subtracts the decremented b. With b0 the incoming value this is
   (-b0 - 1) - (b0 - 1), i.e. -2 * b0 when no overflow occurs. */
int a(int b) {
  int c = ~b--, d = c - b;
  return d;
}

GCC

	slli	a0,a0,1
	neg	a0,a0
	ret

Clang

	not	a1, a0
	sub	a0, a1, a0
	addi	a0, a0, 1
	ret

C

/* Returns 1 when b equals b + b, otherwise 0. The GCC listing below
   reduces this to a test of b against zero. */
int a(int b) {
  int c = b + b, d = b == c;
  return d;
}

GCC

	seqz	a0,a0
	ret

Clang

	slli	a1, a0, 1
	xor	a0, a0, a1
	seqz	a0, a0
	ret

C

/* c is b narrowed to char and e is -b narrowed to char, so e is
   non-zero exactly when c is. When e is non-zero, d = !c is 0 and
   f = d = 0; when e is zero, f = e = 0. f is therefore always 0 and
   g falls back to 7 (the GCC listing below returns the constant 7). */
int a(short b) {
  char c = b, e = -b;
  short d = !c;
  int f = e ? d : e;
  char g = f ?: 7;
  return g;
}

GCC RV32GC

        li      a0,7
        ret

Clang RV32GC

        neg     a1, a0
        andi    a1, a1, 255
        bnez    a1, .LBB0_3
# %bb.1:
        addi    a0, zero, 7
        bnez    a1, .LBB0_4
.LBB0_2:
        ret
.LBB0_3:
        andi    a0, a0, 255
        seqz    a1, a0
        addi    a0, zero, 7
        beqz    a1, .LBB0_2
.LBB0_4:
        add     a0, zero, a1
        ret

C

/* d is the pre-incremented b. e is b when b is non-zero, otherwise c
   narrowed to char. f selects c when e is non-zero, otherwise d.
   d is only selected when both b and (char)c are zero, so the
   returned char always equals c narrowed to char (the GCC listing
   below is a single mask of c). */
char a(char b, short c) {
  int d = ++b;
  char e = b ?: c;
  short f = e ? c : d;
  return f;
}

GCC RV64GC

        andi    a0,a1,0xff
        ret

Clang RV32GC

        addi    a3, a0, 1
        andi    a0, a3, 255
        add     a2, zero, a1
        beq     a0, a3, .LBB0_3
# %bb.1:
        andi    a2, a2, 255
        bnez    a2, .LBB0_4
.LBB0_2:
        andi    a0, a0, 255
        ret
.LBB0_3:
        add     a2, zero, a0
        andi    a2, a2, 255
        beqz    a2, .LBB0_2
.LBB0_4:
        add     a0, zero, a1
        andi    a0, a0, 255
        ret

Inefficient float loads

C

/* Global compared against itself in b(). */
int a;
/* Stores the result of a == a (always 1 for an int) in a double and
   returns it narrowed to float. */
float b() {
  double c = a == a;
  return c;
}

GCC

	lui	a5,%hi(.LC0)
	flw	fa0,%lo(.LC0)(a5)
	ret

Clang

	lui	a0, %hi(.LCPI0_0)
	addi	a0, a0, %lo(.LCPI0_0)
	flw	fa0, 0(a0)
	ret

FP comparison branchiness

C

/* Returns 1.0 when b compares equal to zero, otherwise 0.0. The 0/1
   result of !b goes through a long before the conversion to double. */
double a(float b) {
  long c = !b;
  return c;
}

GCC

	fmv.s.x	fa5,zero
	feq.s	a5,fa0,fa5
	fcvt.d.w	fa0,a5
	ret

Clang

	fmv.w.x	ft0, zero
	feq.s	a0, fa0, ft0
	bnez	a0, .LBB0_2
# %bb.1:
	fcvt.d.w	fa0, zero
	ret
.LBB0_2:
	lui	a0, %hi(.LCPI0_0)
	addi	a0, a0, %lo(.LCPI0_0)
	fld	fa0, 0(a0)
	ret

C

/* Returns the 0/1 result of the integer comparison b >= c as a float. */
float a(int b, int c) {
  float d = b >= c;
  return d;
}

GCC

        bge     a0,a1,.L3
        fmv.s.x fa0,zero
        ret
.L3:
        lui     a5,%hi(.LC0)
        flw     fa0,%lo(.LC0)(a5)
        ret

Clang

        slt     a0, a0, a1
        xori    a0, a0, 1
        bnez    a0, .LBB0_2
# %bb.1:
        fmv.w.x fa0, zero
        ret
.LBB0_2:
        lui     a0, %hi(.LCPI0_0)
        addi    a0, a0, %lo(.LCPI0_0)
        flw     fa0, 0(a0)
        ret

C

/* Converts b to long, then returns 1.0f when that integer is zero
   and 0.0f otherwise. */
float a(float b) {
  long c = b, d = !c;
  return d;
}

GCC

        fcvt.l.s a5,fa0,rtz
        seqz    a5,a5
        fcvt.s.l        fa0,a5
        ret

Clang

        fcvt.l.s        a0, fa0, rtz
        seqz    a0, a0
        bnez    a0, .LBB0_2
# %bb.1:
        fmv.w.x fa0, zero
        ret
.LBB0_2:
        lui     a0, %hi(.LCPI0_0)
        addi    a0, a0, %lo(.LCPI0_0)
        flw     fa0, 0(a0)
        ret

Bad roundtripping:

C

/* Stores the 0/1 result of the self-comparison b == b in a char and
   returns it converted to double. */
double a(float b) {
  char c = b == b;
  return c;
}

GCC

        feq.s   a5,fa0,fa0
        fcvt.d.wu       fa0,a5
        ret

Clang

        feq.s   a0, fa0, fa0
        and     a0, a0, a0
        bnez    a0, .LBB0_2
# %bb.1:
        fcvt.d.w        fa0, zero
        ret
.LBB0_2:
        lui     a0, %hi(.LCPI0_0)
        addi    a0, a0, %lo(.LCPI0_0)
        fld     fa0, 0(a0)
        ret

FP constant materialisation

GCC does not do this either, but small FP constants could be materialised with addi xn, zero, imm12 followed by a single integer-to-FP conversion instruction. Even if that turned out to be slower on some microarchitectures, it could still be used for -Os.

C

/* Adds 1.0 to a. 1.0 is a double constant, so the sum is formed as
   double and then converted to the float return type. */
float test(float a) {
    return a + 1.0;
}

Clang

        lui     a1, %hi(.LCPI0_0)
        addi    a1, a1, %lo(.LCPI0_0)
        flw     ft0, 0(a1)
        fmv.w.x ft1, a0
        fadd.s  ft0, ft1, ft0
        fmv.x.w a0, ft0
        ret

Unnecessary `and` instruction after `feq.d`

C

/* Returns the 0/1 result of the self-comparison b <= b as a long. */
long a(double b) {
  long c = b <= b;
  return c;
}

GCC

	feq.d	a0,fa0,fa0
	ret

Clang

	feq.d	a0, fa0, fa0
	and	a0, a0, a0
	ret

Unnecessary FP conversions

C

/* Converts the bitwise complement of b to double, then narrows that
   double to float on return. */
float a(long b) {
  double c = ~b;
  return c;
}

GCC

        not     a0,a0
        fcvt.s.w        fa0,a0
        ret

Clang

        not     a0, a0
        fcvt.d.w        ft0, a0
        fcvt.s.d        fa0, ft0
        ret

C

/* e is d converted to double. The conditional yields c when e is
   non-zero and e itself (which is then zero) otherwise, so f is c
   when d != 0 and 0 when d == 0. Parameter b is unused. */
int a(char b, int c, short d) {
  double e = d;
  int f = e ? c : e;
  return f;
}

GCC

        li      a0,0
        beq     a2,zero,.L2
        mv      a0,a1
.L2:
        ret

Clang

        seqz    a0, a2
        bnez    a0, .LBB0_2
# %bb.1:
        fcvt.d.l        ft0, a1
        fcvt.l.d        a0, ft0, rtz
        ret
.LBB0_2:
        fcvt.d.l        ft0, a2
        fcvt.l.d        a0, ft0, rtz
        ret

C

/* Converts b to double, then narrows that double to float on return. */
float a(char b) {
  double c = b;
  return c;
}

GCC

        fcvt.s.wu       fa0,a0
        ret

Clang

        fcvt.d.wu       ft0, a0
        fcvt.s.d        fa0, ft0
        ret

Dead branch instruction

C

/* Global read by b(). The snippet as posted omitted this declaration and
   did not compile on its own; the Clang listing below loads the symbol
   with `lw`, so a word-sized int is assumed. */
int a;

/* When c is non-zero, e is d and f is e, so the result is d narrowed to
   char. When c is zero, e is loaded from the global a but f is the
   constant 2, so that load has no effect on the result. */
char b(char c, short d) {
  short e = c ? d : a;
  int f = c ? e : 2;
  return f;
}

GCC

        bnez    a0,.L2
        li      a1,2
.L2:
        andi    a0,a1,0xff
        ret

Clang

        beqz    a0, .LBB0_3
# %bb.1:
        beqz    a0, .LBB0_4
.LBB0_2:
        andi    a0, a1, 255
        ret
.LBB0_3:
        lui     a1, %hi(a)
        lw      a1, %lo(a)(a1)
        bnez    a0, .LBB0_2
.LBB0_4:
        addi    a1, zero, 2
        andi    a0, a1, 255
        ret
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment