Skip to content

Instantly share code, notes, and snippets.

@rprichard
Created May 11, 2021 03:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rprichard/2a601c3dd1b281f953b4e08b5a9361bb to your computer and use it in GitHub Desktop.
Save rprichard/2a601c3dd1b281f953b4e08b5a9361bb to your computer and use it in GitHub Desktop.
llvm-asm-thumb2-oddities.s
// GAS:
// arm-linux-gnueabi-gcc -march=armv7-a -c test.s
// GNU assembler (GNU Binutils for Debian) 2.35.2
// LLVM:
// /x/clang12/bin/clang -target armv7-linux-androideabi -c test.s
// clang version 12.0.0 (https://github.com/llvm/llvm-project/ b978a93635b584db380274d7c8963c73989944a1)
// Unified ARM/Thumb assembler syntax. Everything up to the `.arm`
// group near the end of the file is assembled in Thumb mode.
.syntax unified
.thumb
////////////////////////////////////////////////////////////////
// Width suffixes on a load with offset 0xf00 (= 3840).
// Every form that is accepted assembles to the same wide encoding:
//   d1 f8 00 0f ldr.w r0, [r1, #3840]
////////////////////////////////////////////////////////////////
ldr.n r0, [r1, #0xf00] // GAS rejects; LLVM accepts it as a synonym for ldr.w
ldr.w r0, [r1, #0xf00] // GAS and LLVM both accept
ldr r0, [r1, #0xf00] // GAS and LLVM both accept
////////////////////////////////////////////////////////////////
// narrow/wide suffixes on:
// - ldr r0, [r1, #-2]
// Both assemblers accept the unsuffixed form, but each rejects a
// different one of the suffixed forms.
// NOTE(review): the line:column positions in the quoted LLVM
// diagnostics appear to come from a separate scratch test.s, not
// from this file's line numbering.
////////////////////////////////////////////////////////////////
// No suffix -- GAS and LLVM accept:
// - GNU objdump: f851 0c02 ldr.w r0, [r1, #-2]
// - LLVM objdump: 51 f8 02 0c ldr r0, [r1, #-2]
// (GNU objdump prints the two halfwords, LLVM objdump prints the four bytes.)
ldr r0, [r1, #-2]
// .w suffix -- GAS accepts (same disasm as above), LLVM rejects:
// test.s:21:13: error: invalid operand for instruction
// ldr.w r0, [r1, #-2]
// ^
ldr.w r0, [r1, #-2]
// .n suffix -- GAS rejects: Error: invalid offset, value too big (0xFFFFFFFFFFFFFFFE)
// LLVM accepts (same disasm as above, i.e. a wide instruction)
ldr.n r0, [r1, #-2]
////////////////////////////////////////////////////////////////
// narrow/wide suffixes on:
// - ldr r0, [r1, #2]!
// - ldr r0, [r1], #2
// Same pattern as the [r1, #-2] case, but LLVM prints a different
// error for the .w form.
// NOTE(review): only the pre-indexed form ([r1, #2]!) is exercised
// by the instructions below; the post-indexed form is listed here
// but has no test lines in this section.
////////////////////////////////////////////////////////////////
// No suffix -- GAS and LLVM accept:
// - GNU objdump: f851 0f02 ldr.w r0, [r1, #2]!
// - LLVM objdump: 51 f8 02 0f ldr r0, [r1, #2]!
ldr r0, [r1, #2]!
// .w suffix -- GAS accepts (same disasm as above), LLVM rejects:
// test.s:20:21: error: too many operands for instruction
// ldr.w r0, [r1, #2]!
// ^
ldr.w r0, [r1, #2]!
// .n suffix -- GAS rejects: Error: cannot honor width suffix -- `ldr.n r0,[r1,#2]!'
// LLVM: accepts (same disasm as above, i.e. a wide instruction)
ldr.n r0, [r1, #2]!
////////////////////////////////////////////////////////////////
// Generalizing: LLVM seems to ignore a ".n" suffix that cannot be
// honored and emits a wide instruction anyway, whereas GAS treats
// the suffix as a hard requirement and reports an error.
////////////////////////////////////////////////////////////////
// These are not valid as 16-bit instructions because they do not
// set condition codes. "adds.n r2, #2" is valid.
add.n r2, #2 // LLVM assembles as add.w, GAS rejects (Error: lo register required -- `add.n r2,#2')
add.n r2, r2, #2 // LLVM assembles as add.w, GAS rejects
add.n r2, r3, #2 // LLVM assembles as add.w, GAS rejects
mvn.n r0, #1 // LLVM assembles as mvn/mvn.w, GAS rejects (Error: cannot honor width suffix -- `mvn.n r0,#1')
////////////////////////////////////////////////////////////////
// #-0 seems to be a way of selecting a variant of load/store
// encoding that handles small negative offsets.
// (see https://github.com/llvm/llvm-project/commit/f02d98d7c0d09f696a760c1c3cea8919a796348e)
//
// LLVM appears to define the special "-0" operand as an
// expression of value 0, whose first token is '-'. So
// #-(0) and #-(1-1) are treated specially, but #(-0) is not,
// because its first token is '('.
////////////////////////////////////////////////////////////////
// In Thumb-2 mode, GAS does not assemble the three loads below to
// the #-0 variant; LLVM does. Both LLVM and GAS assemble all three
// of them as -0 in ARM mode, though.
// Plain offset:
// LLVM: 51 f8 00 0c ldr r0, [r1, #-0]
// GAS: d1 f8 00 00 ldr.w r0, [r1]
ldr r0, [r1, #-0]
// Pre-indexed:
// LLVM: 51 f8 00 0d ldr r0, [r1, #-0]!
// GAS: 51 f8 00 0f ldr r0, [r1, #0]!
ldr r0, [r1, #-0]!
// Post-indexed:
// LLVM: 51 f8 00 09 ldr r0, [r1], #-0
// GAS: 51 f8 00 0b ldr r0, [r1], #0
ldr r0, [r1], #-0
////////////////////////////////////////////////////////////////
// Representing #-0 as INT32_MIN internally within LLVM also
// means that #-0 is assembled as INT32_MIN for these arithmetic
// instructions. GAS instead assembles the operand to 0.
//
// The two listings below correspond, in order, to the eight
// instructions at the end of this section. Each "#-0" line is
// paired with an explicit "#0x80000000" line for comparison: in
// the LLVM listing each pair is identical, in the GAS listing the
// "#-0" member of each pair has immediate 0 instead.
////////////////////////////////////////////////////////////////
// LLVM:
//
// 00000000 <$t.0>:
// 0: 4f f0 00 42 mov.w r2, #2147483648
// 4: 4f f0 00 42 mov.w r2, #2147483648
// 8: 62 f0 00 42 orn r2, r2, #2147483648
// c: 62 f0 00 42 orn r2, r2, #2147483648
// 10: 02 f1 00 42 add.w r2, r2, #2147483648
// 14: 02 f1 00 42 add.w r2, r2, #2147483648
// 18: 6f f0 00 40 mvn r0, #2147483648
// 1c: 6f f0 00 40 mvn r0, #2147483648
//
// GAS:
//
// 00000000 <$t>:
// 0: 4f f0 00 02 mov.w r2, #0
// 4: 4f f0 00 42 mov.w r2, #2147483648
// 8: 62 f0 00 02 orn r2, r2, #0
// c: 62 f0 00 42 orn r2, r2, #2147483648
// 10: 02 f1 00 02 add.w r2, r2, #0
// 14: 02 f1 00 42 add.w r2, r2, #2147483648
// 18: 6f f0 00 00 mvn r0, #0
// 1c: 6f f0 00 40 mvn r0, #2147483648
mov r2, #-0
mov r2, #0x80000000
orn r2, r2, #-0
orn r2, r2, #0x80000000
add r2, #-0
add r2, #0x80000000
mvn r0, #-0
mvn r0, #0x80000000
////////////////////////////////////////////////////////////////
// LLVM quietly truncates out-of-range post-index offsets
////////////////////////////////////////////////////////////////
// Positive offsets. The trailing "// N" tags number the test
// instructions so that the notes here can refer to them.
// GAS: "Error: offset out of range" on lines 2,3,5,6
// LLVM accepts all six; the listing is consistent with only the
// low 8 bits of the offset being kept:
// 00000000 <$t.0>:
// 0: 51 f8 00 0b ldr r0, [r1], #0
// 4: 51 f8 00 0b ldr r0, [r1], #0
// 8: 51 f8 00 0b ldr r0, [r1], #0
// c: 51 f8 01 0b ldr r0, [r1], #1
// 10: 51 f8 01 0b ldr r0, [r1], #1
// 14: 51 f8 01 0b ldr r0, [r1], #1
ldr r0, [r1], #0 // 1
ldr r0, [r1], #0x100 // 2
ldr r0, [r1], #0x10000 // 3
ldr r0, [r1], #1 // 4
ldr r0, [r1], #0x101 // 5
ldr r0, [r1], #0x10001 // 6
// Negative offsets, tagged the same way.
// GAS: "Error: offset out of range" on lines 2,3,5
// LLVM accepts all five:
// 00000000 <$t.0>:
// 0: 51 f8 00 09 ldr r0, [r1], #-0
// 4: 51 f8 00 09 ldr r0, [r1], #-0
// 8: 51 f8 00 09 ldr r0, [r1], #-0
// c: 51 f8 ff 09 ldr r0, [r1], #-255
// 10: 51 f8 ff 09 ldr r0, [r1], #-255
ldr r0, [r1], #-0 // 1
ldr r0, [r1], #-256 // 2
ldr r0, [r1], #0x80000000 // 3
ldr r0, [r1], #-255 // 4
ldr r0, [r1], #0x80000001 // 5
// By contrast, LLVM does diagnose the invalid offsets for ordinary
// offsets and for pre-index. The first four are one past the
// limit; the last four are the largest values that are accepted.
ldr r0, [r1, #0x1000] // LLVM rejects
ldr r0, [r1, #0x100]! // LLVM rejects
ldr r0, [r1, #-0x100] // LLVM rejects
ldr r0, [r1, #-0x100]! // LLVM rejects
ldr r0, [r1, #0xfff] // OK
ldr r0, [r1, #0xff]! // OK
ldr r0, [r1, #-0xff] // OK
ldr r0, [r1, #-0xff]! // OK
////////////////////////////////////////////////////////////////
// Shifting (b/187306147)
// Shift-by-zero forms in Thumb mode, where LLVM and GAS disagree.
////////////////////////////////////////////////////////////////
// LLVM assembles this (pseudo-)instruction to a very different
// instruction than GAS does:
// - LLVM: 4f ea 30 00 rrx r0, r0
// - GAS: 4f ea 00 00 mov.w r0, r0
// Maybe it's an off-by-one error in lib/Target/ARM/ARMInstrThumb2.td.
// Maybe apply this patch:
// -defm t2ROR : T2I_sh_ir<0b11, "ror", imm0_31, rotr>;
// +defm t2ROR : T2I_sh_ir<0b11, "ror", imm1_31, rotr>;
ror r0, #0
// LLVM allows these, GAS does not
lsl sp, r2, #0
lsl r2, sp, #0
// LSL rLO, #0
lsls r0, #0 // LLVM assembles to: 00 00 movs r0, r0
lsls r0, r0, #0 // LLVM assembles to: 00 00 movs r0, r0
// LSL rHI, #0 (needs wide instruction -- GAS handles both)
lsls r8, #0 // LLVM rejects
lsls r8, r8, #0 // LLVM assembles to: 5f ea 08 08 movs.w r8, r8
lsls r8, #2 // LLVM assembles to: 5f ea 88 08 lsls.w r8, r8, #2
lsls r8, r8, #2 // LLVM assembles to: 5f ea 88 08 lsls.w r8, r8, #2
// LLVM rejects these. GAS assembles them (listing is in the same
// order as the six instructions below) to:
// 00000000 <$t>:
// 0: 12 00 movs r2, r2
// 2: 12 00 movs r2, r2
// 4: 5f ea 02 02 movs.w r2, r2
// 8: 5f ea 02 02 movs.w r2, r2
// c: 4f ea 02 02 mov.w r2, r2
// 10: 4f ea 02 02 mov.w r2, r2
asrs r2, r2, #0
lsrs r2, r2, #0
asrs.w r2, r2, #0
lsrs.w r2, r2, #0
asr r2, r2, #0
lsr r2, r2, #0
// In ARM mode, LLVM and GAS assemble all of these to
// 03 30 a0 e1 mov r3, r3
// The register can also be sp or pc.
// Switch to ARM mode for this group only.
.arm
lsl r3, #0
lsl r3, r3, #0
lsr r3, #0
lsr r3, r3, #0
asr r3, #0
asr r3, r3, #0
ror r3, #0
ror r3, r3, #0
// Back to Thumb mode, matching the rest of the file.
.thumb
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment