Created
May 11, 2021 03:04
-
-
Save rprichard/2a601c3dd1b281f953b4e08b5a9361bb to your computer and use it in GitHub Desktop.
llvm-asm-thumb2-oddities.s
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// GAS:
// arm-linux-gnueabi-gcc -march=armv7-a -c test.s
// GNU assembler (GNU Binutils for Debian) 2.35.2
// LLVM:
// /x/clang12/bin/clang -target armv7-linux-androideabi -c test.s
// clang version 12.0.0 (https://github.com/llvm/llvm-project/ b978a93635b584db380274d7c8963c73989944a1)
// Assemble everything below with unified syntax, in Thumb mode.
    .syntax unified
    .thumb
////////////////////////////////////////////////////////////////
// All of these are wide (d1 f8 00 0f ldr.w r0, [r1, #3840])
////////////////////////////////////////////////////////////////
// 0xf00 == 3840, the offset shown in the disassembly above.
    ldr.n r0, [r1, #0xf00]  // GAS rejects, LLVM accepts as synonym for ldr.w
    ldr.w r0, [r1, #0xf00]  // GAS+LLVM accept
    ldr   r0, [r1, #0xf00]  // GAS+LLVM accept
////////////////////////////////////////////////////////////////
// narrow/wide suffixes on:
// - ldr r0, [r1, #-2]
// The three cases below differ only in the width suffix
// (none, .w, .n).
////////////////////////////////////////////////////////////////
// GAS and LLVM accept:
// - GNU objdump: f851 0c02 ldr.w r0, [r1, #-2]
// - LLVM objdump: 51 f8 02 0c ldr r0, [r1, #-2]
// (Both dumps show the same four bytes; GNU objdump groups them
// as two 16-bit halfwords, LLVM objdump prints them one by one.)
    ldr r0, [r1, #-2]
// GAS accepts (same disasm as above), LLVM rejects:
// test.s:21:13: error: invalid operand for instruction
// ldr.w r0, [r1, #-2]
// ^
    ldr.w r0, [r1, #-2]
// GAS rejects: Error: invalid offset, value too big (0xFFFFFFFFFFFFFFFE)
// LLVM accepts (same disasm as above, i.e. a wide instruction)
    ldr.n r0, [r1, #-2]
////////////////////////////////////////////////////////////////
// narrow/wide suffixes on:
// - ldr r0, [r1, #2]!
// - ldr r0, [r1], #2
// Same as above, but LLVM prints a different error
// NOTE(review): only the pre-index form ([r1, #2]!) is exercised
// by the instructions in this section; the post-index form named
// in this banner has no test lines here.
////////////////////////////////////////////////////////////////
// GAS and LLVM accept:
// - GNU objdump: f851 0f02 ldr.w r0, [r1, #2]!
// - LLVM objdump: 51 f8 02 0f ldr r0, [r1, #2]!
    ldr r0, [r1, #2]!
// GAS accepts (same disasm), LLVM rejects:
// test.s:20:21: error: too many operands for instruction
// ldr.w r0, [r1, #2]!
// ^
    ldr.w r0, [r1, #2]!
// GAS rejects: Error: cannot honor width suffix -- `ldr.n r0,[r1,#2]!'
// LLVM: accepts (same disasm as above, i.e. a wide instruction)
    ldr.n r0, [r1, #2]!
////////////////////////////////////////////////////////////////
// Generalizing: LLVM seems to ignore an invalid ".n" suffix,
// whereas GAS treats it as a hard requirement.
////////////////////////////////////////////////////////////////
// These are not valid as 16-bit instructions because they do not
// set condition codes. "adds.n r2, #2" is valid.
// NOTE(review): GAS reports a different diagnostic for mvn.n
// ("cannot honor width suffix") than for add.n ("lo register
// required"), so the explanation above may not carry over to the
// mvn case unchanged -- confirm.
    add.n r2, #2      // LLVM assembles as add.w, GAS rejects (Error: lo register required -- `add.n r2,#2')
    add.n r2, r2, #2  // LLVM assembles as add.w, GAS rejects
    add.n r2, r3, #2  // LLVM assembles as add.w, GAS rejects
    mvn.n r0, #1      // LLVM assembles as mvn/mvn.w, GAS rejects (Error: cannot honor width suffix -- `mvn.n r0,#1')
////////////////////////////////////////////////////////////////
// #-0 seems to be a way of selecting a variant of load/store
// encoding that handles small negative offsets.
// (see https://github.com/llvm/llvm-project/commit/f02d98d7c0d09f696a760c1c3cea8919a796348e)
//
// LLVM appears to define the special "-0" operand as an
// expression of value 0, whose first token is '-'. So
// #-(0) and #-(1-1) are treated specially, but #(-0) is not,
// because its first token is '('.
////////////////////////////////////////////////////////////////
// In Thumb-2 mode, GAS does not assemble these to the #-0
// variant. Both LLVM and GAS assemble all three of these as -0
// in ARM mode, though.
//
// For the plain-offset case the two assemblers pick different
// encodings altogether (first byte 51 vs d1, last byte 0c vs 00).
// For the pre- and post-index cases the outputs differ in a
// single bit of the last byte (0d vs 0f, 09 vs 0b) -- presumably
// the add/subtract bit of the encoding; confirm against the
// architecture manual.
// LLVM: 51 f8 00 0c ldr r0, [r1, #-0]
// GAS: d1 f8 00 00 ldr.w r0, [r1]
    ldr r0, [r1, #-0]
// LLVM: 51 f8 00 0d ldr r0, [r1, #-0]!
// GAS: 51 f8 00 0f ldr r0, [r1, #0]!
    ldr r0, [r1, #-0]!
// LLVM: 51 f8 00 09 ldr r0, [r1], #-0
// GAS: 51 f8 00 0b ldr r0, [r1], #0
    ldr r0, [r1], #-0
////////////////////////////////////////////////////////////////
// Representing #-0 as INT32_MIN internally within LLVM also
// means that #-0 is assembled as INT32_MIN for these arithmetic
// instructions. GAS instead assembles the operand to 0.
////////////////////////////////////////////////////////////////
// The instructions below come in pairs: each "#-0" form is
// followed by the same instruction with an explicit #0x80000000
// (== 2147483648, as printed in the dumps) for comparison. In the
// LLVM dump both members of each pair are identical; in the GAS
// dump the "#-0" member has immediate 0.
//
// LLVM:
//
// 00000000 <$t.0>:
// 0: 4f f0 00 42 mov.w r2, #2147483648
// 4: 4f f0 00 42 mov.w r2, #2147483648
// 8: 62 f0 00 42 orn r2, r2, #2147483648
// c: 62 f0 00 42 orn r2, r2, #2147483648
// 10: 02 f1 00 42 add.w r2, r2, #2147483648
// 14: 02 f1 00 42 add.w r2, r2, #2147483648
// 18: 6f f0 00 40 mvn r0, #2147483648
// 1c: 6f f0 00 40 mvn r0, #2147483648
//
// GAS:
//
// 00000000 <$t>:
// 0: 4f f0 00 02 mov.w r2, #0
// 4: 4f f0 00 42 mov.w r2, #2147483648
// 8: 62 f0 00 02 orn r2, r2, #0
// c: 62 f0 00 42 orn r2, r2, #2147483648
// 10: 02 f1 00 02 add.w r2, r2, #0
// 14: 02 f1 00 42 add.w r2, r2, #2147483648
// 18: 6f f0 00 00 mvn r0, #0
// 1c: 6f f0 00 40 mvn r0, #2147483648
    mov r2, #-0
    mov r2, #0x80000000
    orn r2, r2, #-0
    orn r2, r2, #0x80000000
    add r2, #-0
    add r2, #0x80000000
    mvn r0, #-0
    mvn r0, #0x80000000
////////////////////////////////////////////////////////////////
// LLVM quietly truncates out-of-range post-index offsets
////////////////////////////////////////////////////////////////
// In the two groups below, "lines N" refers to the number in the
// trailing comment of each instruction, not to a file line.
//
// GAS: "Error: offset out of range" on lines 2,3,5,6
// LLVM:
// 00000000 <$t.0>:
// 0: 51 f8 00 0b ldr r0, [r1], #0
// 4: 51 f8 00 0b ldr r0, [r1], #0
// 8: 51 f8 00 0b ldr r0, [r1], #0
// c: 51 f8 01 0b ldr r0, [r1], #1
// 10: 51 f8 01 0b ldr r0, [r1], #1
// 14: 51 f8 01 0b ldr r0, [r1], #1
// (The LLVM results are consistent with only the low 8 bits of
// the offset being kept: 0x100 and 0x10000 become 0, 0x101 and
// 0x10001 become 1.)
    ldr r0, [r1], #0        // 1
    ldr r0, [r1], #0x100    // 2
    ldr r0, [r1], #0x10000  // 3
    ldr r0, [r1], #1        // 4
    ldr r0, [r1], #0x101    // 5
    ldr r0, [r1], #0x10001  // 6
// GAS: "Error: offset out of range" on lines 2,3,5
// LLVM:
// 00000000 <$t.0>:
// 0: 51 f8 00 09 ldr r0, [r1], #-0
// 4: 51 f8 00 09 ldr r0, [r1], #-0
// 8: 51 f8 00 09 ldr r0, [r1], #-0
// c: 51 f8 ff 09 ldr r0, [r1], #-255
// 10: 51 f8 ff 09 ldr r0, [r1], #-255
    ldr r0, [r1], #-0          // 1
    ldr r0, [r1], #-256        // 2
    ldr r0, [r1], #0x80000000  // 3
    ldr r0, [r1], #-255        // 4
    ldr r0, [r1], #0x80000001  // 5
// LLVM diagnoses the invalid offsets for ordinary offsets and
// for pre-index.
    ldr r0, [r1, #0x1000]   // LLVM rejects
    ldr r0, [r1, #0x100]!   // LLVM rejects
    ldr r0, [r1, #-0x100]   // LLVM rejects
    ldr r0, [r1, #-0x100]!  // LLVM rejects
    ldr r0, [r1, #0xfff]    // OK
    ldr r0, [r1, #0xff]!    // OK
    ldr r0, [r1, #-0xff]    // OK
    ldr r0, [r1, #-0xff]!   // OK
////////////////////////////////////////////////////////////////
// Shifting (b/187306147)
////////////////////////////////////////////////////////////////
// LLVM assembles this (pseudo-)instruction to a very different
// instruction than GAS does:
// - LLVM: 4f ea 30 00 rrx r0, r0
// - GAS: 4f ea 00 00 mov.w r0, r0
// Maybe it's an off-by-one error in lib/Target/ARM/ARMInstrThumb2.td.
// Maybe apply this patch:
// -defm t2ROR : T2I_sh_ir<0b11, "ror", imm0_31, rotr>;
// +defm t2ROR : T2I_sh_ir<0b11, "ror", imm1_31, rotr>;
    ror r0, #0
// LLVM allows these, GAS does not
    lsl sp, r2, #0
    lsl r2, sp, #0
// LSL rLO, #0
    lsls r0, #0      // LLVM assembles to: 00 00 movs r0, r0
    lsls r0, r0, #0  // LLVM assembles to: 00 00 movs r0, r0
// LSL rHI, #0 (needs wide instruction -- GAS handles both)
    lsls r8, #0      // LLVM rejects
    lsls r8, r8, #0  // LLVM assembles to: 5f ea 08 08 movs.w r8, r8
    lsls r8, #2      // LLVM assembles to: 5f ea 88 08 lsls.w r8, r8, #2
    lsls r8, r8, #2  // LLVM assembles to: 5f ea 88 08 lsls.w r8, r8, #2
// LLVM rejects these. GAS assembles them to (one output line per
// instruction below, in the same order):
// 00000000 <$t>:
// 0: 12 00 movs r2, r2
// 2: 12 00 movs r2, r2
// 4: 5f ea 02 02 movs.w r2, r2
// 8: 5f ea 02 02 movs.w r2, r2
// c: 4f ea 02 02 mov.w r2, r2
// 10: 4f ea 02 02 mov.w r2, r2
    asrs r2, r2, #0
    lsrs r2, r2, #0
    asrs.w r2, r2, #0
    lsrs.w r2, r2, #0
    asr r2, r2, #0
    lsr r2, r2, #0
// In ARM mode, LLVM and GAS assemble all of these to
// 03 30 a0 e1 mov r3, r3
// The register can also be sp or pc.
// Switch to ARM mode for this comparison, then back to Thumb.
    .arm
    lsl r3, #0
    lsl r3, r3, #0
    lsr r3, #0
    lsr r3, r3, #0
    asr r3, #0
    asr r3, r3, #0
    ror r3, #0
    ror r3, r3, #0
    .thumb
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment