Skip to content

Instantly share code, notes, and snippets.

@mntmn
Created October 10, 2013 11:45
Show Gist options
  • Save mntmn/6917091 to your computer and use it in GitHub Desktop.
Save mntmn/6917091 to your computer and use it in GitHub Desktop.
Thumb assembler in one page of C (work in progress)
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "slre.h"
// Most recent `input` argument given to assemble_thumb(); it is stored there and
// not read anywhere else in the visible code.
char* asm_input;
// Most recent `output` argument given to assemble_thumb(); stored there and not
// read anywhere else in the visible code.
uint16_t* thumb_output;
// Scanner states used by assemble_thumb().
// S_LABEL_OR_INSTR and S_INSTRUCTION are defined but not used by the visible code.
#define S_LABEL_OR_INSTR 0
// Initial state: characters are being collected into cur_identifier.
#define S_IDENTIFIER 1
#define S_INSTRUCTION 2
// Either of the next two states ends the scanning loop.
#define S_DONE 10
#define S_ERROR 11
// Number of rows in THUMB_INSTRS.
#define NUM_THUMB_INSTRS 82
// Size of the capture array handed to slre_match().
#define NUM_CAPS 4
// One row of the instruction table: a pattern for a source line, the encoding
// format the line belongs to, and the opcode bits that identify the instruction.
struct ThumbInstruction {
// SLRE pattern passed to slre_match(); its capture groups pick out the operands.
const char* instruction;
// Format number; assemble_thumb() switches on it to decide how operands are packed.
const char format;
// Base opcode; assemble_thumb() starts from this value and ORs operand fields into it.
const uint16_t opcode;
};
// Label names recorded by assemble_thumb() when it meets a ':' -- 8 slots of
// 8 bytes each. Only the name is stored; no address is kept alongside it.
char labels[8][8];
// Scratch buffer for the line currently being scanned. Only characters accepted
// by is_identifier() are appended, so whitespace never appears in it.
char cur_identifier[64];
// Instruction table, grouped by Thumb instruction format (1-19).
//
// Each row is {pattern, format, base opcode}. assemble_thumb() tries the rows in
// order against a source line from which every character rejected by
// is_identifier() (notably whitespace) has been removed, which is why the
// patterns read "lslr([0-7])" rather than "lsl r([0-7])". The capture groups
// pick out register numbers and immediates; the base opcode holds only the bits
// that identify the instruction, with all operand fields left at zero.
//
// Operand kinds appearing in the formats: #offset3, #offset5, #offset8,
// #offset11, word8, and registers (e.g. r0).
const struct ThumbInstruction THUMB_INSTRS[NUM_THUMB_INSTRS] = {
    // 1 MOVE SHIFTED REG
    {"lslr([0-7]),r([0-7]),#([0-9]+)$", 1, 0},
    {"lsrr([0-7]),r([0-7]),#([0-9]+)$", 1, 1 << 11},
    {"asrr([0-7]),r([0-7]),#([0-9]+)$", 1, 1 << 12},
    // 2 ADD / SUBTRACT
    {"addr([0-7]),r([0-7]),r([0-7])$", 2, 1 << 12 | 1 << 11},
    {"addr([0-7]),r([0-7]),#([0-9]+)$", 2, 1 << 12 | 1 << 11 | 1 << 10}, // immediate
    {"subr([0-7]),r([0-7]),r([0-7])$", 2, 1 << 12 | 1 << 11 | 1 << 9},
    {"subr([0-7]),r([0-7]),#([0-9]+)$", 2, 1 << 12 | 1 << 11 | 1 << 10 | 1 << 9}, // immediate
    // 3 IMMEDIATE MOV/CMP/ADD/SUB
    {"movr([0-7]),#([0-9]+)", 3, 1 << 13}, // immediate move 00100000 00000000
    {"cmpr([0-7]),#([0-9]+)", 3, 1 << 13 | 1 << 11}, // immediate compare 00101000 00000000
    {"addr([0-7]),#([0-9]+)", 3, 1 << 13 | 1 << 12}, // immediate add 00110000 00000000
    {"subr([0-7]),#([0-9]+)", 3, 1 << 13 | 1 << 12 | 1 << 11}, // immediate sub 00111000 00000000
    // 4 ALU (operation code in bits 9..6)
    {"andr([0-7]),r([0-7])$", 4, 1 << 14},
    {"eorr([0-7]),r([0-7])$", 4, 1 << 14 | 1 << 6},
    {"lslr([0-7]),r([0-7])$", 4, 1 << 14 | 1 << 7},
    {"lsrr([0-7]),r([0-7])$", 4, 1 << 14 | 1 << 7 | 1 << 6},
    {"asrr([0-7]),r([0-7])$", 4, 1 << 14 | 1 << 8},
    {"adcr([0-7]),r([0-7])$", 4, 1 << 14 | 1 << 8 | 1 << 6},
    {"sbcr([0-7]),r([0-7])$", 4, 1 << 14 | 1 << 8 | 1 << 7},
    {"rorr([0-7]),r([0-7])$", 4, 1 << 14 | 1 << 8 | 1 << 7 | 1 << 6},
    {"tstr([0-7]),r([0-7])$", 4, 1 << 14 | 1 << 9},
    {"negr([0-7]),r([0-7])$", 4, 1 << 14 | 1 << 9 | 1 << 6},
    {"cmpr([0-7]),r([0-7])$", 4, 1 << 14 | 1 << 9 | 1 << 7},
    {"cmnr([0-7]),r([0-7])$", 4, 1 << 14 | 1 << 9 | 1 << 7 | 1 << 6},
    {"orrr([0-7]),r([0-7])$", 4, 1 << 14 | 1 << 9 | 1 << 8},
    {"mulr([0-7]),r([0-7])$", 4, 1 << 14 | 1 << 9 | 1 << 8 | 1 << 6},
    {"bicr([0-7]),r([0-7])$", 4, 1 << 14 | 1 << 9 | 1 << 8 | 1 << 7},
    {"mvnr([0-7]),r([0-7])$", 4, 1 << 14 | 1 << 9 | 1 << 8 | 1 << 7 | 1 << 6},
    // 5 Hi register operations/branch exchange
    // (bit 7 = H1: destination is r8-r15, bit 6 = H2: source is r8-r15)
    {"addr([0-7]),r([8-9]|1[0-5])", 5, 1 << 14 | 1 << 10 | 1 << 6}, // rd, hs
    {"addr([8-9]|1[0-5]),r([0-7])", 5, 1 << 14 | 1 << 10 | 1 << 7}, // hd, rs
    {"addr([8-9]|1[0-5]),r([8-9]|1[0-5])", 5, 1 << 14 | 1 << 10 | 1 << 7 | 1 << 6}, // hd, hs
    {"cmpr([0-7]),r([8-9]|1[0-5])", 5, 1 << 14 | 1 << 10 | 1 << 8 | 1 << 6}, // rd, hs
    {"cmpr([8-9]|1[0-5]),r([0-7])", 5, 1 << 14 | 1 << 10 | 1 << 8 | 1 << 7}, // hd, rs
    {"cmpr([8-9]|1[0-5]),r([8-9]|1[0-5])", 5, 1 << 14 | 1 << 10 | 1 << 8 | 1 << 7 | 1 << 6}, // hd, hs
    {"movr([0-7]),r([8-9]|1[0-5])", 5, 1 << 14 | 1 << 10 | 1 << 9 | 1 << 6}, // rd, hs
    {"movr([8-9]|1[0-5]),r([0-7])", 5, 1 << 14 | 1 << 10 | 1 << 9 | 1 << 7}, // hd, rs
    {"movr([8-9]|1[0-5]),r([8-9]|1[0-5])", 5, 1 << 14 | 1 << 10 | 1 << 9 | 1 << 7 | 1 << 6}, // hd, hs
    {"bxr([0-7])$", 5, 1 << 14 | 1 << 10 | 1 << 9 | 1 << 8}, // rs
    {"bxr([8-9]|1[0-5])$", 5, 1 << 14 | 1 << 10 | 1 << 9 | 1 << 8 | 1 << 6}, // hs
    // 6 PC-relative load
    {"ldrr([0-7]),\\[PC,#([0-9]+)\\]", 6, 1 << 14 | 1 << 11}, // LDR Rd, [PC, #Imm]
    // 7 load/store with register offset (bit 11 = L: load, bit 10 = B: byte)
    {"strr([0-7]),\\[r([0-7]),r([0-7])\\]", 7, 1 << 14 | 1 << 12}, // STR Rd, [Rb, Ro]
    {"strbr([0-7]),\\[r([0-7]),r([0-7])\\]", 7, 1 << 14 | 1 << 12 | 1 << 10}, // STRB Rd, [Rb, Ro]
    {"ldrr([0-7]),\\[r([0-7]),r([0-7])\\]", 7, 1 << 14 | 1 << 12 | 1 << 11}, // LDR Rd, [Rb, Ro]
    {"ldrbr([0-7]),\\[r([0-7]),r([0-7])\\]", 7, 1 << 14 | 1 << 12 | 1 << 11 | 1 << 10}, // LDRB Rd, [Rb, Ro]
    // 8 load/store sign-extended byte/halfword
    // (bit 11 = H, bit 10 = S, bit 9 always set; S=0,H=1 is LDRH and S=1,H=0 is LDSB)
    {"strhr([0-7]),\\[r([0-7]),r([0-7])\\]", 8, 1 << 14 | 1 << 12 | 1 << 9}, // STRH Rd, [Rb, Ro]
    {"ldrhr([0-7]),\\[r([0-7]),r([0-7])\\]", 8, 1 << 14 | 1 << 12 | 1 << 11 | 1 << 9}, // LDRH Rd, [Rb, Ro]
    {"ldsbr([0-7]),\\[r([0-7]),r([0-7])\\]", 8, 1 << 14 | 1 << 12 | 1 << 10 | 1 << 9}, // LDSB Rd, [Rb, Ro]
    {"ldshr([0-7]),\\[r([0-7]),r([0-7])\\]", 8, 1 << 14 | 1 << 12 | 1 << 11 | 1 << 10 | 1 << 9}, // LDSH Rd, [Rb, Ro]
    // 9 load/store with immediate offset (bit 12 = B: byte, bit 11 = L: load)
    {"strr([0-7]),\\[r([0-7]),#([0-7])\\]", 9, 1 << 14 | 1 << 13}, // STR Rd, [Rb, #Imm]
    {"ldrr([0-7]),\\[r([0-7]),#([0-7])\\]", 9, 1 << 14 | 1 << 13 | 1 << 11}, // LDR Rd, [Rb, #Imm]
    {"strbr([0-7]),\\[r([0-7]),#([0-7])\\]", 9, 1 << 14 | 1 << 13 | 1 << 12}, // STRB Rd, [Rb, #Imm]
    {"ldrbr([0-7]),\\[r([0-7]),#([0-7])\\]", 9, 1 << 14 | 1 << 13 | 1 << 12 | 1 << 11}, // LDRB Rd, [Rb, #Imm] (row 51)
    // 10 load/store halfword with immediate offset
    {"strhr([0-7]),\\[r([0-7]),#([0-9]+)\\]", 10, 1 << 15}, // STRH Rd, [Rb, #Imm]
    {"ldrhr([0-7]),\\[r([0-7]),#([0-9]+)\\]", 10, 1 << 15 | 1 << 11}, // LDRH Rd, [Rb, #Imm]
    // 11 SP-relative load/store
    {"strr([0-7]),\\[SP,#([0-9]+)\\]", 11, 1 << 15 | 1 << 12}, // STR Rd, [SP, #Imm]
    {"ldrr([0-7]),\\[SP,#([0-9]+)\\]", 11, 1 << 15 | 1 << 12 | 1 << 11}, // LDR Rd, [SP, #Imm]
    // 12 load address
    {"addr([0-7]),PC,#([0-9]+)", 12, 1 << 15 | 1 << 13}, // ADD Rd, PC, #Imm
    {"addr([0-7]),SP,#([0-9]+)", 12, 1 << 15 | 1 << 13 | 1 << 11}, // ADD Rd, SP, #Imm
    // 13 add offset to Stack Pointer
    {"addSP,#([0-9]+)", 13, 1 << 15 | 1 << 13 | 1 << 12}, // ADD SP, #Imm
    {"addSP,#\\-([0-9]+)", 13, 1 << 15 | 1 << 13 | 1 << 12 | 1 << 7}, // ADD SP, #-Imm
    // 14 push/pop registers
    // Placeholder patterns: they contain spaces and braces, which is_identifier()
    // filters out of every source line, so these rows cannot match yet.
    {"push {rlist}", 14, 1 << 15 | 1 << 13 | 1 << 12 | 1 << 10},
    {"push {rlist, LR}", 14, 1 << 15 | 1 << 13 | 1 << 12 | 1 << 10 | 1 << 8},
    {"pop {rlist}", 14, 1 << 15 | 1 << 13 | 1 << 12 | 1 << 10 | 1 << 11},
    {"pop {rlist, PC}", 14, 1 << 15 | 1 << 13 | 1 << 12 | 1 << 10 | 1 << 11 | 1 << 8},
    // 15 multiple load/store (placeholder patterns, same caveat as format 14)
    {"stmia Rb! {rlist}", 15, 1 << 15 | 1 << 14},
    {"ldmia Rb! {rlist}", 15, 1 << 15 | 1 << 14 | 1 << 11}, // (row 65)
    // 16 conditional branch (condition code in bits 11..8)
    {"beq([a-zA-Z]+)$", 16, 1 << 15 | 1 << 14 | 1 << 12},
    {"bne([a-zA-Z]+)$", 16, 1 << 15 | 1 << 14 | 1 << 12 | 1 << 8},
    {"bcs([a-zA-Z]+)$", 16, 1 << 15 | 1 << 14 | 1 << 12 | 1 << 9},
    {"bcc([a-zA-Z]+)$", 16, 1 << 15 | 1 << 14 | 1 << 12 | 1 << 9 | 1 << 8},
    {"bmi([a-zA-Z]+)$", 16, 1 << 15 | 1 << 14 | 1 << 12 | 1 << 10},
    {"bpl([a-zA-Z]+)$", 16, 1 << 15 | 1 << 14 | 1 << 12 | 1 << 10 | 1 << 8},
    {"bvs([a-zA-Z]+)$", 16, 1 << 15 | 1 << 14 | 1 << 12 | 1 << 10 | 1 << 9},
    {"bvc([a-zA-Z]+)$", 16, 1 << 15 | 1 << 14 | 1 << 12 | 1 << 10 | 1 << 9 | 1 << 8},
    {"bhi([a-zA-Z]+)$", 16, 1 << 15 | 1 << 14 | 1 << 12 | 1 << 11},
    {"bls([a-zA-Z]+)$", 16, 1 << 15 | 1 << 14 | 1 << 12 | 1 << 11 | 1 << 8},
    {"bge([a-zA-Z]+)$", 16, 1 << 15 | 1 << 14 | 1 << 12 | 1 << 11 | 1 << 9},
    {"blt([a-zA-Z]+)$", 16, 1 << 15 | 1 << 14 | 1 << 12 | 1 << 11 | 1 << 9 | 1 << 8},
    {"bgt([a-zA-Z]+)$", 16, 1 << 15 | 1 << 14 | 1 << 12 | 1 << 11 | 1 << 10},
    {"ble([a-zA-Z]+)$", 16, 1 << 15 | 1 << 14 | 1 << 12 | 1 << 11 | 1 << 10 | 1 << 8},
    // 17 software interrupt
    {"swi([0-9]+)$", 17, 1 << 15 | 1 << 14 | 1 << 12 | 1 << 11 | 1 << 10 | 1 << 9 | 1 << 8},
    // 18 unconditional branch
    {"b([a-zA-Z]+)$", 18, 1 << 15 | 1 << 14 | 1 << 13},
    // 19 long branch with link (generates two instructions)
    {"bl([a-zA-Z]+)$", 19, 1 << 15 | 1 << 14 | 1 << 13 | 1 << 12},
};
// Returns 1 when `c` is an ASCII digit ('0'-'9') or an ASCII letter
// ('a'-'z', 'A'-'Z'), and 0 for every other character.
int is_alphanum(char c) {
    if (c >= '0' && c <= '9') {
        return 1;
    }
    if (c >= 'a' && c <= 'z') {
        return 1;
    }
    if (c >= 'A' && c <= 'Z') {
        return 1;
    }
    return 0;
}
// Returns 1 when `c` may be part of a scanned source line: an ASCII digit, an
// ASCII letter, or one of the operand punctuation characters '#', ',', '[' and
// ']'. Everything else (including whitespace, ':' and '-') yields 0.
int is_identifier(char c) {
    switch (c) {
    case '#':
    case ',':
    case '[':
    case ']':
        return 1;
    default:
        break;
    }

    int digit = (c >= '0' && c <= '9');
    int lower = (c >= 'a' && c <= 'z');
    int upper = (c >= 'A' && c <= 'Z');
    return digit || lower || upper;
}
int assemble_thumb(char* input, uint16_t* output, int input_len) {
asm_input = input;
thumb_output = output;
int state = S_IDENTIFIER;
// phase 1: gather labels
//int input_len = strlen(input);
int idx = 0;
int out_idx = 0;
int cur_instr = 0;
int cur_label_idx = 0;
int cur_ident_idx = 0;
int identifiers = 0;
state = S_IDENTIFIER;
struct slre_cap caps[NUM_CAPS];
while (state!=S_DONE && state!=S_ERROR) {
char c = *(input + idx++);
if (c=='\n' || c==0) {
if (state==S_IDENTIFIER) {
// identifier done, is not label
identifiers++;
if (cur_ident_idx>0) {
cur_identifier[cur_ident_idx]=0;
#ifdef ASM_DEBUG
printf("Ident: %s\n",cur_identifier);
#endif
int i;
for (i=0; i<NUM_THUMB_INSTRS; i++) {
if (slre_match(THUMB_INSTRS[i].instruction, cur_identifier, cur_ident_idx, caps, NUM_CAPS) > 0) {
// instruction found
#ifdef ASM_DEBUG
printf("Instruction: %s -> %s (%d)\n",cur_identifier,THUMB_INSTRS[i].instruction,THUMB_INSTRS[i].format);
#endif
uint16_t code = THUMB_INSTRS[i].opcode;
int or,br,sr,dr,imm;
char* target;
switch (THUMB_INSTRS[i].format) {
case 1:
imm = atoi(caps[2].ptr) & 0x1f; // 5 bit
sr = atoi(caps[1].ptr) & 7; // 3 bit
dr = atoi(caps[0].ptr) & 7; // 3 bit
code |= imm << 6;
code |= sr << 3;
code |= dr;
break;
case 2:
imm = atoi(caps[2].ptr) & 7; // 3 bit
sr = atoi(caps[1].ptr) & 7; // 3 bit
dr = atoi(caps[0].ptr) & 7; // 3 bit
code |= imm << 6;
code |= sr << 3;
code |= dr;
break;
case 3:
imm = atoi(caps[1].ptr) & 0xff; // 8 bit
dr = atoi(caps[0].ptr) & 7; // 3 bit
code |= dr << 8;
code |= imm;
break;
case 4:
// ALU
sr = atoi(caps[1].ptr) & 7; // 3 bit
dr = atoi(caps[0].ptr) & 7;
code |= sr << 3;
code |= dr;
break;
case 5:
sr = atoi(caps[0].ptr) & 7; // 3 bit
dr = atoi(caps[1].ptr) & 7;
code |= sr << 3;
code |= dr;
break;
case 6:
break;
case 7:
break;
case 8:
or = atoi(caps[0].ptr) & 7; // 3 bit
br = atoi(caps[1].ptr) & 7; // 3 bit
dr = atoi(caps[2].ptr) & 7; // 3 bit
code |= or << 6;
code |= br << 3;
code |= dr;
break;
case 9:
case 10:
dr = atoi(caps[0].ptr) & 7;
br = atoi(caps[1].ptr) & 7; // 3 bit
imm = atoi(caps[2].ptr) & 0x1f; // 5 bit
code |= imm << 6;
code |= br << 3;
code |= dr;
break;
case 16:
// branch
target = caps[0].ptr;
int j;
for (j=0; j<cur_label_idx; j++) {
if (strncmp(labels[j],target,caps[0].len) == 0) {
// target found
break;
}
}
}
output[out_idx++] = code;
//state = S_INSTRUCTION;
cur_instr = i;
}
}
}
cur_ident_idx = 0;
}
}
else if (is_identifier(c)) {
cur_identifier[cur_ident_idx++] = c;
if (cur_ident_idx>=64) cur_ident_idx=0;
}
else if (c==':') {
if (state==S_IDENTIFIER) {
// identifier done, is label
cur_identifier[cur_ident_idx]=0;
strncpy(labels[cur_label_idx], cur_identifier, cur_ident_idx+1);
//printf("Label: %s\n",labels[cur_label_idx]);
cur_label_idx++;
cur_ident_idx = 0;
}
}
if (idx>=input_len-1) state = S_DONE;
}
return identifiers;
}
#ifdef ASM_DEBUG
// Debug-only driver: assembles a fixed sample program and prints the first
// eight output halfwords as 4-digit hexadecimal numbers.
int main() {
    // Sample program, one instruction per line, followed by two blank lines.
    char* input =
        "mov r0, #200\n"
        "lsl r0,r0,#29\n"
        "mov r1, #100\n"
        "add r0, r0, r1\n"
        "mov r1, #5\n"
        "strb r0,[r2,#0]\n"
        "bx r14\n\n\n";
    // Zero-filled so that halfwords the assembler never writes print as 0000.
    uint16_t output[1024] = {0};

    assemble_thumb(input, output, strlen(input));

    const uint16_t* words = output;
    printf("%04x %04x %04x %04x %04x %04x %04x %04x\n",
           words[0], words[1], words[2], words[3],
           words[4], words[5], words[6], words[7]);
    return 0;
}
#endif
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment