Created
October 10, 2013 11:45
-
-
Save mntmn/6917091 to your computer and use it in GitHub Desktop.
Thumb assembler in one page of C (work in progress).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "slre.h"
/* Input text and output buffer handed to the most recent assemble_thumb() call. */
char *asm_input;
uint16_t *thumb_output;

/* Scanner state codes. */
#define S_LABEL_OR_INSTR 0
#define S_IDENTIFIER 1
#define S_INSTRUCTION 2
#define S_DONE 10
#define S_ERROR 11

/* Number of rows in the THUMB_INSTRS table. */
#define NUM_THUMB_INSTRS 82
/* Size of the capture array handed to slre_match(). */
#define NUM_CAPS 4
/* One row of the instruction table: how to recognise a source line and the
 * opcode bits it starts from. */
struct ThumbInstruction {
  const char* instruction; /* SLRE pattern, matched against the line with all non-identifier characters (e.g. spaces) removed */
  const char format;       /* instruction format number (1-19); selects how captured operands are encoded */
  const uint16_t opcode;   /* base opcode bits; operand fields are OR-ed in by the assembler */
};
/* Label names seen so far: at most 8 labels of at most 7 characters plus NUL. */
char labels[8][8];
/* Scratch buffer holding the current source line with non-identifier characters stripped. */
char cur_identifier[64];
/*
 * Instruction table, grouped by Thumb instruction format.
 *
 * Each pattern is an SLRE regular expression written WITHOUT whitespace,
 * because the scanner drops every character that is not an identifier
 * character before matching ("mov r0, #200" is matched as "movr0,#200").
 * Captures are the operands in source order.  The opcode column holds the
 * fixed bits of the encoding; operand fields are zero.
 *
 * The push/pop/stmia/ldmia rows are placeholders: their patterns contain
 * literal "rlist"/"Rb" text and spaces.
 */
const struct ThumbInstruction THUMB_INSTRS[NUM_THUMB_INSTRS] = {
  // #offset3
  // #offset5
  // #offset8
  // #offset11
  // word8
  // register r0
  // 1 MOVE SHIFTED REG
  {"lslr([0-7]),r([0-7]),#([0-9]+)$", 1, 0},
  {"lsrr([0-7]),r([0-7]),#([0-9]+)$", 1, 1 << 11},
  {"asrr([0-7]),r([0-7]),#([0-9]+)$", 1, 1 << 12},
  // 2 ADD / SUBTRACT
  {"addr([0-7]),r([0-7]),r([0-7])$", 2, 1 << 12 | 1 << 11},
  {"addr([0-7]),r([0-7]),#([0-9]+)$", 2, 1 << 12 | 1 << 11 | 1 << 10}, // immediate
  {"subr([0-7]),r([0-7]),r([0-7])$", 2, 1 << 12 | 1 << 11 | 1 << 9},
  {"subr([0-7]),r([0-7]),#([0-9]+)$", 2, 1 << 12 | 1 << 11 | 1 << 10 | 1 << 9}, // immediate
  // 3 IMMEDIATE MOV/CMP/ADD/SUB
  {"movr([0-7]),#([0-9]+)", 3, 1 << 13}, // immediate move 00100000 00000000
  {"cmpr([0-7]),#([0-9]+)", 3, 1 << 13 | 1 << 11}, // immediate compare 00101000 00000000
  {"addr([0-7]),#([0-9]+)", 3, 1 << 13 | 1 << 12}, // immediate add 00110000 00000000
  {"subr([0-7]),#([0-9]+)", 3, 1 << 13 | 1 << 12 | 1 << 11}, // immediate sub 00111000 00000000
  // 4 ALU
  {"andr([0-7]),r([0-7])$", 4, 1 << 14},
  {"eorr([0-7]),r([0-7])$", 4, 1 << 14 | 1 << 6},
  {"lslr([0-7]),r([0-7])$", 4, 1 << 14 | 1 << 7},
  {"lsrr([0-7]),r([0-7])$", 4, 1 << 14 | 1 << 7 | 1 << 6},
  {"asrr([0-7]),r([0-7])$", 4, 1 << 14 | 1 << 8},
  {"adcr([0-7]),r([0-7])$", 4, 1 << 14 | 1 << 8 | 1 << 6},
  {"sbcr([0-7]),r([0-7])$", 4, 1 << 14 | 1 << 8 | 1 << 7},
  {"rorr([0-7]),r([0-7])$", 4, 1 << 14 | 1 << 8 | 1 << 7 | 1 << 6},
  {"tstr([0-7]),r([0-7])$", 4, 1 << 14 | 1 << 9},
  {"negr([0-7]),r([0-7])$", 4, 1 << 14 | 1 << 9 | 1 << 6},
  {"cmpr([0-7]),r([0-7])$", 4, 1 << 14 | 1 << 9 | 1 << 7},
  {"cmnr([0-7]),r([0-7])$", 4, 1 << 14 | 1 << 9 | 1 << 7 | 1 << 6},
  {"orrr([0-7]),r([0-7])$", 4, 1 << 14 | 1 << 9 | 1 << 8},
  {"mulr([0-7]),r([0-7])$", 4, 1 << 14 | 1 << 9 | 1 << 8 | 1 << 6},
  {"bicr([0-7]),r([0-7])$", 4, 1 << 14 | 1 << 9 | 1 << 8 | 1 << 7},
  {"mvnr([0-7]),r([0-7])$", 4, 1 << 14 | 1 << 9 | 1 << 8 | 1 << 7 | 1 << 6},
  // 5 Hi register operations/branch exchange
  {"addr([0-7]),r([8-9]|1[0-5])", 5, 1 << 14 | 1 << 10 | 1 << 6}, // rd, hs
  {"addr([8-9]|1[0-5]),r([0-7])", 5, 1 << 14 | 1 << 10 | 1 << 7}, // hd, rs
  {"addr([8-9]|1[0-5]),r([8-9]|1[0-5])", 5, 1 << 14 | 1 << 10 | 1 << 7 | 1 << 6}, // hd, hs
  {"cmpr([0-7]),r([8-9]|1[0-5])", 5, 1 << 14 | 1 << 10 | 1 << 8 | 1 << 6}, // rd, hs
  {"cmpr([8-9]|1[0-5]),r([0-7])", 5, 1 << 14 | 1 << 10 | 1 << 8 | 1 << 7}, // hd, rs
  {"cmpr([8-9]|1[0-5]),r([8-9]|1[0-5])", 5, 1 << 14 | 1 << 10 | 1 << 8 | 1 << 7 | 1 << 6}, // hd, hs
  {"movr([0-7]),r([8-9]|1[0-5])", 5, 1 << 14 | 1 << 10 | 1 << 9 | 1 << 6}, // rd, hs
  {"movr([8-9]|1[0-5]),r([0-7])", 5, 1 << 14 | 1 << 10 | 1 << 9 | 1 << 7}, // hd, rs
  {"movr([8-9]|1[0-5]),r([8-9]|1[0-5])", 5, 1 << 14 | 1 << 10 | 1 << 9 | 1 << 7 | 1 << 6}, // hd, hs
  {"bxr([0-7])$", 5, 1 << 14 | 1 << 10 | 1 << 9 | 1 << 8}, // rs
  {"bxr([8-9]|1[0-5])$", 5, 1 << 14 | 1 << 10 | 1 << 9 | 1 << 8 | 1 << 6}, // hs
  // 6 PC-relative load
  {"ldrr([0-7]),\\[PC,#([0-9]+)\\]", 6, 1 << 14 | 1 << 11}, // LDR Rd, [PC, #Imm]
  // 7 load/store with register offset
  {"strr([0-7]),\\[r([0-7]),r([0-7])\\]", 7, 1 << 14 | 1 << 12}, // STR Rd, [Rb, Ro]
  {"strbr([0-7]),\\[r([0-7]),r([0-7])\\]", 7, 1 << 14 | 1 << 12 | 1 << 10}, // STRB Rd, [Rb, Ro]
  {"ldrr([0-7]),\\[r([0-7]),r([0-7])\\]", 7, 1 << 14 | 1 << 12 | 1 << 11}, // LDR Rd, [Rb, Ro]
  {"ldrbr([0-7]),\\[r([0-7]),r([0-7])\\]", 7, 1 << 14 | 1 << 12 | 1 << 11 | 1 << 10}, // LDRB Rd, [Rb, Ro]
  // 8 load/store sign-extended byte/halfword
  // bit 11 is the H flag, bit 10 is the S flag: S=0,H=1 -> LDRH; S=1,H=0 -> LDSB
  {"strhr([0-7]),\\[r([0-7]),r([0-7])\\]", 8, 1 << 14 | 1 << 12 | 1 << 9}, // STRH Rd, [Rb, Ro]
  {"ldrhr([0-7]),\\[r([0-7]),r([0-7])\\]", 8, 1 << 14 | 1 << 12 | 1 << 9 | 1 << 11}, // LDRH Rd, [Rb, Ro]
  {"ldsbr([0-7]),\\[r([0-7]),r([0-7])\\]", 8, 1 << 14 | 1 << 12 | 1 << 9 | 1 << 10}, // LDSB Rd, [Rb, Ro]
  {"ldshr([0-7]),\\[r([0-7]),r([0-7])\\]", 8, 1 << 14 | 1 << 12 | 1 << 9 | 1 << 11 | 1 << 10}, // LDSH Rd, [Rb, Ro]
  // 9 load/store with immediate offset
  {"strr([0-7]),\\[r([0-7]),#([0-7])\\]", 9, 1 << 14 | 1 << 13}, // STR Rd, [Rb, #Imm]
  {"ldrr([0-7]),\\[r([0-7]),#([0-7])\\]", 9, 1 << 14 | 1 << 13 | 1 << 11}, // LDR Rd, [Rb, #Imm]
  {"strbr([0-7]),\\[r([0-7]),#([0-7])\\]", 9, 1 << 14 | 1 << 13 | 1 << 12}, // STRB Rd, [Rb, #Imm]
  {"ldrbr([0-7]),\\[r([0-7]),#([0-7])\\]", 9, 1 << 14 | 1 << 13 | 1 << 12 | 1 << 11}, // LDRB Rd, [Rb, #Imm] // 51
  // 10 load/store with immediate offset
  {"strhr([0-7]),\\[r([0-7]),#([0-9]+)\\]", 10, 1 << 15}, // STRH Rd, [Rb, #Imm]
  {"ldrhr([0-7]),\\[r([0-7]),#([0-9]+)\\]", 10, 1 << 15 | 1 << 11}, // LDRH Rd, [Rb, #Imm]
  // 11 SP-relative load/store
  {"strr([0-7]),\\[SP,#([0-9]+)\\]", 11, 1 << 15 | 1 << 12}, // STR Rd, [SP, #Imm]
  {"ldrr([0-7]),\\[SP,#([0-9]+)\\]", 11, 1 << 15 | 1 << 12 | 1 << 11}, // LDR Rd, [SP, #Imm]
  // 12 load address
  {"addr([0-7]),PC,#([0-9]+)", 12, 1 << 15 | 1 << 13}, // ADD Rd, PC, #Imm
  {"addr([0-7]),SP,#([0-9]+)", 12, 1 << 15 | 1 << 13 | 1 << 11}, // ADD Rd, SP, #Imm
  // 13 add offset to Stack Pointer
  {"addSP,#([0-9]+)", 13, 1 << 15 | 1 << 13 | 1 << 12}, // ADD SP, #Imm
  {"addSP,#\\-([0-9]+)", 13, 1 << 15 | 1 << 13 | 1 << 12 | 1 << 7}, // ADD SP, #-Imm
  // 14 push/pop registers
  {"push {rlist}", 14, 1 << 15 | 1 << 13 | 1 << 12 | 1 << 10},
  {"push {rlist, LR}", 14, 1 << 15 | 1 << 13 | 1 << 12 | 1 << 10 | 1 << 8},
  {"pop {rlist}", 14, 1 << 15 | 1 << 13 | 1 << 12 | 1 << 10 | 1 << 11},
  {"pop {rlist, PC}", 14, 1 << 15 | 1 << 13 | 1 << 12 | 1 << 10 | 1 << 11 | 1 << 8},
  // 15 multiple load/store
  {"stmia Rb! {rlist}", 15, 1 << 15 | 1 << 14},
  {"ldmia Rb! {rlist}", 15, 1 << 15 | 1 << 14 | 1 << 11}, // 65
  // 16 conditional branch
  {"beq([a-zA-Z]+)$", 16, 1 << 15 | 1 << 14 | 1 << 12},
  {"bne([a-zA-Z]+)$", 16, 1 << 15 | 1 << 14 | 1 << 12 | 1 << 8},
  {"bcs([a-zA-Z]+)$", 16, 1 << 15 | 1 << 14 | 1 << 12 | 1 << 9},
  {"bcc([a-zA-Z]+)$", 16, 1 << 15 | 1 << 14 | 1 << 12 | 1 << 9 | 1 << 8},
  {"bmi([a-zA-Z]+)$", 16, 1 << 15 | 1 << 14 | 1 << 12 | 1 << 10},
  {"bpl([a-zA-Z]+)$", 16, 1 << 15 | 1 << 14 | 1 << 12 | 1 << 10 | 1 << 8},
  {"bvs([a-zA-Z]+)$", 16, 1 << 15 | 1 << 14 | 1 << 12 | 1 << 10 | 1 << 9},
  {"bvc([a-zA-Z]+)$", 16, 1 << 15 | 1 << 14 | 1 << 12 | 1 << 10 | 1 << 9 | 1 << 8},
  {"bhi([a-zA-Z]+)$", 16, 1 << 15 | 1 << 14 | 1 << 12 | 1 << 11},
  {"bls([a-zA-Z]+)$", 16, 1 << 15 | 1 << 14 | 1 << 12 | 1 << 11 | 1 << 8},
  {"bge([a-zA-Z]+)$", 16, 1 << 15 | 1 << 14 | 1 << 12 | 1 << 11 | 1 << 9},
  {"blt([a-zA-Z]+)$", 16, 1 << 15 | 1 << 14 | 1 << 12 | 1 << 11 | 1 << 9 | 1 << 8},
  {"bgt([a-zA-Z]+)$", 16, 1 << 15 | 1 << 14 | 1 << 12 | 1 << 11 | 1 << 10},
  {"ble([a-zA-Z]+)$", 16, 1 << 15 | 1 << 14 | 1 << 12 | 1 << 11 | 1 << 10 | 1 << 8},
  // 17 software interrupt
  {"swi([0-9]+)$", 17, 1 << 15 | 1 << 14 | 1 << 12 | 1 << 11 | 1 << 10 | 1 << 9 | 1 << 8},
  // 18 unconditional branch
  {"b([a-zA-Z]+)$", 18, 1 << 15 | 1 << 14 | 1 << 13},
  // 19 long branch with link (generates two instructions)
  {"bl([a-zA-Z]+)$", 19, 1 << 15 | 1 << 14 | 1 << 13 | 1 << 12},
};
/* Returns 1 if c is an ASCII digit or letter, 0 otherwise (locale-independent). */
int is_alphanum(char c) {
  if (c >= '0' && c <= '9') return 1;
  if (c >= 'a' && c <= 'z') return 1;
  if (c >= 'A' && c <= 'Z') return 1;
  return 0;
}
/*
 * Returns 1 if c is kept when a source line is collapsed for pattern
 * matching: ASCII letters and digits plus the operand punctuation
 * '#', ',', '[', ']' and '-'.  Everything else (spaces, tabs, ...) is
 * dropped by the scanner.
 *
 * '-' is kept so that a negative immediate stays negative: without it
 * "add SP, #-4" collapses to "addSP,#4" and is assembled as a positive
 * offset, and the "addSP,#\-..." table pattern can never match.
 */
int is_identifier(char c) {
  if (c >= '0' && c <= '9') return 1;
  if (c >= 'a' && c <= 'z') return 1;
  if (c >= 'A' && c <= 'Z') return 1;
  switch (c) {
  case '#':
  case ',':
  case '[':
  case ']':
  case '-':
    return 1;
  default:
    return 0;
  }
}
int assemble_thumb(char* input, uint16_t* output, int input_len) { | |
asm_input = input; | |
thumb_output = output; | |
int state = S_IDENTIFIER; | |
// phase 1: gather labels | |
//int input_len = strlen(input); | |
int idx = 0; | |
int out_idx = 0; | |
int cur_instr = 0; | |
int cur_label_idx = 0; | |
int cur_ident_idx = 0; | |
int identifiers = 0; | |
state = S_IDENTIFIER; | |
struct slre_cap caps[NUM_CAPS]; | |
while (state!=S_DONE && state!=S_ERROR) { | |
char c = *(input + idx++); | |
if (c=='\n' || c==0) { | |
if (state==S_IDENTIFIER) { | |
// identifier done, is not label | |
identifiers++; | |
if (cur_ident_idx>0) { | |
cur_identifier[cur_ident_idx]=0; | |
#ifdef ASM_DEBUG | |
printf("Ident: %s\n",cur_identifier); | |
#endif | |
int i; | |
for (i=0; i<NUM_THUMB_INSTRS; i++) { | |
if (slre_match(THUMB_INSTRS[i].instruction, cur_identifier, cur_ident_idx, caps, NUM_CAPS) > 0) { | |
// instruction found | |
#ifdef ASM_DEBUG | |
printf("Instruction: %s -> %s (%d)\n",cur_identifier,THUMB_INSTRS[i].instruction,THUMB_INSTRS[i].format); | |
#endif | |
uint16_t code = THUMB_INSTRS[i].opcode; | |
int or,br,sr,dr,imm; | |
char* target; | |
switch (THUMB_INSTRS[i].format) { | |
case 1: | |
imm = atoi(caps[2].ptr) & 0x1f; // 5 bit | |
sr = atoi(caps[1].ptr) & 7; // 3 bit | |
dr = atoi(caps[0].ptr) & 7; // 3 bit | |
code |= imm << 6; | |
code |= sr << 3; | |
code |= dr; | |
break; | |
case 2: | |
imm = atoi(caps[2].ptr) & 7; // 3 bit | |
sr = atoi(caps[1].ptr) & 7; // 3 bit | |
dr = atoi(caps[0].ptr) & 7; // 3 bit | |
code |= imm << 6; | |
code |= sr << 3; | |
code |= dr; | |
break; | |
case 3: | |
imm = atoi(caps[1].ptr) & 0xff; // 8 bit | |
dr = atoi(caps[0].ptr) & 7; // 3 bit | |
code |= dr << 8; | |
code |= imm; | |
break; | |
case 4: | |
// ALU | |
sr = atoi(caps[1].ptr) & 7; // 3 bit | |
dr = atoi(caps[0].ptr) & 7; | |
code |= sr << 3; | |
code |= dr; | |
break; | |
case 5: | |
sr = atoi(caps[0].ptr) & 7; // 3 bit | |
dr = atoi(caps[1].ptr) & 7; | |
code |= sr << 3; | |
code |= dr; | |
break; | |
case 6: | |
break; | |
case 7: | |
break; | |
case 8: | |
or = atoi(caps[0].ptr) & 7; // 3 bit | |
br = atoi(caps[1].ptr) & 7; // 3 bit | |
dr = atoi(caps[2].ptr) & 7; // 3 bit | |
code |= or << 6; | |
code |= br << 3; | |
code |= dr; | |
break; | |
case 9: | |
case 10: | |
dr = atoi(caps[0].ptr) & 7; | |
br = atoi(caps[1].ptr) & 7; // 3 bit | |
imm = atoi(caps[2].ptr) & 0x1f; // 5 bit | |
code |= imm << 6; | |
code |= br << 3; | |
code |= dr; | |
break; | |
case 16: | |
// branch | |
target = caps[0].ptr; | |
int j; | |
for (j=0; j<cur_label_idx; j++) { | |
if (strncmp(labels[j],target,caps[0].len) == 0) { | |
// target found | |
break; | |
} | |
} | |
} | |
output[out_idx++] = code; | |
//state = S_INSTRUCTION; | |
cur_instr = i; | |
} | |
} | |
} | |
cur_ident_idx = 0; | |
} | |
} | |
else if (is_identifier(c)) { | |
cur_identifier[cur_ident_idx++] = c; | |
if (cur_ident_idx>=64) cur_ident_idx=0; | |
} | |
else if (c==':') { | |
if (state==S_IDENTIFIER) { | |
// identifier done, is label | |
cur_identifier[cur_ident_idx]=0; | |
strncpy(labels[cur_label_idx], cur_identifier, cur_ident_idx+1); | |
//printf("Label: %s\n",labels[cur_label_idx]); | |
cur_label_idx++; | |
cur_ident_idx = 0; | |
} | |
} | |
if (idx>=input_len-1) state = S_DONE; | |
} | |
return identifiers; | |
} | |
#ifdef ASM_DEBUG
/* Debug driver: assembles a short fixed program and prints the first eight
 * output halfwords in hex. */
int main(void) {
  // test
  /* A writable array rather than a string literal, because assemble_thumb()
   * takes a non-const char pointer. */
  char input[] =
    "mov r0, #200\n"
    "lsl r0,r0,#29\n"
    "mov r1, #100\n"
    "add r0, r0, r1\n"
    "mov r1, #5\n"
    "strb r0,[r2,#0]\n"
    "bx r14\n\n\n";
  uint16_t output[1024] = {0};

  assemble_thumb(input, output, (int)strlen(input));

  const uint16_t *o = output;
  printf("%04x %04x %04x %04x %04x %04x %04x %04x\n",
         (unsigned)o[0], (unsigned)o[1], (unsigned)o[2], (unsigned)o[3],
         (unsigned)o[4], (unsigned)o[5], (unsigned)o[6], (unsigned)o[7]);
  return 0;
}
#endif
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment