Created
September 16, 2019 00:39
-
-
Save ISSOtm/2c0921a808b018a2414f317e047723a8 to your computer and use it in GitHub Desktop.
Tentative, cleaner RGBDS linker script parser (split into a separate lexer and parser)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdlib.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include "link/main.h"
#include "link/script.h"
#include "link/section.h"
#include "extern/err.h"
/*
 * Tell whether `c` is horizontal whitespace (a space or a tab).
 * Newline characters are deliberately not included.
 */
static inline bool isWhiteSpace(int c)
{
	switch (c) {
	case ' ':
	case '\t':
		return true;
	default:
		return false;
	}
}
/*
 * Tell whether `c` is one of the two line terminator characters (CR or LF).
 */
static inline bool isNewline(int c)
{
	if (c == '\n')
		return true;
	if (c == '\r')
		return true;
	return false;
}
/*
 * Try to interpret the whole of `str` as an unsigned number.
 *
 * The number is decimal by default, or hexadecimal when prefixed with `$`.
 * Hexadecimal digits are accepted in either case.
 *
 * @param str    NUL-terminated string to parse
 * @param number Where to store the parsed value; only written on success
 * @return true if every character of `str` was a valid digit (and there was
 *         at least one), false otherwise
 */
static bool tryParseNumber(char const *str, uint32_t *number)
{
	static char const digits[] = "0123456789ABCDEF";
	uint8_t base = 10;
	uint32_t value = 0;

	if (*str == '$') {
		str++;
		base = 16;
	}

	/* An empty string is not a number */
	if (!*str)
		return false;

	do {
		/* <ctype.h> functions require an `unsigned char` value */
		int chr = toupper((unsigned char)*str++);
		uint8_t digit = 0;

		/* Look the character up among the digits valid in this base */
		while (digit < base && chr != digits[digit])
			digit++;

		/* Not found: this is not a digit in the current base */
		if (digit == base)
			return false;

		value = value * base + digit;
	} while (*str);

	/* Only publish the result once the whole string has been accepted */
	*number = value;
	return true;
}
/* Kinds of tokens the linker script lexer can produce */
enum LinkerScriptTokenType {
	TOKEN_NEWLINE = 0, /* End of a line */
	TOKEN_COMMAND,     /* One of the `commands` keywords */
	TOKEN_BANK,        /* One of the `memtypes` keywords */
	TOKEN_NUMBER,      /* Decimal or `$`-prefixed hexadecimal number */
	TOKEN_SECTION,     /* Quoted section name */
	TOKEN_EOF,         /* End of the linker script */
	TOKEN_INVALID      /* Word that matched nothing */
};
/*
 * Commands understood by the linker script.
 * The values double as indices into the `commands` name table, and
 * COMMAND_INVALID doubles as the number of valid commands.
 */
enum LinkerScriptCommand {
	COMMAND_ORG = 0,
	COMMAND_ALIGN,
	COMMAND_INVALID
};
struct LinkerScriptToken { | |
enum LinkerScriptTokenType type; | |
union LinkerScriptTokenAttr { | |
enum LinkerScriptCommand command; | |
enum SectionType bank; | |
uint32_t number; | |
char *string; | |
} attr; | |
}; | |
static char const * const memtypes[] = { | |
[SECTTYPE_ROM0] = "ROM0", | |
[SECTTYPE_ROMX] = "ROMX", | |
[SECTTYPE_VRAM] = "VRAM", | |
[SECTTYPE_SRAM] = "SRAM", | |
[SECTTYPE_WRAM0] = "WRAM0", | |
[SECTTYPE_WRAMX] = "WRAMX", | |
[SECTTYPE_OAM] = "OAM", | |
[SECTTYPE_HRAM] = "HRAM" | |
}; | |
static char const * const commands[] = { | |
[COMMAND_ORG] = "ORG", | |
[COMMAND_ALIGN] = "ALIGN" | |
}; | |
/* Number of the linker script line being processed; used in diagnostics */
static uint32_t lineNo = 0;
/*
 * Read one character from `file`.
 *
 * @param file Stream to read from
 * @return The character read, or EOF once the end of the stream is reached.
 *         A read error is reported through `err` with exit status 1.
 */
static int readChar(FILE *file)
{
	int const c = getc_unlocked(file);

	if (c != EOF)
		return c;

	/* EOF is returned both at end of input and on error: tell them apart */
	if (ferror(file))
		err(1, "%s: Unexpected error reading linker script", __func__);

	return EOF;
}
static struct LinkerScriptToken const *nextToken(void) | |
{ | |
static struct LinkerScriptToken token; | |
int curchar; | |
/* If the token has a string, make sure to avoid leaking it */ | |
if (token.type == TOKEN_SECTION) | |
free(token.attr.string); | |
/* Skip initial whitespace... */ | |
do | |
curchar = readChar(linkerScript); | |
while (isWhiteSpace(curchar)); | |
/* If this is a comment, skip to the end of the line */ | |
if (curchar == ';') { | |
do | |
curchar = readChar(linkerScript); | |
while (!isNewline(curchar) && curchar != EOF); | |
} | |
if (curchar == EOF) { | |
token.type = TOKEN_EOF; | |
} else if (isNewline(curchar)) { | |
/* If we have a newline char, this is a newline token */ | |
token.type = TOKEN_NEWLINE; | |
/* FIXME: This works with CRLF newlines, but not CR-only */ | |
if (curchar == '\r') | |
readChar(linkerScript); /* Read and discard LF */ | |
} else if (curchar == '"') { | |
/* If we have a string start, this is a section name */ | |
token.type = TOKEN_SECTION; | |
token.attr.string = NULL; /* Force initial alloc */ | |
size_t size = 0; | |
size_t capacity = 16; /* Half of the default capacity */ | |
do { | |
curchar = readChar(linkerScript); | |
if (curchar == EOF || isNewline(curchar)) | |
errx(1, "Line %u: Unterminated string", lineNo); | |
else if (curchar == '"') | |
/* Quotes force a string termination */ | |
curchar = '\0'; | |
if (size >= capacity || token.attr.string == NULL) { | |
capacity *= 2; | |
token.attr.string = realloc(token.attr.string, | |
capacity); | |
if (!token.attr.string) | |
err(1, "%s: Failed to allocate memory for section name", | |
__func__); | |
} | |
token.attr.string[size++] = curchar; | |
} while (curchar); | |
} else { | |
/* This is either a number, command or bank, that is: a word */ | |
char *str = NULL; | |
size_t size = 0; | |
size_t capacity = 8; /* Half of the default capacity */ | |
do { | |
curchar = readChar(linkerScript); | |
/* Whitespace or a comment start end the token */ | |
if (isWhiteSpace(curchar) || curchar == ';') | |
curchar = '\0'; | |
if (size >= capacity || str == NULL) { | |
capacity *= 2; | |
str = realloc(str, capacity); | |
if (!str) | |
err(1, "%s: Failed to allocate memory for token", | |
__func__); | |
} | |
} while (curchar); | |
token.type = TOKEN_INVALID; | |
for (enum LinkerScriptCommand i = 0; i < COMMAND_INVALID; i++) { | |
if (!strcmp(commands[i], str)) { | |
token.type = TOKEN_COMMAND; | |
token.attr.command = i; | |
break; | |
} | |
} | |
if (token.type == TOKEN_INVALID) { | |
for (enum SectionType type = 0; type < SECTTYPE_INVALID; | |
type++) { | |
if (!strcmp(memtypes[type], str)) { | |
token.type = TOKEN_BANK; | |
token.attr.bank = type; | |
break; | |
} | |
} | |
} | |
/* None of the string matched, so perhaps we have a number? */ | |
if (tryParseNumber(str, &token.attr.number)) | |
token.type = TOKEN_NUMBER; | |
else | |
errx(1, "Unknown token \"%s\" on linker script line %u", | |
lineNo); | |
free(str); | |
} | |
return &token; | |
} | |
static void processCommand(enum LinkerScriptCommand command, uint16_t arg, | |
uint16_t *pc) | |
{ | |
uint16_t alignedPC; | |
switch (command) { | |
case COMMAND_INVALID: | |
/* Not reached */ | |
case COMMAND_ORG: | |
if (*pc < arg) | |
errx(1, "Line %u: `ORG` cannot be used to go backwards", | |
lineNo); | |
*pc = arg; | |
break; | |
case COMMAND_ALIGN: | |
if (arg > 16) | |
arg = 16; | |
alignedPC = *pc & sect_AlignmentMask(arg); | |
/* Are we already aligned? */ | |
if (alignedPC != *pc) | |
*pc = alignedPC + (1 << arg); | |
} | |
} | |
/* States of the linker script parser */
enum LinkerScriptParserState {
	PARSER_FIRSTTIME = 0, /* Nothing parsed yet; state must be set up */
	PARSER_LINESTART,     /* Expecting the first token of a line */
	PARSER_LINEEND        /* Expecting nothing more on this line */
};
/* Part of internal state, but has data that needs to be freed */ | |
static uint16_t *curaddr[SECTTYPE_INVALID]; | |
/* Put as global to ensure it's initialized only once */ | |
static enum LinkerScriptParserState parserState = PARSER_FIRSTTIME; | |
struct SectionPlacement *script_NextSection(void) | |
{ | |
static struct SectionPlacement section; | |
static enum SectionType type; | |
static uint32_t bank; | |
if (parserState == PARSER_FIRSTTIME) { | |
lineNo = 1; | |
/* Init PC for all banks */ | |
for (enum SectionType i = 0; i < SECTTYPE_INVALID; i++) { | |
curaddr[i] = malloc(sizeof(*curaddr[i]) * nbbanks(i)); | |
for (uint32_t bank = 0; bank < nbbanks(i); bank++) | |
curaddr[i][bank] = startaddr[i]; | |
} | |
type = SECTTYPE_INVALID; | |
parserState = PARSER_LINESTART; | |
} | |
for (;;) { | |
struct LinkerScriptToken const *token = nextToken(); | |
if (type != SECTTYPE_INVALID) { | |
if (curaddr[type][bank] > endaddr(type)) | |
errx(1, "Line %u: PC overflowed (%u > %u)", | |
curaddr[type][bank], endaddr(type)); | |
if (curaddr[type][bank] < startaddr[type]) | |
errx(1, "Line %u: PC underflowed (%u < %u)", | |
curaddr[type][bank], startaddr[type]); | |
} | |
switch (parserState) { | |
case PARSER_FIRSTTIME: | |
/* Not reached */ | |
case PARSER_LINESTART: | |
switch (token->type) { | |
case TOKEN_INVALID: | |
/* Not reached */ | |
case TOKEN_EOF: | |
return NULL; | |
case TOKEN_NUMBER: | |
errx(1, "Line %u: stray number", lineNo); | |
case TOKEN_NEWLINE: | |
lineNo++; | |
break; | |
case TOKEN_SECTION: | |
parserState = PARSER_LINEEND; | |
if (type == SECTTYPE_INVALID) | |
errx(1, "Line %u: Didn't specify a location before the section", | |
lineNo); | |
section.section = | |
sect_GetSection(token->attr.string); | |
section.address = curaddr[type][bank]; | |
section.bank = bank; | |
curaddr[type][bank] += section.section->size; | |
return §ion; | |
enum LinkerScriptTokenType tokType; | |
union LinkerScriptTokenAttr attr; | |
bool hasArg; | |
uint32_t arg; | |
case TOKEN_COMMAND: | |
case TOKEN_BANK: | |
tokType = token->type; | |
attr = token->attr; | |
token = nextToken(); | |
hasArg = token->type == TOKEN_NUMBER; | |
/* | |
* Leaving `arg` uninitialized when `!hasArg` | |
* causes GCC to warn about its use as an | |
* argument to `processCommand`. This cannot | |
* happen because `hasArg` has to be true, but | |
* silence the warning anyways. | |
* I dislike doing this because it could swallow | |
* actual errors, but I don't have a choice. | |
*/ | |
arg = hasArg ? token->attr.number : 0; | |
if (tokType == TOKEN_COMMAND) { | |
if (type == SECTTYPE_INVALID) | |
errx(1, "Line %u: Didn't specify a location before the command", | |
lineNo); | |
if (!hasArg) | |
errx(1, "Line %u: Command specified without an argument", | |
lineNo); | |
processCommand(attr.command, arg, | |
&curaddr[type][bank]); | |
} else { /* TOKEN_BANK */ | |
type = attr.bank; | |
/* | |
* If there's only one bank, | |
* specifying the number is optional. | |
*/ | |
if (!hasArg && nbbanks(type) != 1) | |
errx(1, "Line %u: Didn't specify a bank number", | |
lineNo); | |
else if (!hasArg) | |
arg = bankranges[type][0]; | |
else if (arg < bankranges[type][0]) | |
errx(1, "Line %u: specified bank number is too low (%u < %u)", | |
lineNo, arg, | |
bankranges[type][0]); | |
else if (arg > bankranges[type][1]) | |
errx(1, "Line %u: specified bank number is too high (%u > %u)", | |
lineNo, arg, | |
bankranges[type][1]); | |
bank = arg; | |
} | |
/* If we read a token we shouldn't have... */ | |
if (token->type != TOKEN_NUMBER) | |
goto lineend; | |
break; | |
} | |
break; | |
case PARSER_LINEEND: | |
lineend: | |
if (token->type == TOKEN_EOF) | |
return NULL; | |
else if (token->type != TOKEN_NEWLINE) | |
errx(1, "Line %u: Unexpected token at the end", | |
lineNo); | |
lineNo++; | |
parserState = PARSER_LINESTART; | |
break; | |
} | |
} | |
} | |
void script_Cleanup(void) | |
{ | |
for (enum SectionType type = 0; type < SECTTYPE_INVALID; type++) | |
free(curaddr[type]); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment