ISSOtm/script.c

## script.c

#include <stdlib.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#include <ctype.h>

#include "link/main.h"
#include "link/script.h"
#include "link/section.h"

#include "extern/err.h"

static inline bool isWhiteSpace(int c)
{
	return c == ' ' || c == '\t';
}

static inline bool isNewline(int c)
{
	return c == '\r' || c == '\n';
}

static bool tryParseNumber(char const *str, uint32_t *number)
{
	char const digits[] = {
		'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
		'A', 'B', 'C', 'D', 'E', 'F'
	};
	uint8_t base = 10;

	if (*str == '$') {
		str++;
		base = 16;
	}

	/* An empty string is not a number */
	if (!*str)
		return false;

	*number = 0;
	do {
		char chr = toupper(*str++);
		uint8_t digit = 0;

		while (digit < base) {
			if (chr == digits[digit])
				break;
		}
		if (digit == base)
			return false;
		*number = *number * base + digit;
	} while (*str);

	return true;
}

enum LinkerScriptTokenType {
	TOKEN_NEWLINE,
	TOKEN_COMMAND,
	TOKEN_BANK,
	TOKEN_NUMBER,
	TOKEN_SECTION,
	TOKEN_EOF,

	TOKEN_INVALID
};

enum LinkerScriptCommand {
	COMMAND_ORG,
	COMMAND_ALIGN,

	COMMAND_INVALID
};

struct LinkerScriptToken {
	enum LinkerScriptTokenType type;
	union LinkerScriptTokenAttr {
		enum LinkerScriptCommand command;
		enum SectionType bank;
		uint32_t number;
		char *string;
	} attr;
};

static char const * const memtypes[] = {
	[SECTTYPE_ROM0]  = "ROM0",
	[SECTTYPE_ROMX]  = "ROMX",
	[SECTTYPE_VRAM]  = "VRAM",
	[SECTTYPE_SRAM]  = "SRAM",
	[SECTTYPE_WRAM0] = "WRAM0",
	[SECTTYPE_WRAMX] = "WRAMX",
	[SECTTYPE_OAM]   = "OAM",
	[SECTTYPE_HRAM]  = "HRAM"
};

static char const * const commands[] = {
	[COMMAND_ORG] = "ORG",
	[COMMAND_ALIGN] = "ALIGN"
};

static uint32_t lineNo;

static int readChar(FILE *file)
{
	int curchar = getc_unlocked(file);

	if (curchar == EOF && ferror(file))
		err(1, "%s: Unexpected error reading linker script", __func__);
	return curchar;
}

static struct LinkerScriptToken const *nextToken(void)
{
	static struct LinkerScriptToken token;
	int curchar;

	/* If the token has a string, make sure to avoid leaking it */
	if (token.type == TOKEN_SECTION)
		free(token.attr.string);

	/* Skip initial whitespace... */
	do
		curchar = readChar(linkerScript);
	while (isWhiteSpace(curchar));

	/* If this is a comment, skip to the end of the line */
	if (curchar == ';') {
		do
			curchar = readChar(linkerScript);
		while (!isNewline(curchar) && curchar != EOF);
	}

	if (curchar == EOF) {
		token.type = TOKEN_EOF;
	} else if (isNewline(curchar)) {
		/* If we have a newline char, this is a newline token */
		token.type = TOKEN_NEWLINE;

		/* FIXME: This works with CRLF newlines, but not CR-only */
		if (curchar == '\r')
			readChar(linkerScript); /* Read and discard LF */
	} else if (curchar == '"') {
		/* If we have a string start, this is a section name */
		token.type = TOKEN_SECTION;
		token.attr.string = NULL; /* Force initial alloc */

		size_t size = 0;
		size_t capacity = 16; /* Half of the default capacity */

		do {
			curchar = readChar(linkerScript);
			if (curchar == EOF || isNewline(curchar))
				errx(1, "Line %u: Unterminated string", lineNo);
			else if (curchar == '"')
				/* Quotes force a string termination */
				curchar = '\0';

			if (size >= capacity || token.attr.string == NULL) {
				capacity *= 2;
				token.attr.string = realloc(token.attr.string,
							    capacity);
				if (!token.attr.string)
					err(1, "%s: Failed to allocate memory for section name",
					    __func__);
			}
			token.attr.string[size++] = curchar;
		} while (curchar);
	} else {
		/* This is either a number, command or bank, that is: a word */
		char *str = NULL;
		size_t size = 0;
		size_t capacity = 8; /* Half of the default capacity */

		do {
			curchar = readChar(linkerScript);
			/* Whitespace or a comment start end the token */
			if (isWhiteSpace(curchar) || curchar == ';')
				curchar = '\0';

			if (size >= capacity || str == NULL) {
				capacity *= 2;
				str = realloc(str, capacity);
				if (!str)
					err(1, "%s: Failed to allocate memory for token",
					    __func__);
			}
		} while (curchar);

		token.type = TOKEN_INVALID;
		for (enum LinkerScriptCommand i = 0; i < COMMAND_INVALID; i++) {
			if (!strcmp(commands[i], str)) {
				token.type = TOKEN_COMMAND;
				token.attr.command = i;
				break;
			}
		}

		if (token.type == TOKEN_INVALID) {
			for (enum SectionType type = 0; type < SECTTYPE_INVALID;
			     type++) {
				if (!strcmp(memtypes[type], str)) {
					token.type = TOKEN_BANK;
					token.attr.bank = type;
					break;
				}
			}
		}

		/* None of the string matched, so perhaps we have a number? */
		if (tryParseNumber(str, &token.attr.number))
			token.type = TOKEN_NUMBER;
		else
			errx(1, "Unknown token \"%s\" on linker script line %u",
			     lineNo);

		free(str);
	}

	return &token;
}

static void processCommand(enum LinkerScriptCommand command, uint16_t arg,
			   uint16_t *pc)
{
	uint16_t alignedPC;

	switch (command) {
	case COMMAND_INVALID:
		/* Not reached */

	case COMMAND_ORG:
		if (*pc < arg)
			errx(1, "Line %u: `ORG` cannot be used to go backwards",
			     lineNo);
		*pc = arg;
		break;

	case COMMAND_ALIGN:
		if (arg > 16)
			arg = 16;
		alignedPC = *pc & sect_AlignmentMask(arg);
		/* Are we already aligned? */
		if (alignedPC != *pc)
			*pc = alignedPC + (1 << arg);
	}
}

enum LinkerScriptParserState {
	PARSER_FIRSTTIME,
	PARSER_LINESTART,
	PARSER_LINEEND
};

/* Part of internal state, but has data that needs to be freed */
static uint16_t *curaddr[SECTTYPE_INVALID];

/* Put as global to ensure it's initialized only once */
static enum LinkerScriptParserState parserState = PARSER_FIRSTTIME;

struct SectionPlacement *script_NextSection(void)
{
	static struct SectionPlacement section;
	static enum SectionType type;
	static uint32_t bank;

	if (parserState == PARSER_FIRSTTIME) {
		lineNo = 1;

		/* Init PC for all banks */
		for (enum SectionType i = 0; i < SECTTYPE_INVALID; i++) {
			curaddr[i] = malloc(sizeof(*curaddr[i]) * nbbanks(i));
			for (uint32_t bank = 0; bank < nbbanks(i); bank++)
				curaddr[i][bank] = startaddr[i];
		}

		type = SECTTYPE_INVALID;

		parserState = PARSER_LINESTART;
	}

	for (;;) {
		struct LinkerScriptToken const *token = nextToken();

		if (type != SECTTYPE_INVALID) {
			if (curaddr[type][bank] > endaddr(type))
				errx(1, "Line %u: PC overflowed (%u > %u)",
				     curaddr[type][bank], endaddr(type));
			if (curaddr[type][bank] < startaddr[type])
				errx(1, "Line %u: PC underflowed (%u < %u)",
				     curaddr[type][bank], startaddr[type]);
		}

		switch (parserState) {
		case PARSER_FIRSTTIME:
			/* Not reached */

		case PARSER_LINESTART:
			switch (token->type) {
			case TOKEN_INVALID:
				/* Not reached */

			case TOKEN_EOF:
				return NULL;

			case TOKEN_NUMBER:
				errx(1, "Line %u: stray number", lineNo);

			case TOKEN_NEWLINE:
				lineNo++;
				break;

			case TOKEN_SECTION:
				parserState = PARSER_LINEEND;

				if (type == SECTTYPE_INVALID)
					errx(1, "Line %u: Didn't specify a location before the section",
					     lineNo);

				section.section =
					sect_GetSection(token->attr.string);
				section.address = curaddr[type][bank];
				section.bank = bank;

				curaddr[type][bank] += section.section->size;
				return &section;

			enum LinkerScriptTokenType tokType;
			union LinkerScriptTokenAttr attr;
			bool hasArg;
			uint32_t arg;

			case TOKEN_COMMAND:
			case TOKEN_BANK:
				tokType = token->type;
				attr = token->attr;

				token = nextToken();
				hasArg = token->type == TOKEN_NUMBER;
				/*
				 * Leaving `arg` uninitialized when `!hasArg`
				 * causes GCC to warn about its use as an
				 * argument to `processCommand`. This cannot
				 * happen because `hasArg` has to be true, but
				 * silence the warning anyways.
				 * I dislike doing this because it could swallow
				 * actual errors, but I don't have a choice.
				 */
				arg = hasArg ? token->attr.number : 0;

				if (tokType == TOKEN_COMMAND) {
					if (type == SECTTYPE_INVALID)
						errx(1, "Line %u: Didn't specify a location before the command",
						     lineNo);
					if (!hasArg)
						errx(1, "Line %u: Command specified without an argument",
						     lineNo);

					processCommand(attr.command, arg,
						       &curaddr[type][bank]);
				} else { /* TOKEN_BANK */
					type = attr.bank;
					/*
					 * If there's only one bank,
					 * specifying the number is optional.
					 */
					if (!hasArg && nbbanks(type) != 1)
						errx(1, "Line %u: Didn't specify a bank number",
						     lineNo);
					else if (!hasArg)
						arg = bankranges[type][0];
					else if (arg < bankranges[type][0])
						errx(1, "Line %u: specified bank number is too low (%u < %u)",
						     lineNo, arg,
						     bankranges[type][0]);
					else if (arg > bankranges[type][1])
						errx(1, "Line %u: specified bank number is too high (%u > %u)",
						     lineNo, arg,
						     bankranges[type][1]);
					bank = arg;
				}

				/* If we read a token we shouldn't have... */
				if (token->type != TOKEN_NUMBER)
					goto lineend;
				break;
			}
			break;

		case PARSER_LINEEND:
lineend:
			if (token->type == TOKEN_EOF)
				return NULL;
			else if (token->type != TOKEN_NEWLINE)
				errx(1, "Line %u: Unexpected token at the end",
				     lineNo);
			lineNo++;
			parserState = PARSER_LINESTART;
			break;
		}
	}
}

void script_Cleanup(void)
{
	for (enum SectionType type = 0; type < SECTTYPE_INVALID; type++)
		free(curaddr[type]);
}

	#include <stdlib.h>
	#include <stdbool.h>
	#include <stdint.h>
	#include <string.h>
	#include <ctype.h>

	#include "link/main.h"
	#include "link/script.h"
	#include "link/section.h"

	#include "extern/err.h"

	static inline bool isWhiteSpace(int c)
	{
	return c == ' ' \|\| c == '\t';
	}

	static inline bool isNewline(int c)
	{
	return c == '\r' \|\| c == '\n';
	}

	static bool tryParseNumber(char const str, uint32_t number)
	{
	char const digits[] = {
	'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
	'A', 'B', 'C', 'D', 'E', 'F'
	};
	uint8_t base = 10;

	if (*str == '$') {
	str++;
	base = 16;
	}

	/* An empty string is not a number */
	if (!*str)
	return false;

	*number = 0;
	do {
	char chr = toupper(*str++);
	uint8_t digit = 0;

	while (digit < base) {
	if (chr == digits[digit])
	break;
	}
	if (digit == base)
	return false;
	number = number * base + digit;
	} while (*str);

	return true;
	}

	enum LinkerScriptTokenType {
	TOKEN_NEWLINE,
	TOKEN_COMMAND,
	TOKEN_BANK,
	TOKEN_NUMBER,
	TOKEN_SECTION,
	TOKEN_EOF,

	TOKEN_INVALID
	};

	enum LinkerScriptCommand {
	COMMAND_ORG,
	COMMAND_ALIGN,

	COMMAND_INVALID
	};

	struct LinkerScriptToken {
	enum LinkerScriptTokenType type;
	union LinkerScriptTokenAttr {
	enum LinkerScriptCommand command;
	enum SectionType bank;
	uint32_t number;
	char *string;
	} attr;
	};

	static char const * const memtypes[] = {
	[SECTTYPE_ROM0] = "ROM0",
	[SECTTYPE_ROMX] = "ROMX",
	[SECTTYPE_VRAM] = "VRAM",
	[SECTTYPE_SRAM] = "SRAM",
	[SECTTYPE_WRAM0] = "WRAM0",
	[SECTTYPE_WRAMX] = "WRAMX",
	[SECTTYPE_OAM] = "OAM",
	[SECTTYPE_HRAM] = "HRAM"
	};

	static char const * const commands[] = {
	[COMMAND_ORG] = "ORG",
	[COMMAND_ALIGN] = "ALIGN"
	};

	static uint32_t lineNo;

	static int readChar(FILE *file)
	{
	int curchar = getc_unlocked(file);

	if (curchar == EOF && ferror(file))
	err(1, "%s: Unexpected error reading linker script", __func__);
	return curchar;
	}

	static struct LinkerScriptToken const *nextToken(void)
	{
	static struct LinkerScriptToken token;
	int curchar;

	/* If the token has a string, make sure to avoid leaking it */
	if (token.type == TOKEN_SECTION)
	free(token.attr.string);

	/* Skip initial whitespace... */
	do
	curchar = readChar(linkerScript);
	while (isWhiteSpace(curchar));

	/* If this is a comment, skip to the end of the line */
	if (curchar == ';') {
	do
	curchar = readChar(linkerScript);
	while (!isNewline(curchar) && curchar != EOF);
	}

	if (curchar == EOF) {
	token.type = TOKEN_EOF;
	} else if (isNewline(curchar)) {
	/* If we have a newline char, this is a newline token */
	token.type = TOKEN_NEWLINE;

	/* FIXME: This works with CRLF newlines, but not CR-only */
	if (curchar == '\r')
	readChar(linkerScript); /* Read and discard LF */
	} else if (curchar == '"') {
	/* If we have a string start, this is a section name */
	token.type = TOKEN_SECTION;
	token.attr.string = NULL; /* Force initial alloc */

	size_t size = 0;
	size_t capacity = 16; /* Half of the default capacity */

	do {
	curchar = readChar(linkerScript);
	if (curchar == EOF \|\| isNewline(curchar))
	errx(1, "Line %u: Unterminated string", lineNo);
	else if (curchar == '"')
	/* Quotes force a string termination */
	curchar = '\0';

	if (size >= capacity \|\| token.attr.string == NULL) {
	capacity *= 2;
	token.attr.string = realloc(token.attr.string,
	capacity);
	if (!token.attr.string)
	err(1, "%s: Failed to allocate memory for section name",
	__func__);
	}
	token.attr.string[size++] = curchar;
	} while (curchar);
	} else {
	/* This is either a number, command or bank, that is: a word */
	char *str = NULL;
	size_t size = 0;
	size_t capacity = 8; /* Half of the default capacity */

	do {
	curchar = readChar(linkerScript);
	/* Whitespace or a comment start end the token */
	if (isWhiteSpace(curchar) \|\| curchar == ';')
	curchar = '\0';

	if (size >= capacity \|\| str == NULL) {
	capacity *= 2;
	str = realloc(str, capacity);
	if (!str)
	err(1, "%s: Failed to allocate memory for token",
	__func__);
	}
	} while (curchar);

	token.type = TOKEN_INVALID;
	for (enum LinkerScriptCommand i = 0; i < COMMAND_INVALID; i++) {
	if (!strcmp(commands[i], str)) {
	token.type = TOKEN_COMMAND;
	token.attr.command = i;
	break;
	}
	}

	if (token.type == TOKEN_INVALID) {
	for (enum SectionType type = 0; type < SECTTYPE_INVALID;
	type++) {
	if (!strcmp(memtypes[type], str)) {
	token.type = TOKEN_BANK;
	token.attr.bank = type;
	break;
	}
	}
	}

	/* None of the string matched, so perhaps we have a number? */
	if (tryParseNumber(str, &token.attr.number))
	token.type = TOKEN_NUMBER;
	else
	errx(1, "Unknown token \"%s\" on linker script line %u",
	lineNo);

	free(str);
	}

	return &token;
	}

	static void processCommand(enum LinkerScriptCommand command, uint16_t arg,
	uint16_t *pc)
	{
	uint16_t alignedPC;

	switch (command) {
	case COMMAND_INVALID:
	/* Not reached */

	case COMMAND_ORG:
	if (*pc < arg)
	errx(1, "Line %u: `ORG` cannot be used to go backwards",
	lineNo);
	*pc = arg;
	break;

	case COMMAND_ALIGN:
	if (arg > 16)
	arg = 16;
	alignedPC = *pc & sect_AlignmentMask(arg);
	/* Are we already aligned? */
	if (alignedPC != *pc)
	*pc = alignedPC + (1 << arg);
	}
	}

	enum LinkerScriptParserState {
	PARSER_FIRSTTIME,
	PARSER_LINESTART,
	PARSER_LINEEND
	};

	/* Part of internal state, but has data that needs to be freed */
	static uint16_t *curaddr[SECTTYPE_INVALID];

	/* Put as global to ensure it's initialized only once */
	static enum LinkerScriptParserState parserState = PARSER_FIRSTTIME;

	struct SectionPlacement *script_NextSection(void)
	{
	static struct SectionPlacement section;
	static enum SectionType type;
	static uint32_t bank;

	if (parserState == PARSER_FIRSTTIME) {
	lineNo = 1;

	/* Init PC for all banks */
	for (enum SectionType i = 0; i < SECTTYPE_INVALID; i++) {
	curaddr[i] = malloc(sizeof(curaddr[i]) nbbanks(i));
	for (uint32_t bank = 0; bank < nbbanks(i); bank++)
	curaddr[i][bank] = startaddr[i];
	}

	type = SECTTYPE_INVALID;

	parserState = PARSER_LINESTART;
	}

	for (;;) {
	struct LinkerScriptToken const *token = nextToken();

	if (type != SECTTYPE_INVALID) {
	if (curaddr[type][bank] > endaddr(type))
	errx(1, "Line %u: PC overflowed (%u > %u)",
	curaddr[type][bank], endaddr(type));
	if (curaddr[type][bank] < startaddr[type])
	errx(1, "Line %u: PC underflowed (%u < %u)",
	curaddr[type][bank], startaddr[type]);
	}

	switch (parserState) {
	case PARSER_FIRSTTIME:
	/* Not reached */

	case PARSER_LINESTART:
	switch (token->type) {
	case TOKEN_INVALID:
	/* Not reached */

	case TOKEN_EOF:
	return NULL;

	case TOKEN_NUMBER:
	errx(1, "Line %u: stray number", lineNo);

	case TOKEN_NEWLINE:
	lineNo++;
	break;

	case TOKEN_SECTION:
	parserState = PARSER_LINEEND;

	if (type == SECTTYPE_INVALID)
	errx(1, "Line %u: Didn't specify a location before the section",
	lineNo);

	section.section =
	sect_GetSection(token->attr.string);
	section.address = curaddr[type][bank];
	section.bank = bank;

	curaddr[type][bank] += section.section->size;
	return &section;

	enum LinkerScriptTokenType tokType;
	union LinkerScriptTokenAttr attr;
	bool hasArg;
	uint32_t arg;

	case TOKEN_COMMAND:
	case TOKEN_BANK:
	tokType = token->type;
	attr = token->attr;

	token = nextToken();
	hasArg = token->type == TOKEN_NUMBER;
	/*
	* Leaving `arg` uninitialized when `!hasArg`
	* causes GCC to warn about its use as an
	* argument to `processCommand`. This cannot
	* happen because `hasArg` has to be true, but
	* silence the warning anyways.
	* I dislike doing this because it could swallow
	* actual errors, but I don't have a choice.
	*/
	arg = hasArg ? token->attr.number : 0;

	if (tokType == TOKEN_COMMAND) {
	if (type == SECTTYPE_INVALID)
	errx(1, "Line %u: Didn't specify a location before the command",
	lineNo);
	if (!hasArg)
	errx(1, "Line %u: Command specified without an argument",
	lineNo);

	processCommand(attr.command, arg,
	&curaddr[type][bank]);
	} else { /* TOKEN_BANK */
	type = attr.bank;
	/*
	* If there's only one bank,
	* specifying the number is optional.
	*/
	if (!hasArg && nbbanks(type) != 1)
	errx(1, "Line %u: Didn't specify a bank number",
	lineNo);
	else if (!hasArg)
	arg = bankranges[type][0];
	else if (arg < bankranges[type][0])
	errx(1, "Line %u: specified bank number is too low (%u < %u)",
	lineNo, arg,
	bankranges[type][0]);
	else if (arg > bankranges[type][1])
	errx(1, "Line %u: specified bank number is too high (%u > %u)",
	lineNo, arg,
	bankranges[type][1]);
	bank = arg;
	}

	/* If we read a token we shouldn't have... */
	if (token->type != TOKEN_NUMBER)
	goto lineend;
	break;
	}
	break;

	case PARSER_LINEEND:
	lineend:
	if (token->type == TOKEN_EOF)
	return NULL;
	else if (token->type != TOKEN_NEWLINE)
	errx(1, "Line %u: Unexpected token at the end",
	lineNo);
	lineNo++;
	parserState = PARSER_LINESTART;
	break;
	}
	}
	}

	void script_Cleanup(void)
	{
	for (enum SectionType type = 0; type < SECTTYPE_INVALID; type++)
	free(curaddr[type]);
	}