RealNeGate/SimpleCompiler.c

## SimpleCompiler.c
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "DynArray.h"

// Kinds of token the lexer can produce. The trailing comment on each
// enumerator shows an example of the source text it stands for.
typedef enum {
	TOKEN_IDENTIFIER,	// abc0123 (starts with a letter or '_', digits allowed afterwards)
	TOKEN_NUMBER,		// 1738 (a run of decimal digits)
	TOKEN_STRING,		// "Blah" (the token text excludes the surrounding quotes)

	TOKEN_OPEN_PAREN,	// (
	TOKEN_CLOSE_PAREN,	// )
	TOKEN_SEMICOLON,	// ;
	TOKEN_COMMA,		// ,
} token_type_t;

// One lexed token: a typed slice of the text that was handed to the lexer.
typedef struct {
	token_type_t type;	// which kind of token this is
	const char* source;	// first character of the token inside the lexed text (not NUL-terminated)
	size_t length;		// number of characters that belong to the token
} token_t;

// NOTE(review): DEFINE_ARRAY comes from DynArray.h; presumably it declares the
// dynamic-array type that ARRAY_DECL(token_t) refers to - confirm against that header.
DEFINE_ARRAY(token_t);

// Concatenates two NUL-terminated strings into a freshly allocated buffer.
//
// str1, str2: NUL-terminated inputs; neither is modified.
// Returns a heap-allocated, NUL-terminated string holding str1 followed by
// str2, or NULL if the allocation fails. The caller owns the result and must
// release it with free().
char* string_concat(const char* str1, const char* str2) {
	size_t str1Len = strlen(str1);
	size_t str2Len = strlen(str2);
	char* new_str = malloc(str1Len + str2Len + 1); // +1 for the null terminator
	if (new_str == NULL) return NULL; // out of memory: report it instead of writing through NULL

	memcpy(new_str, str1, str1Len);
	memcpy(new_str + str1Len, str2, str2Len);
	new_str[str1Len + str2Len] = '\0';

	return new_str;
}

// Reports whether 'ch' may appear in an identifier.
//
// ch:    character to classify.
// first: non-zero when 'ch' would be the identifier's leading character,
//        in which case decimal digits are rejected.
// Returns 1 if the character is acceptable, 0 otherwise.
int IsIdentifier(char ch, int first) {
	if (ch == '_') return 1;
	if (ch >= 'a' && ch <= 'z') return 1;
	if (ch >= 'A' && ch <= 'Z') return 1;

	// Only digits are left to consider, and they are not valid up front.
	if (first) return 0;
	return ch >= '0' && ch <= '9';
}

// Returns 1 when 'ch' is one of the decimal digits '0'..'9', otherwise 0.
int IsNumber(char ch) {
	// Reject everything that falls outside the digit range.
	return !(ch < '0' || ch > '9');
}

// Returns 1 when 'ch' is a space, tab, carriage return or line feed,
// otherwise 0.
int IsWhitespace(char ch) {
	switch (ch) {
	case ' ':
	case '\t':
	case '\n':
	case '\r':
		return 1;
	default:
		return 0;
	}
}

// Returns 1 when 'ch' is one of the single-character punctuation tokens
// '(' ')' ';' ',' and 0 for every other character.
int IsToken(char ch) {
	switch (ch) {
	case '(':
	case ')':
	case ';':
	case ',':
		return 1;
	default:
		return 0;
	}
}

//#define _DARRAY_THROW(msg) printf("Darray error: %s\n", #msg);

int TokenMatch(token_t* t, const char* str) {
	size_t len = strlen(str);
	return memcmp(t->source, str, t->length) == 0;
}

// Splits 'text' into a flat array of tokens.
//
// text: NUL-terminated source to scan. Every token's 'source' points into
//       this buffer, so it must stay alive for as long as the tokens are used.
// Returns the token array by value (main() releases it with ARRAY_FREE).
//
// Recognised input: identifiers, runs of decimal digits, double-quoted
// strings (stored without their quotes; no escape sequences are handled),
// the punctuation ( ) ; , and whitespace, which is skipped. Each token is
// also echoed to stdout as it is found. An unterminated string or any other
// character ends the program through abort().
ARRAY_DECL(token_t) Lex(const char* text) {
	ARRAY_DECL(token_t) tokens = { 0 };

	const char* curr = text;
	const char* eof = text + strlen(text);

	char ch;
	token_t t;
	while (curr != eof) {
		// Sanity check: the cursor must never run past the terminator.
		if (curr > eof) abort();

		ch = *curr;
		if (IsIdentifier(ch, 1)) {
			t.type = TOKEN_IDENTIFIER;
			t.source = curr;

			// Consume the remaining identifier characters (digits now allowed).
			while (curr != eof) {
				if (!IsIdentifier(*curr, 0)) break;
				curr++;
			}

			t.length = curr - t.source;
			// "%.*s" reads its precision as an int, so the size_t length is converted explicitly.
			printf("Identifier: '%.*s'\n", (int)t.length, t.source);
			ARRAY_ADD(tokens, token_t, t);
		}
		else if (IsNumber(ch)) {
			t.type = TOKEN_NUMBER;
			t.source = curr;

			while (curr != eof) {
				if (!IsNumber(*curr)) break;
				curr++;
			}

			t.length = curr - t.source;
			printf("Number: '%.*s'\n", (int)t.length, t.source);
			ARRAY_ADD(tokens, token_t, t);
		}
		else if (IsToken(ch)) {
			switch (ch) {
			case '(': t.type = TOKEN_OPEN_PAREN; break;
			case ')': t.type = TOKEN_CLOSE_PAREN; break;
			case ';': t.type = TOKEN_SEMICOLON; break;
			case ',': t.type = TOKEN_COMMA; break;
			default: abort(); break;
			}
			t.source = curr;
			t.length = 1;
			curr++;

			printf("Token: '%.*s'\n", (int)t.length, t.source);
			ARRAY_ADD(tokens, token_t, t);
		}
		else if (ch == '\"') {
			// Skip the opening quote so the token text starts at the contents.
			curr++;
			t.type = TOKEN_STRING;
			t.source = curr;

			while (curr != eof) {
				if (*curr == '\"') break;
				curr++;
			}

			// At end of input *curr is the NUL terminator, so this read is in bounds.
			if (*curr == '\"') {
				t.length = curr - t.source;
				printf("String: '%.*s'\n", (int)t.length, t.source);
				ARRAY_ADD(tokens, token_t, t);
				curr++; // step over the closing quote
			}
			else {
				// TODO: Error handling
				// This string wasn't closed properly
				abort();
			}
		}
		else if (IsWhitespace(ch)) {
			curr++;
		}
		else {
			// Character that belongs to no known token.
			abort();
		}
	}

	return tokens;
}

// Reports a parse failure and terminates the program.
//
// str: printf-style format string; the variadic arguments supply its values.
// Writes "Parser error: " followed by the formatted message to stdout and
// then calls abort(), so this function never returns to its caller.
void ParserError(const char* str, ...) {
	va_list args;
	// Standard <stdarg.h> macros rather than the compiler-internal
	// __crt_va_start/__crt_va_end, so the file builds with any C compiler.
	va_start(args, str);
	printf("Parser error: ");
	vprintf(str, args);
	va_end(args);

	// abort() is not required to flush open streams; flush explicitly so the
	// message is not lost inside the stdout buffer.
	fflush(stdout);
	abort();
}

// Checks, without consuming anything, whether the token at index 'curr' has
// kind 'type'.
//
// tokens: token array to inspect.
// curr:   index of the token of interest.
// type:   kind the token is required to have.
// Returns 1 when the index is below tokens->siz and the kinds agree,
// otherwise 0.
int ExpectTokenType(const ARRAY_DECL(token_t)* tokens, int curr, token_type_t type) {
	if (curr < tokens->siz) {
		const token_t* candidate = ARRAY_GET(*tokens, const token_t, curr);
		return candidate->type == type;
	}

	// Past the end of the array: nothing there can match.
	return 0;
}

// Parses the token stream and executes it on the fly.
//
// tokens: token array, as produced by Lex().
//
// The only statement understood is
//     print ( arg arg ... ) ;
// Every token between the parentheses is written to stdout as
// "PRINT: <text>". A comma following an argument is consumed when present
// but is not required. Any other input ends the program through
// ParserError(), which does not return.
void Parse(const ARRAY_DECL(token_t)* tokens) {
	int curr = 0;
	while (curr < tokens->siz) {
		const token_t* t = ARRAY_GET(*tokens, const token_t, curr);

		// "%.*s" reads its precision as an int, hence the casts on 'length' below.
		if (t->type != TOKEN_IDENTIFIER || !TokenMatch(t, "print")) {
			ParserError("%.*s", (int)t->length, t->source);
		}

		// Step past 'print'
		curr++;

		// Expect the '('
		if (!ExpectTokenType(tokens, curr, TOKEN_OPEN_PAREN)) ParserError("%.*s", (int)t->length, t->source);
		curr++;

		while (curr < tokens->siz) {
			// Point 't' at the current argument
			t = ARRAY_GET(*tokens, const token_t, curr);
			// A ')' ends the argument list
			if (t->type == TOKEN_CLOSE_PAREN) break;

			// Print the argument
			printf("PRINT: %.*s\n", (int)t->length, t->source);

			// Next token
			curr++;

			// Consume the ',' separator when there is one. No other case
			// needs handling here: the loop condition stops at the end of the
			// array and the check above stops at ')', so the token at 'curr'
			// is only ever read while 'curr' is in range.
			if (ExpectTokenType(tokens, curr, TOKEN_COMMA)) {
				curr++;
			}
		}

		// Expect the ')'
		if (!ExpectTokenType(tokens, curr, TOKEN_CLOSE_PAREN)) ParserError("%.*s", (int)t->length, t->source);
		curr++;

		// Expect the ';'
		if (!ExpectTokenType(tokens, curr, TOKEN_SEMICOLON)) ParserError("%.*s", (int)t->length, t->source);
		curr++;
	}
}

int main(int argc, char** argv) {
	const char* text =
		"print(\"Hello, World!\", 42);"
		"print(\"I enjoy getting stung by bees.\");"
		"print();";

	ARRAY_DECL(token_t) tokens = Lex(text);

	printf("=====================================\n");

	Parse(&tokens);

	printf("=====================================\n");

	ARRAY_FREE(tokens);
	return 0;
}
	#include <stdio.h>
	#include <string.h>
	#include <stdlib.h>
	#include "DynArray.h"

	// Kinds of token the lexer can produce. The trailing comment on each
	// enumerator shows an example of the source text it stands for.
	typedef enum {
	TOKEN_IDENTIFIER, // abc0123 (starts with a letter or '_', digits allowed afterwards)
	TOKEN_NUMBER, // 1738 (a run of decimal digits)
	TOKEN_STRING, // "Blah" (the token text excludes the surrounding quotes)

	TOKEN_OPEN_PAREN, // (
	TOKEN_CLOSE_PAREN, // )
	TOKEN_SEMICOLON, // ;
	TOKEN_COMMA, // ,
	} token_type_t;

	// One lexed token: a typed slice of the text that was handed to the lexer.
	typedef struct {
	token_type_t type; // which kind of token this is
	const char* source; // first character of the token inside the lexed text (not NUL-terminated)
	size_t length; // number of characters that belong to the token
	} token_t;

	// NOTE(review): DEFINE_ARRAY comes from DynArray.h; presumably it declares the
	// dynamic-array type that ARRAY_DECL(token_t) refers to - confirm against that header.
	DEFINE_ARRAY(token_t);

	// Concatenates two NUL-terminated strings into a freshly allocated buffer.
	//
	// str1, str2: NUL-terminated inputs; neither is modified.
	// Returns a heap-allocated, NUL-terminated string holding str1 followed by
	// str2, or NULL if the allocation fails. The caller owns the result and must
	// release it with free().
	char* string_concat(const char* str1, const char* str2) {
		size_t str1Len = strlen(str1);
		size_t str2Len = strlen(str2);
		char* new_str = malloc(str1Len + str2Len + 1); // +1 for the null terminator
		if (new_str == NULL) return NULL; // out of memory: report it instead of writing through NULL

		memcpy(new_str, str1, str1Len);
		memcpy(new_str + str1Len, str2, str2Len);
		new_str[str1Len + str2Len] = '\0';

		return new_str;
	}

	// Reports whether 'ch' may appear in an identifier.
	//
	// ch:    character to classify.
	// first: non-zero when 'ch' would be the identifier's leading character,
	//        in which case decimal digits are rejected.
	// Returns 1 if the character is acceptable, 0 otherwise.
	int IsIdentifier(char ch, int first) {
		int v = (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch == '_');
		// Digits are only valid after the leading character.
		if (!first) v |= (ch >= '0' && ch <= '9');

		return v;
	}

	// Returns 1 when 'ch' is one of the decimal digits '0'..'9', otherwise 0.
	int IsNumber(char ch) {
		// Reject everything that falls outside the digit range.
		return !(ch < '0' || ch > '9');
	}

	// Returns 1 when 'ch' is a carriage return, line feed, tab or space,
	// otherwise 0.
	int IsWhitespace(char ch) {
		return ch == '\r' || ch == '\n' || ch == '\t' || ch == ' ';
	}

	// Returns 1 when 'ch' is one of the single-character punctuation tokens
	// '(' ')' ';' ',' and 0 for every other character.
	int IsToken(char ch) {
		return ch == '(' || ch == ')' || ch == ';' || ch == ',';
	}

	//#define _DARRAY_THROW(msg) printf("Darray error: %s\n", #msg);

	int TokenMatch(token_t* t, const char* str) {
	size_t len = strlen(str);
	return memcmp(t->source, str, t->length) == 0;
	}

	// Splits 'text' into a flat array of tokens.
	//
	// text: NUL-terminated source to scan. Every token's 'source' points into
	//       this buffer, so it must stay alive for as long as the tokens are used.
	// Returns the token array by value (main() releases it with ARRAY_FREE).
	//
	// Recognised input: identifiers, runs of decimal digits, double-quoted
	// strings (stored without their quotes; no escape sequences are handled),
	// the punctuation ( ) ; , and whitespace, which is skipped. Each token is
	// also echoed to stdout as it is found. An unterminated string or any other
	// character ends the program through abort().
	ARRAY_DECL(token_t) Lex(const char* text) {
		ARRAY_DECL(token_t) tokens = { 0 };

		const char* curr = text;
		const char* eof = text + strlen(text);

		char ch;
		token_t t;
		while (curr != eof) {
			// Sanity check: the cursor must never run past the terminator.
			if (curr > eof) abort();

			ch = *curr;
			if (IsIdentifier(ch, 1)) {
				t.type = TOKEN_IDENTIFIER;
				t.source = curr;

				// Consume the remaining identifier characters (digits now allowed).
				while (curr != eof) {
					if (!IsIdentifier(*curr, 0)) break;
					curr++;
				}

				t.length = curr - t.source;
				// "%.*s" reads its precision as an int, so the size_t length is converted explicitly.
				printf("Identifier: '%.*s'\n", (int)t.length, t.source);
				ARRAY_ADD(tokens, token_t, t);
			}
			else if (IsNumber(ch)) {
				t.type = TOKEN_NUMBER;
				t.source = curr;

				while (curr != eof) {
					if (!IsNumber(*curr)) break;
					curr++;
				}

				t.length = curr - t.source;
				printf("Number: '%.*s'\n", (int)t.length, t.source);
				ARRAY_ADD(tokens, token_t, t);
			}
			else if (IsToken(ch)) {
				switch (ch) {
				case '(': t.type = TOKEN_OPEN_PAREN; break;
				case ')': t.type = TOKEN_CLOSE_PAREN; break;
				case ';': t.type = TOKEN_SEMICOLON; break;
				case ',': t.type = TOKEN_COMMA; break;
				default: abort(); break;
				}
				t.source = curr;
				t.length = 1;
				curr++;

				printf("Token: '%.*s'\n", (int)t.length, t.source);
				ARRAY_ADD(tokens, token_t, t);
			}
			else if (ch == '\"') {
				// Skip the opening quote so the token text starts at the contents.
				curr++;
				t.type = TOKEN_STRING;
				t.source = curr;

				while (curr != eof) {
					if (*curr == '\"') break;
					curr++;
				}

				// At end of input *curr is the NUL terminator, so this read is in bounds.
				if (*curr == '\"') {
					t.length = curr - t.source;
					printf("String: '%.*s'\n", (int)t.length, t.source);
					ARRAY_ADD(tokens, token_t, t);
					curr++; // step over the closing quote
				}
				else {
					// TODO: Error handling
					// This string wasn't closed properly
					abort();
				}
			}
			else if (IsWhitespace(ch)) {
				curr++;
			}
			else {
				// Character that belongs to no known token.
				abort();
			}
		}

		return tokens;
	}

	// Reports a parse failure and terminates the program.
	//
	// str: printf-style format string; the variadic arguments supply its values.
	// Writes "Parser error: " followed by the formatted message to stdout and
	// then calls abort(), so this function never returns to its caller.
	void ParserError(const char* str, ...) {
		va_list args;
		// Standard <stdarg.h> macros rather than the compiler-internal
		// __crt_va_start/__crt_va_end, so the file builds with any C compiler.
		va_start(args, str);
		printf("Parser error: ");
		vprintf(str, args);
		va_end(args);

		// abort() is not required to flush open streams; flush explicitly so the
		// message is not lost inside the stdout buffer.
		fflush(stdout);
		abort();
	}

	// Checks, without consuming anything, whether the token at index 'curr' has
	// kind 'type'.
	//
	// tokens: token array to inspect.
	// curr:   index of the token of interest.
	// type:   kind the token is required to have.
	// Returns 1 when the index is below tokens->siz and the kinds agree,
	// otherwise 0.
	int ExpectTokenType(const ARRAY_DECL(token_t)* tokens, int curr, token_type_t type) {
		if (curr < tokens->siz) {
			const token_t* candidate = ARRAY_GET(*tokens, const token_t, curr);
			return candidate->type == type;
		}

		// Past the end of the array: nothing there can match.
		return 0;
	}

	// Parses the token stream and executes it on the fly.
	//
	// tokens: token array, as produced by Lex().
	//
	// The only statement understood is
	//     print ( arg arg ... ) ;
	// Every token between the parentheses is written to stdout as
	// "PRINT: <text>". A comma following an argument is consumed when present
	// but is not required. Any other input ends the program through
	// ParserError(), which does not return.
	void Parse(const ARRAY_DECL(token_t)* tokens) {
		int curr = 0;
		while (curr < tokens->siz) {
			const token_t* t = ARRAY_GET(*tokens, const token_t, curr);

			// "%.*s" reads its precision as an int, hence the casts on 'length' below.
			if (t->type != TOKEN_IDENTIFIER || !TokenMatch(t, "print")) {
				ParserError("%.*s", (int)t->length, t->source);
			}

			// Step past 'print'
			curr++;

			// Expect the '('
			if (!ExpectTokenType(tokens, curr, TOKEN_OPEN_PAREN)) ParserError("%.*s", (int)t->length, t->source);
			curr++;

			while (curr < tokens->siz) {
				// Point 't' at the current argument
				t = ARRAY_GET(*tokens, const token_t, curr);
				// A ')' ends the argument list
				if (t->type == TOKEN_CLOSE_PAREN) break;

				// Print the argument
				printf("PRINT: %.*s\n", (int)t->length, t->source);

				// Next token
				curr++;

				// Consume the ',' separator when there is one. No other case
				// needs handling here: the loop condition stops at the end of the
				// array and the check above stops at ')', so the token at 'curr'
				// is only ever read while 'curr' is in range.
				if (ExpectTokenType(tokens, curr, TOKEN_COMMA)) {
					curr++;
				}
			}

			// Expect the ')'
			if (!ExpectTokenType(tokens, curr, TOKEN_CLOSE_PAREN)) ParserError("%.*s", (int)t->length, t->source);
			curr++;

			// Expect the ';'
			if (!ExpectTokenType(tokens, curr, TOKEN_SEMICOLON)) ParserError("%.*s", (int)t->length, t->source);
			curr++;
		}
	}

	int main(int argc, char** argv) {
	const char* text =
	"print(\"Hello, World!\", 42);"
	"print(\"I enjoy getting stung by bees.\");"
	"print();";

	ARRAY_DECL(token_t) tokens = Lex(text);

	printf("=====================================\n");

	Parse(&tokens);

	printf("=====================================\n");

	ARRAY_FREE(tokens);
	return 0;
	}