Skip to content

Instantly share code, notes, and snippets.

@RealNeGate
Created July 14, 2020 22:13
Show Gist options
  • Save RealNeGate/4e693b799174d4b79562a4111c93d28a to your computer and use it in GitHub Desktop.
Save RealNeGate/4e693b799174d4b79562a4111c93d28a to your computer and use it in GitHub Desktop.
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "DynArray.h"
// Every kind of token the lexer can emit. Values are sequential from zero.
typedef enum {
    TOKEN_IDENTIFIER = 0, // name such as abc0123
    TOKEN_NUMBER,         // run of decimal digits such as 1738
    TOKEN_STRING,         // double-quoted text such as "Blah"
    TOKEN_OPEN_PAREN,     // (
    TOKEN_CLOSE_PAREN,    // )
    TOKEN_SEMICOLON,      // ;
    TOKEN_COMMA,          // ,
} token_type_t;
// One lexed token: its kind plus a (pointer, length) view of its text.
// The text is not copied and is not NUL-terminated at `length`; print it
// with a bounded format such as "%.*s".
typedef struct {
// Which kind of token this is.
token_type_t type;
// First character of the token's text. Lex() sets this to point into the
// string it was given, so that string must outlive the token.
const char* source;
// Number of characters in the token's text, starting at `source`.
size_t length;
} token_t;
// NOTE(review): presumably instantiates the DynArray.h dynamic-array type for
// token_t that ARRAY_DECL(token_t) refers to - confirm against DynArray.h.
DEFINE_ARRAY(token_t);
// Concatenates two NUL-terminated strings into a newly allocated buffer.
//
// str1, str2: the strings to join; neither is modified.
// Returns a heap-allocated, NUL-terminated copy of str1 followed by str2, or
// NULL if the allocation fails. The caller owns the result and must free() it.
char* string_concat(const char* str1, const char* str2) {
    size_t str1Len = strlen(str1);
    size_t str2Len = strlen(str2);

    char* new_str = malloc(str1Len + str2Len + 1); // +1 for the null terminator
    if (new_str == NULL) {
        // Out of memory: report it to the caller instead of writing through NULL.
        return NULL;
    }

    memcpy(new_str, str1, str1Len);
    memcpy(new_str + str1Len, str2, str2Len);
    new_str[str1Len + str2Len] = '\0';
    return new_str;
}
// Reports whether `ch` may appear in an identifier.
//
// ch:    the character to classify (ASCII letters, '_' and digits only).
// first: non-zero when `ch` would be the first character of the identifier,
//        in which case digits are rejected.
// Returns 1 if the character is allowed, 0 otherwise.
int IsIdentifier(char ch, int first) {
    if (ch == '_') return 1;
    if ('a' <= ch && ch <= 'z') return 1;
    if ('A' <= ch && ch <= 'Z') return 1;

    // Digits are only valid after the first character.
    if (first) return 0;
    return '0' <= ch && ch <= '9';
}
// Reports whether `ch` is an ASCII decimal digit ('0' through '9').
// Returns 1 for a digit, 0 otherwise.
int IsNumber(char ch) {
    if (ch < '0') return 0;
    return ch <= '9';
}
// Reports whether `ch` is one of the whitespace characters the lexer skips:
// carriage return, line feed, tab or space.
// Returns 1 for whitespace, 0 otherwise.
int IsWhitespace(char ch) {
    switch (ch) {
    case '\r':
    case '\n':
    case '\t':
    case ' ':
        return 1;
    default:
        return 0;
    }
}
// Reports whether `ch` is one of the single-character punctuation tokens:
// '(', ')', ';' or ','.
// Returns 1 for such a character, 0 otherwise.
int IsToken(char ch) {
    switch (ch) {
    case '(':
    case ')':
    case ';':
    case ',':
        return 1;
    default:
        return 0;
    }
}
//#define _DARRAY_THROW(msg) printf("Darray error: %s\n", #msg);
int TokenMatch(token_t* t, const char* str) {
size_t len = strlen(str);
return memcmp(t->source, str, t->length) == 0;
}
// Splits `text` into tokens and returns them in source order.
//
// Recognised tokens: identifiers (letters/underscore, then letters, digits or
// underscore), runs of decimal digits, the punctuation '(' ')' ';' ',' and
// double-quoted strings (no escape sequences; the quotes are not part of the
// token text). Spaces, tabs, CR and LF between tokens are skipped.
//
// text: NUL-terminated input. Tokens store pointers into this buffer rather
//       than copies, so it must stay alive while the tokens are in use.
// Returns the token array. Each token is also echoed to stdout as it is
// produced. Calls abort() on an unrecognised character or on a string that is
// missing its closing quote.
ARRAY_DECL(token_t) Lex(const char* text) {
    ARRAY_DECL(token_t) tokens = { 0 };
    const char* curr = text;
    const char* eof = text + strlen(text);
    char ch;
    token_t t;

    while (curr != eof) {
        // Sanity check: the cursor must never run past the terminator.
        if (curr > eof) abort();
        ch = *curr;

        if (IsIdentifier(ch, 1)) {
            t.type = TOKEN_IDENTIFIER;
            t.source = curr;
            while (curr != eof) {
                if (!IsIdentifier(*curr, 0)) break;
                curr++;
            }
            t.length = curr - t.source;
            // The '*' precision of "%.*s" takes an int, not a size_t.
            printf("Identifier: '%.*s'\n", (int)t.length, t.source);
            ARRAY_ADD(tokens, token_t, t);
        }
        else if (IsNumber(ch)) {
            t.type = TOKEN_NUMBER;
            t.source = curr;
            while (curr != eof) {
                if (!IsNumber(*curr)) break;
                curr++;
            }
            t.length = curr - t.source;
            printf("Number: '%.*s'\n", (int)t.length, t.source);
            ARRAY_ADD(tokens, token_t, t);
        }
        else if (IsToken(ch)) {
            switch (ch) {
            case '(': t.type = TOKEN_OPEN_PAREN; break;
            case ')': t.type = TOKEN_CLOSE_PAREN; break;
            case ';': t.type = TOKEN_SEMICOLON; break;
            case ',': t.type = TOKEN_COMMA; break;
            default: abort(); break;
            }
            t.source = curr;
            t.length = 1;
            curr++;
            printf("Token: '%.*s'\n", (int)t.length, t.source);
            ARRAY_ADD(tokens, token_t, t);
        }
        else if (ch == '\"') {
            // Skip the opening quote so the token text excludes it.
            curr++;
            t.type = TOKEN_STRING;
            t.source = curr;
            while (curr != eof) {
                if (*curr == '\"') break;
                curr++;
            }
            // At end of input *curr is the NUL terminator, so this read is in
            // bounds and only succeeds when a closing quote was found.
            if (*curr == '\"') {
                t.length = curr - t.source;
                printf("String: '%.*s'\n", (int)t.length, t.source);
                ARRAY_ADD(tokens, token_t, t);
                // Step over the closing quote.
                curr++;
            }
            else {
                // TODO: Error handling
                // This string wasn't closed properly
                abort();
            }
        }
        else if (IsWhitespace(ch)) {
            curr++;
        }
        else {
            // Character that starts no known token.
            abort();
        }
    }
    return tokens;
}
// Reports a fatal parser error and terminates the program.
//
// str: printf-style format string describing the error.
// ...: arguments consumed by `str`.
// Writes "Parser error: " followed by the formatted message to stdout, then
// calls abort(); this function never returns.
void ParserError(const char* str, ...) {
    va_list args;
    // Standard <stdarg.h> macros instead of the MSVC-internal __crt_va_* ones.
    va_start(args, str);
    printf("Parser error: ");
    vprintf(str, args);
    va_end(args);
    // abort() is not required to flush open streams, so push the message out
    // before terminating.
    fflush(stdout);
    abort();
}
// Checks whether the token at index `curr` exists and has the given type.
//
// tokens: the token array to look in.
// curr:   index of the token to inspect.
// type:   the token type that is required at that index.
// Returns 1 when `curr` is below the array's size and that token's type equals
// `type`; returns 0 otherwise. Nothing is consumed or modified.
int ExpectTokenType(const ARRAY_DECL(token_t)* tokens, int curr, token_type_t type) {
    if (curr < tokens->siz) {
        if (ARRAY_GET(*tokens, const token_t, curr)->type == type) {
            return 1;
        }
    }
    return 0;
}
// Parses and executes a token stream made of `print(...)` statements.
//
// Each statement must be: the identifier `print`, '(', zero or more parameter
// tokens, ')' and ';'. Every parameter token is written to stdout as
// "PRINT: <text>". A comma after a parameter is consumed when present but is
// not required.
//
// tokens: the token array to parse.
// Any other token sequence is reported through ParserError(), which aborts.
void Parse(const ARRAY_DECL(token_t)* tokens) {
    int curr = 0;
    while (curr < tokens->siz) {
        const token_t* t = ARRAY_GET(*tokens, const token_t, curr);

        // Only `print` statements are understood; anything else is fatal.
        // The '*' precision of "%.*s" takes an int, hence the casts below.
        if (t->type != TOKEN_IDENTIFIER) ParserError("%.*s", (int)t->length, t->source);
        if (!TokenMatch(t, "print")) ParserError("%.*s", (int)t->length, t->source);

        // Next token
        curr++;
        // Expect the '('
        if (!ExpectTokenType(tokens, curr, TOKEN_OPEN_PAREN)) ParserError("%.*s", (int)t->length, t->source);
        curr++;

        while (curr < tokens->siz) {
            // Set 't' as a pointer to the parameter
            t = ARRAY_GET(*tokens, const token_t, curr);
            // If the parameter is the ')' then exit the printing steps
            if (t->type == TOKEN_CLOSE_PAREN) break;
            // Print the parameter
            printf("PRINT: %.*s\n", (int)t->length, t->source);
            // Next token
            curr++;
            // Consume the ',' when there is one. Otherwise fall through to the
            // loop condition, which bounds-checks `curr` before the next token
            // is fetched and tested for ')'; fetching it here unconditionally
            // would read past the array when the input ends after a parameter.
            if (ExpectTokenType(tokens, curr, TOKEN_COMMA)) {
                curr++;
            }
        }

        // Expect the ')'
        if (!ExpectTokenType(tokens, curr, TOKEN_CLOSE_PAREN)) ParserError("%.*s", (int)t->length, t->source);
        curr++;
        // Expect the ';'
        if (!ExpectTokenType(tokens, curr, TOKEN_SEMICOLON)) ParserError("%.*s", (int)t->length, t->source);
        curr++;
    }
}
int main(int argc, char** argv) {
const char* text =
"print(\"Hello, World!\", 42);"
"print(\"I enjoy getting stung by bees.\");"
"print();";
ARRAY_DECL(token_t) tokens = Lex(text);
printf("=====================================\n");
Parse(&tokens);
printf("=====================================\n");
ARRAY_FREE(tokens);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment