Created
July 14, 2020 22:13
-
-
Save RealNeGate/4e693b799174d4b79562a4111c93d28a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "DynArray.h"
/* Categories of token produced by the lexer. */
typedef enum {
    TOKEN_IDENTIFIER  = 0, /* letters, digits and '_' (e.g. abc0123) */
    TOKEN_NUMBER      = 1, /* run of decimal digits (e.g. 1738)      */
    TOKEN_STRING      = 2, /* double-quoted text (e.g. "Blah")       */
    TOKEN_OPEN_PAREN  = 3, /* (                                      */
    TOKEN_CLOSE_PAREN = 4, /* )                                      */
    TOKEN_SEMICOLON   = 5, /* ;                                      */
    TOKEN_COMMA       = 6, /* ,                                      */
} token_type_t;
typedef struct { | |
token_type_t type; | |
const char* source; | |
size_t length; | |
} token_t; | |
DEFINE_ARRAY(token_t); | |
// Concatenates two NUL-terminated strings into a newly allocated buffer.
//
// str1, str2: the strings to join; both must be non-NULL.
// Returns:    a heap-allocated, NUL-terminated copy of str1 followed by str2,
//             or NULL if the allocation fails. The caller owns the result and
//             must release it with free().
char* string_concat(const char* str1, const char* str2) {
    size_t str1Len = strlen(str1);
    size_t str2Len = strlen(str2);

    char* new_str = malloc(str1Len + str2Len + 1); // +1 for the null terminator
    if (new_str == NULL) {
        // Out of memory: report it to the caller instead of writing through NULL.
        return NULL;
    }

    memcpy(new_str, str1, str1Len);
    memcpy(new_str + str1Len, str2, str2Len);
    new_str[str1Len + str2Len] = '\0';
    return new_str;
}
// Tells whether `ch` may appear in an identifier.
//
// ch:    the character to classify.
// first: nonzero when `ch` would be the identifier's first character, in
//        which case digits are rejected.
// Returns 1 for ASCII letters and '_' (plus '0'-'9' when `first` is zero),
// otherwise 0.
int IsIdentifier(char ch, int first) {
    if (ch == '_') return 1;
    if ('a' <= ch && ch <= 'z') return 1;
    if ('A' <= ch && ch <= 'Z') return 1;

    // Digits are only legal after the first character.
    if (first) return 0;
    return ('0' <= ch && ch <= '9');
}
// Returns 1 when `ch` is an ASCII decimal digit ('0'-'9'), otherwise 0.
int IsNumber(char ch) {
    if (ch < '0') return 0;
    if (ch > '9') return 0;
    return 1;
}
// Returns 1 when `ch` is a carriage return, newline, tab or space; otherwise 0.
int IsWhitespace(char ch) {
    switch (ch) {
    case '\r':
    case '\n':
    case '\t':
    case ' ':
        return 1;
    default:
        return 0;
    }
}
// Returns 1 when `ch` is one of the single-character punctuation tokens
// '(' ')' ';' ',' and 0 otherwise.
int IsToken(char ch) {
    switch (ch) {
    case '(':
    case ')':
    case ';':
    case ',':
        return 1;
    default:
        return 0;
    }
}
//#define _DARRAY_THROW(msg) printf("Darray error: %s\n", #msg); | |
int TokenMatch(token_t* t, const char* str) { | |
size_t len = strlen(str); | |
return memcmp(t->source, str, t->length) == 0; | |
} | |
ARRAY_DECL(token_t) Lex(const char* text) { | |
ARRAY_DECL(token_t) tokens = { 0 }; | |
const char* curr = text; | |
const char* eof = text + strlen(text); | |
char ch; | |
token_t t; | |
while (curr != eof) { | |
if (curr > eof) abort(); | |
ch = *curr; | |
if (IsIdentifier(ch, 1)) { | |
t.type = TOKEN_IDENTIFIER; | |
t.source = curr; | |
while (curr != eof) { | |
if (!IsIdentifier(*curr, 0)) break; | |
curr++; | |
} | |
t.length = curr - t.source; | |
printf("Identifier: '%.*s'\n", t.length, t.source); | |
ARRAY_ADD(tokens, token_t, t); | |
} | |
else if (IsNumber(ch)) { | |
t.type = TOKEN_NUMBER; | |
t.source = curr; | |
while (curr != eof) { | |
if (!IsNumber(*curr)) break; | |
curr++; | |
} | |
t.length = curr - t.source; | |
printf("Number: '%.*s'\n", t.length, t.source); | |
ARRAY_ADD(tokens, token_t, t); | |
} | |
else if (IsToken(ch)) { | |
switch (ch) { | |
case '(': t.type = TOKEN_OPEN_PAREN; break; | |
case ')': t.type = TOKEN_CLOSE_PAREN; break; | |
case ';': t.type = TOKEN_SEMICOLON; break; | |
case ',': t.type = TOKEN_COMMA; break; | |
default: abort(); break; | |
} | |
t.source = curr; | |
t.length = 1; | |
curr++; | |
printf("Token: '%.*s'\n", t.length, t.source); | |
ARRAY_ADD(tokens, token_t, t); | |
} | |
else if (ch == '\"') { | |
curr++; | |
t.type = TOKEN_STRING; | |
t.source = curr; | |
while (curr != eof) { | |
if (*curr == '\"') break; | |
curr++; | |
} | |
if (*curr == '\"') { | |
t.length = curr - t.source; | |
printf("String: '%.*s'\n", t.length, t.source); | |
ARRAY_ADD(tokens, token_t, t); | |
curr++; | |
} | |
else { | |
// TODO: Error handling | |
// This string wasnt closed properly | |
abort(); | |
} | |
} | |
else if (IsWhitespace(ch)) { | |
curr++; | |
} | |
else { | |
abort(); | |
} | |
} | |
return tokens; | |
} | |
// Reports a fatal parser error and terminates the program.
//
// str: printf-style format string describing the error.
// ...: arguments consumed by `str`. Note that a "%.*s" precision argument
//      must be passed as an int.
//
// Writes "Parser error: " followed by the formatted message to stdout and
// then calls abort(); this function never returns.
void ParserError(const char* str, ...) {
    va_list args;

    // Use the standard <stdarg.h> macros rather than the MSVC-internal
    // __crt_va_start/__crt_va_end so this builds on every toolchain.
    va_start(args, str);
    printf("Parser error: ");
    vprintf(str, args);
    va_end(args);

    // abort() is not required to flush open streams; flush explicitly so the
    // message is not lost when stdout is buffered.
    fflush(stdout);
    abort();
}
// Checks whether the token at index `curr` exists and has the given type.
//
// tokens: the token array to look in.
// curr:   index of the token to test.
// type:   the token type that is expected at that index.
// Returns 1 when `curr` is inside the array and the token there has type
// `type`; returns 0 when the index is past the end or the type differs.
int ExpectTokenType(const ARRAY_DECL(token_t)* tokens, int curr, token_type_t type) {
    // Short-circuit keeps the lookup from running on an out-of-range index.
    return curr < tokens->siz
        && ARRAY_GET(*tokens, const token_t, curr)->type == type;
}
// Parses and executes a sequence of `print(arg, arg, ...);` statements.
//
// tokens: the token array produced by the lexer.
//
// For every statement, each argument token is written to stdout as
// "PRINT: <text>". Any statement that does not start with the identifier
// `print`, or that is missing its '(' , ')' or ';', is reported through
// ParserError(), which terminates the program.
//
// NOTE(review): arguments are not type-checked and a missing ',' between two
// arguments is tolerated (the loop simply continues with the next token);
// confirm whether that leniency is intended.
void Parse(const ARRAY_DECL(token_t)* tokens) {
    int curr = 0;

    while (curr < tokens->siz) {
        const token_t* t = ARRAY_GET(*tokens, const token_t, curr);

        // Only `print` statements are understood. The "%.*s" precision must
        // be an int, hence the casts on every length below.
        if (t->type != TOKEN_IDENTIFIER) {
            ParserError("%.*s", (int)t->length, t->source);
        }
        if (!TokenMatch(t, "print")) {
            ParserError("%.*s", (int)t->length, t->source);
        }

        // Next token
        curr++;

        // Expect the '('
        if (!ExpectTokenType(tokens, curr, TOKEN_OPEN_PAREN)) {
            ParserError("%.*s", (int)t->length, t->source);
        }
        curr++;

        while (curr < tokens->siz) {
            // Set 't' as a pointer to the parameter
            t = ARRAY_GET(*tokens, const token_t, curr);

            // If the parameter is the ')' then exit the printing steps
            if (t->type == TOKEN_CLOSE_PAREN) break;

            // Print the parameter
            printf("PRINT: %.*s\n", (int)t->length, t->source);

            // Next token
            curr++;

            // Expect the ','
            if (ExpectTokenType(tokens, curr, TOKEN_COMMA)) {
                curr++;
            }
            else if (curr < tokens->siz) {
                // Only look at the next token when there is one: the input
                // may end right after a parameter, and reading index `curr`
                // then would be out of bounds.
                t = ARRAY_GET(*tokens, const token_t, curr);
                if (t->type == TOKEN_CLOSE_PAREN) break;
            }
        }

        // Expect the ')'
        if (!ExpectTokenType(tokens, curr, TOKEN_CLOSE_PAREN)) {
            ParserError("%.*s", (int)t->length, t->source);
        }
        curr++;

        // Expect the ';'
        if (!ExpectTokenType(tokens, curr, TOKEN_SEMICOLON)) {
            ParserError("%.*s", (int)t->length, t->source);
        }
        curr++;
    }
}
int main(int argc, char** argv) { | |
const char* text = | |
"print(\"Hello, World!\", 42);" | |
"print(\"I enjoy getting stung by bees.\");" | |
"print();"; | |
ARRAY_DECL(token_t) tokens = Lex(text); | |
printf("=====================================\n"); | |
Parse(&tokens); | |
printf("=====================================\n"); | |
ARRAY_FREE(tokens); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment