Created
July 2, 2018 21:37
-
-
Save gremerritt/1abfeb2aa80bac1a8264439b3283a6a3 to your computer and use it in GitHub Desktop.
boatlang
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <regex.h> | |
#include <string.h> | |
#include <stdlib.h> | |
/*
 * Number of rules in tokendefs[]. Derived from the table itself so that
 * adding or removing a rule can never leave the count stale. The cast keeps
 * the value an int, as it was when the count was a literal.
 */
#define NUM_TOKENS_DEFS ((int) (sizeof tokendefs / sizeof tokendefs[0]))

/* Token name given to the rules whose matches the lexer loop skips. */
#define WHITESPACE "WHITESPACE"

/* One lexer rule: a POSIX extended regex and the token name it produces. */
typedef struct tokendef {
    char *pattern;       /* regex source handed to regcomp() */
    char *token;         /* token name printed for a match */
    regex_t comp_regex;  /* compiled form of pattern, filled in by compile_regexes() */
} tokendef;

/* Growable array of heap-allocated strings. */
typedef struct char_array {
    char **array;              /* storage; NULL while total_len is 0 */
    unsigned int total_len;    /* number of slots allocated */
    unsigned int current_len;  /* number of slots in use */
} char_array;

/*
 * Lexer rules, tried in table order: the first rule that matches at the
 * current input position wins, so keywords are listed before VAR.
 *
 * Every pattern is anchored with '^'. The lexer only accepts matches that
 * start at offset 0 of the remaining input, so anchoring yields the same
 * tokens while sparing regexec() a scan of the whole remaining input for
 * every rule that does not apply.
 *
 * comp_regex is intentionally not initialised here: objects with static
 * storage duration are zero-initialised, and regcomp() fills it in later.
 *
 * NOTE(review): keyword rules have no word-boundary check, so "int" also
 * matches the first three characters of an identifier such as "integer".
 */
tokendef tokendefs[] = {
    {
        .pattern = "^[ \t]+",
        .token = WHITESPACE
    },
    {
        .pattern = "^int",
        .token = "INT"
    },
    {
        .pattern = "^float",
        .token = "FLOAT"
    },
    {
        .pattern = "^str",
        .token = "STR"
    },
    {
        .pattern = "^class",
        .token = "CLASS"
    },
    {
        .pattern = "^end",
        .token = "END"
    },
    {
        .pattern = "^=",
        .token = "="
    },
    {
        /*
         * Quoted string with backslash escapes. The escape alternative must
         * reach the regex engine as \\. (backslash followed by any char),
         * which takes four backslashes in C source; with only two the regex
         * is \. (a literal dot) and an escaped quote can never be consumed.
         * Inside a bracket expression a backslash is an ordinary character,
         * so [^\"] excludes both the backslash and the double quote.
         */
        .pattern = "^L?(\"(\\\\.|[^\\\"])*\"|'(\\\\.|[^\\'])*')",
        .token = "STRING_LITERAL"
    },
    {
        .pattern = "^[A-Za-z_]+",
        .token = "VAR"
    },
    {
        .pattern = "^(\r\n|\r|\n)",
        .token = WHITESPACE
    },
};
void compile_regexes(); | |
char add_token_value(char_array *array, char *str, unsigned int char_offset, unsigned int char_len); | |
int main(int argc, char **argv) { | |
regmatch_t matches[1]; | |
unsigned char tokendef_index; | |
char fnd; | |
unsigned long int char_offset = 0; | |
char *str = "class Foo\n\tint bax = \"foobar\"end"; | |
int str_len = strlen(str); | |
int exec_err; | |
char_array token_values = { | |
.array = NULL, | |
.total_len = 0, | |
.current_len = 0 | |
}; | |
compile_regexes(); | |
while (1) { | |
tokendef_index = 0; | |
fnd = 0; | |
if (char_offset >= str_len) { | |
break; | |
} | |
for(tokendef_index=0; tokendef_index<NUM_TOKENS_DEFS; tokendef_index++) { | |
// printf("Testing <%s> for %s\n", str + char_offset, tokendefs[tokendef_index].token); | |
exec_err = regexec(&tokendefs[tokendef_index].comp_regex, str + char_offset, 1, matches, 0); | |
if (exec_err == REG_NOMATCH) { | |
continue; | |
} else if (exec_err) { | |
printf("Parsing error: Ran out of memory. Quiting...\n"); | |
break; | |
} else if (matches[0].rm_so == 0 && matches[0].rm_eo != 0) { | |
fnd = 1; | |
if (tokendefs[tokendef_index].token != WHITESPACE) { | |
add_token_value(&token_values, str, char_offset, matches[0].rm_eo); | |
printf(" [%s] matches: <%s>\n", tokendefs[tokendef_index].token, token_values.array[token_values.current_len-1]); | |
} | |
char_offset += matches[0].rm_eo; | |
break; | |
} | |
} | |
if (!fnd) { | |
printf("Syntax error starting at: %s\n", str + char_offset); | |
break; | |
} | |
} | |
} | |
void compile_regexes() { | |
printf("Compiling Regexes... \n"); | |
int i; | |
for(i=0; i<NUM_TOKENS_DEFS; i++) { | |
tokendef *t = &tokendefs[i]; | |
regcomp(&(t->comp_regex), t->pattern, REG_EXTENDED); | |
printf(" [%i] > %s\n", i, t->token); | |
} | |
printf("\n-----\n\n"); | |
} | |
/*
 * Appends a NUL-terminated copy of a substring of str to array.
 *
 * array        growable string array; total_len == 0 means "not yet
 *              allocated" and triggers the initial allocation of two slots.
 * str          source text.
 * char_offset  index in str of the first character to copy.
 * char_len     number of characters to copy.
 *
 * Returns 0 on success. Returns 1 on a NULL argument or any allocation
 * failure, in which case array is left exactly as it was. On success the
 * array owns the new string; the caller frees each entry and the array.
 */
char add_token_value(char_array *array, char *str, unsigned int char_offset, unsigned int char_len) {
    if (array == NULL || str == NULL) {
        return 1;
    }

    if (array->total_len == 0) {
        /* Commit the new capacity only after the allocation succeeded, so a
         * failure does not leave total_len > 0 with a NULL array. */
        char **fresh = malloc(2 * sizeof *fresh);
        if (fresh == NULL) {
            printf("Initial array malloc failed\n");
            return 1;
        }
        array->array = fresh;
        array->total_len = 2;
        array->current_len = 0;
    } else if (array->current_len == array->total_len) {
        unsigned int new_len = array->total_len * 2;

        /* Unsigned doubling wraps around instead of failing; treat a
         * capacity that did not grow as an allocation failure. */
        if (new_len <= array->total_len) {
            printf("Temporary array malloc failed\n");
            return 1;
        }

        /* realloc through a temporary: on failure the old block stays valid
         * and array->array must keep pointing at it. */
        char **grown = realloc(array->array, new_len * sizeof *grown);
        if (grown == NULL) {
            printf("Temporary array malloc failed\n");
            return 1;
        }
        array->array = grown;
        array->total_len = new_len;
    }

    /* Size computed in size_t so char_len + 1 cannot wrap to 0. */
    char *new_str = malloc((size_t) char_len + 1);
    if (new_str == NULL) {
        printf("String alloc failed\n");
        return 1;
    }

    /* strncpy stops at the source terminator, so a char_len that runs past
     * the end of str does not read out of bounds; the result is terminated
     * explicitly because strncpy does not do so when it copies char_len
     * characters. */
    strncpy(new_str, str + char_offset, char_len);
    new_str[char_len] = '\0';

    array->array[array->current_len++] = new_str;
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment