Skip to content

Instantly share code, notes, and snippets.

@gremerritt
Created July 2, 2018 21:37
Show Gist options
  • Save gremerritt/1abfeb2aa80bac1a8264439b3283a6a3 to your computer and use it in GitHub Desktop.
Save gremerritt/1abfeb2aa80bac1a8264439b3283a6a3 to your computer and use it in GitHub Desktop.
boatlang
#include <stdio.h>
#include <regex.h>
#include <string.h>
#include <stdlib.h>
#define NUM_TOKENS_DEFS 10
#define WHITESPACE "WHITESPACE"
/*
 * One lexer rule: a regular expression and the token name it produces.
 */
typedef struct tokendef tokendef;

struct tokendef {
    char *pattern;       /* regex source text; compiled with REG_EXTENDED by compile_regexes() */
    char *token;         /* token name printed when the rule matches */
    regex_t comp_regex;  /* compiled form of `pattern`, filled in by compile_regexes() */
};
/*
 * Growable list of heap-allocated strings, maintained by add_token_value().
 */
typedef struct char_array char_array;

struct char_array {
    char **array;              /* slots holding the stored strings (NULL until first insert) */
    unsigned int total_len;    /* number of slots currently allocated in `array` */
    unsigned int current_len;  /* number of slots in use; also the next insert index */
};
/*
 * Lexer rule table. main() tries the rules in order at the current input
 * position and takes the FIRST rule that matches at offset 0, so order is
 * significant: keywords must come before the generic VAR rule.
 *
 * NOTE(review): because the first match wins (not the longest), a keyword
 * pattern such as "int" also matches the start of an identifier like
 * "integer", which is then split into INT + VAR.
 *
 * Patterns are POSIX extended regular expressions (see compile_regexes()).
 * Remember that each pattern passes through C string escaping first: a
 * regex backslash must be written as "\\\\" here.
 */
tokendef tokendefs[] = {
    {
        .pattern = "[ \t]+",
        .token = WHITESPACE,
        .comp_regex = {0}
    },
    {
        .pattern = "int",
        .token = "INT",
        .comp_regex = {0}
    },
    {
        .pattern = "float",
        .token = "FLOAT",
        .comp_regex = {0}
    },
    {
        .pattern = "str",
        .token = "STR",
        .comp_regex = {0}
    },
    {
        .pattern = "class",
        .token = "CLASS",
        .comp_regex = {0}
    },
    {
        .pattern = "end",
        .token = "END",
        .comp_regex = {0}
    },
    {
        .pattern = "=",
        .token = "=",
        .comp_regex = {0}
    },
    {
        /*
         * Regex seen by regcomp:  L?("(\\.|[^\"])*"|'(\\.|[^\'])*')
         * i.e. an optional L prefix, then a double- or single-quoted body
         * made of either an escape (backslash + any char) or any char that
         * is neither a backslash nor the closing quote. Inside a bracket
         * expression a backslash is an ordinary character, so it is not
         * doubled there. The escape alternative needs "\\\\." in C; with
         * only "\\." the regex was `\.` (a literal dot) and literals that
         * contained an escape such as \" could not be matched.
         */
        .pattern = "L?(\"(\\\\.|[^\\\"])*\"|'(\\\\.|[^\\\'])*')",
        .token = "STRING_LITERAL",
        .comp_regex = {0}
    },
    {
        .pattern = "[A-Za-z_]+",
        .token = "VAR",
        .comp_regex = {0}
    },
    {
        /* Line terminators are skipped like any other whitespace. */
        .pattern = "(\r\n|\r|\n)",
        .token = WHITESPACE,
        .comp_regex = {0}
    },
};
/* Compiles every pattern in tokendefs[] into its comp_regex member. */
void compile_regexes(void);

/*
 * Appends a NUL-terminated copy of str[char_offset .. char_offset+char_len)
 * to `array`, growing it as needed. Returns 0 on success, 1 if an
 * allocation fails.
 */
char add_token_value(char_array *array, char *str, unsigned int char_offset, unsigned int char_len);
/*
 * Demo driver: tokenizes a hard-coded sample program.
 *
 * At each input position the rules in tokendefs[] are tried in order and the
 * first one that matches a non-empty span starting exactly at that position
 * wins. Non-whitespace matches are stored in a char_array and printed.
 *
 * Returns 0 when the whole input was tokenized, EXIT_FAILURE on a syntax
 * error, a regexec failure, or an allocation failure.
 */
int main(int argc, char **argv) {
    (void)argc;
    (void)argv;

    char *str = "class Foo\n\tint bax = \"foobar\"end";
    size_t str_len = strlen(str);
    size_t char_offset = 0;
    int status = 0;
    char_array token_values = {
        .array = NULL,
        .total_len = 0,
        .current_len = 0
    };

    compile_regexes();

    while (char_offset < str_len) {
        int fnd = 0;

        for (size_t i = 0; i < NUM_TOKENS_DEFS; i++) {
            regmatch_t matches[1];
            int exec_err = regexec(&tokendefs[i].comp_regex, str + char_offset, 1, matches, 0);

            if (exec_err == REG_NOMATCH) {
                continue;
            }
            if (exec_err) {
                /* Genuine regexec failure: stop without also reporting a
                 * (misleading) syntax error for this position. */
                printf("Parsing error: Ran out of memory. Quiting...\n");
                status = EXIT_FAILURE;
                goto cleanup;
            }
            /* regexec searches the whole remainder; only a non-empty match
             * anchored at the current position counts. */
            if (matches[0].rm_so != 0 || matches[0].rm_eo == 0) {
                continue;
            }

            size_t match_len = (size_t)matches[0].rm_eo;

            /* Compare token names by content: two identical string literals
             * are not guaranteed to share an address. */
            if (strcmp(tokendefs[i].token, WHITESPACE) != 0) {
                if (add_token_value(&token_values, str, (unsigned int)char_offset, (unsigned int)match_len) != 0) {
                    /* add_token_value already printed the reason. */
                    status = EXIT_FAILURE;
                    goto cleanup;
                }
                printf(" [%s] matches: <%s>\n", tokendefs[i].token, token_values.array[token_values.current_len - 1]);
            }

            char_offset += match_len;
            fnd = 1;
            break;
        }

        if (!fnd) {
            printf("Syntax error starting at: %s\n", str + char_offset);
            status = EXIT_FAILURE;
            break;
        }
    }

cleanup:
    /* Release every stored token string, then the pointer array itself. */
    for (unsigned int i = 0; i < token_values.current_len; i++) {
        free(token_values.array[i]);
    }
    free(token_values.array);

    /* Release the regexes compiled by compile_regexes(). */
    for (size_t i = 0; i < NUM_TOKENS_DEFS; i++) {
        regfree(&tokendefs[i].comp_regex);
    }

    return status;
}
/*
 * Compiles the pattern of every entry in tokendefs[] as a POSIX extended
 * regular expression, storing the result in that entry's comp_regex, and
 * prints a progress line per rule.
 *
 * A pattern that fails to compile would leave an unusable regex_t behind
 * for regexec(), so the failure is reported via regerror() and the program
 * exits with EXIT_FAILURE instead of continuing.
 */
void compile_regexes(void) {
    printf("Compiling Regexes... \n");

    for (int i = 0; i < NUM_TOKENS_DEFS; i++) {
        tokendef *t = &tokendefs[i];
        int rc = regcomp(&t->comp_regex, t->pattern, REG_EXTENDED);

        if (rc != 0) {
            char errbuf[128];
            regerror(rc, &t->comp_regex, errbuf, sizeof errbuf);
            fprintf(stderr, "Failed to compile regex [%i] for token %s: %s\n", i, t->token, errbuf);
            exit(EXIT_FAILURE);
        }

        printf(" [%i] > %s\n", i, t->token);
    }

    printf("\n-----\n\n");
}
/*
 * Appends a copy of a substring to a growable string list.
 *
 * array       - list to append to; a zero total_len means "not yet allocated".
 * str         - source string.
 * char_offset - index in `str` where the substring starts.
 * char_len    - number of characters to copy.
 *
 * The copy is heap-allocated and NUL-terminated; the list owns it (the
 * caller frees each element and then array->array). Capacity starts at 2
 * and doubles whenever the list is full.
 *
 * Returns 0 on success, 1 if any allocation fails. On failure the list is
 * left exactly as it was before the call.
 */
char add_token_value(char_array *array, char *str, unsigned int char_offset, unsigned int char_len) {
    if (array->total_len == 0) {
        char **initial = malloc(2 * sizeof *initial);
        if (initial == NULL) {
            printf("Initial array malloc failed\n");
            return 1;
        }
        /* Publish the capacity only once the storage really exists. */
        array->array = initial;
        array->total_len = 2;
        array->current_len = 0;
    } else if (array->current_len >= array->total_len) {
        unsigned int new_len = array->total_len * 2;

        /* Grow through a temporary so the old storage stays valid (and
         * owned by `array`) if the reallocation fails. */
        char **grown = realloc(array->array, new_len * sizeof *grown);
        if (grown == NULL) {
            printf("Temporary array malloc failed\n");
            return 1;
        }
        array->array = grown;
        array->total_len = new_len;
    }

    /* Widen before adding 1 so char_len == UINT_MAX cannot wrap to 0. */
    char *new_str = malloc((size_t)char_len + 1);
    if (new_str == NULL) {
        printf("String alloc failed\n");
        return 1;
    }

    /* strncpy stops at the source's NUL, so it never reads past the end of
     * `str`; the terminator is written explicitly below. */
    strncpy(new_str, str + char_offset, char_len);
    new_str[char_len] = '\0';

    array->array[array->current_len++] = new_str;
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment