gremerritt/boat.c

## boat.c
#include <stdio.h>
#include <regex.h>
#include <string.h>
#include <stdlib.h>

/* Loop bound used when walking the tokendefs table; must equal the number of entries in it. */
#define NUM_TOKENS_DEFS 10
/* Token name given to the rules whose matches main() consumes without recording. */
#define WHITESPACE "WHITESPACE"

/* One lexer rule: a regular-expression pattern plus the token name printed when it matches. */
typedef struct tokendef {
    /* regex source text; compile_regexes() compiles it with REG_EXTENDED */
    char *pattern;
    /* token name reported for a match */
    char *token;
    /* compiled form of pattern, filled in by compile_regexes() and used by regexec() in main() */
    regex_t comp_regex;
} tokendef;

/* Growable list of heap-allocated C strings; add_token_value() appends to it. */
typedef struct char_array {
    /* pointer storage; add_token_value() allocates it when total_len is 0 */
    char **array;
    /* number of pointer slots currently allocated in array */
    unsigned int total_len;
    /* number of slots in use (index of the next free slot) */
    unsigned int current_len;
} char_array;

/*
 * Lexer rules. The table must hold exactly NUM_TOKENS_DEFS entries.
 *
 * Order matters to the scanner in main(): the keyword rules are listed before
 * the general VAR rule so that a keyword is not reported as VAR.
 *
 * Every pattern is anchored with '^' because the scanner only accepts matches
 * that start at the current scan position; anchoring lets regexec() give up
 * immediately instead of searching the rest of the input.
 *
 * comp_regex is deliberately left out of the initializers: omitted members are
 * zero-initialized, and compile_regexes() fills the field in.
 */
tokendef tokendefs[] = {
    /* runs of blanks and tabs */
    { .pattern = "^[ \t]+", .token = WHITESPACE },

    /* keywords */
    { .pattern = "^int",    .token = "INT" },
    { .pattern = "^float",  .token = "FLOAT" },
    { .pattern = "^str",    .token = "STR" },
    { .pattern = "^class",  .token = "CLASS" },
    { .pattern = "^end",    .token = "END" },

    /* assignment operator */
    { .pattern = "^=",      .token = "=" },

    /*
     * Single- or double-quoted literal with an optional L prefix.
     * "\\\\." in C source is the regex "\\." (a backslash followed by any
     * character), so escapes such as \" are accepted inside the literal.
     * Inside a bracket expression a backslash is an ordinary character, so
     * [^\"] means "anything except a backslash or a double quote".
     */
    {
        .pattern = "^L?(\"(\\\\.|[^\\\"])*\"|'(\\\\.|[^\\'])*')",
        .token = "STRING_LITERAL"
    },

    /* identifiers: letters and underscores only */
    { .pattern = "^[A-Za-z_]+", .token = "VAR" },

    /* line endings are skipped like other whitespace */
    { .pattern = "^(\r\n|\r|\n)", .token = WHITESPACE },
};

/* Compiles each tokendefs pattern into its comp_regex field. */
void compile_regexes();
/* Appends a NUL-terminated copy of char_len characters of str, starting at char_offset, to array. Returns 0 on success, 1 if an allocation fails. */
char add_token_value(char_array *array, char *str, unsigned int char_offset, unsigned int char_len);

/*
 * Tokenize a hard-coded sample program and print each non-whitespace token.
 *
 * At every scan position all rules in tokendefs are tried and the longest
 * match that starts exactly at that position wins; when two rules match the
 * same length the one listed first wins. This keeps a keyword rule from
 * consuming the prefix of a longer identifier.
 *
 * Returns EXIT_SUCCESS when the whole input was tokenized, EXIT_FAILURE on a
 * syntax error, a regexec() failure or an allocation failure.
 */
int main(int argc, char **argv) {
    (void) argc;
    (void) argv;

    char *str = "class Foo\n\tint bax = \"foobar\"end";
    size_t str_len = strlen(str);
    size_t char_offset = 0;
    int status = EXIT_SUCCESS;
    char_array token_values = {
        .array = NULL,
        .total_len = 0,
        .current_len = 0
    };

    compile_regexes();

    while (char_offset < str_len) {
        regmatch_t matches[1];
        int best_index = -1;
        size_t best_len = 0;
        int exec_err = 0;

        for (int i = 0; i < NUM_TOKENS_DEFS; i++) {
            exec_err = regexec(&tokendefs[i].comp_regex, str + char_offset, 1, matches, 0);

            if (exec_err == REG_NOMATCH) {
                exec_err = 0;
                continue;
            }
            if (exec_err) {
                break;
            }

            /* accept only non-empty matches that begin at the scan position;
               the strict '>' keeps the earliest rule on equal lengths */
            if (matches[0].rm_so == 0 && (size_t) matches[0].rm_eo > best_len) {
                best_len = (size_t) matches[0].rm_eo;
                best_index = i;
            }
        }

        if (exec_err) {
            printf("Parsing error: Ran out of memory. Quiting...\n");
            status = EXIT_FAILURE;
            break;
        }

        if (best_index < 0) {
            printf("Syntax error starting at: %s\n", str + char_offset);
            status = EXIT_FAILURE;
            break;
        }

        /* compare token names by content; identical string literals are not
           guaranteed to share an address */
        if (strcmp(tokendefs[best_index].token, WHITESPACE) != 0) {
            if (add_token_value(&token_values, str, (unsigned int) char_offset, (unsigned int) best_len) != 0) {
                /* add_token_value already printed the reason */
                status = EXIT_FAILURE;
                break;
            }
            printf("  [%s] matches: <%s>\n", tokendefs[best_index].token, token_values.array[token_values.current_len - 1]);
        }

        char_offset += best_len;
    }

    /* release the token copies and the compiled patterns */
    for (unsigned int i = 0; i < token_values.current_len; i++) {
        free(token_values.array[i]);
    }
    free(token_values.array);

    for (int i = 0; i < NUM_TOKENS_DEFS; i++) {
        regfree(&tokendefs[i].comp_regex);
    }

    return status;
}

/*
 * Compile every pattern in tokendefs into its comp_regex field using POSIX
 * extended syntax, printing one progress line per rule.
 *
 * A pattern that fails to compile leaves its regex_t unusable, so the failure
 * is reported on stderr (with the regerror() text) and the program exits with
 * EXIT_FAILURE instead of letting regexec() run on it later.
 */
void compile_regexes(void) {
    printf("Compiling Regexes... \n");

    for (int i = 0; i < NUM_TOKENS_DEFS; i++) {
        tokendef *t = &tokendefs[i];
        int comp_err = regcomp(&t->comp_regex, t->pattern, REG_EXTENDED);

        if (comp_err != 0) {
            char msg[128];

            regerror(comp_err, &t->comp_regex, msg, sizeof msg);
            fprintf(stderr, "Failed to compile pattern for %s: %s\n", t->token, msg);
            exit(EXIT_FAILURE);
        }

        printf("  [%i] > %s\n", i, t->token);
    }

    printf("\n-----\n\n");
}

/*
 * Append a NUL-terminated copy of part of str to array.
 *
 * array        growable string list. Its storage is allocated on first use
 *              (total_len == 0, room for two entries) and doubled whenever it
 *              is full.
 * str          text the token is copied from.
 * char_offset  index in str of the first character to copy.
 * char_len     number of characters to copy.
 *
 * Returns 0 on success. Returns 1 if array or str is NULL or an allocation
 * fails; in that case current_len and the existing entries are untouched.
 * The caller owns the stored strings and the pointer array and must free them.
 */
char add_token_value(char_array *array, char *str, unsigned int char_offset, unsigned int char_len) {
    if (array == NULL || str == NULL) {
        return 1;
    }

    if (array->total_len == 0) {
        /* first use: publish the fields only once the allocation succeeded */
        char **initial = malloc(2 * sizeof *initial);

        if (initial == NULL) {
            printf("Initial array malloc failed\n");
            return 1;
        }

        array->array = initial;
        array->total_len = 2;
        array->current_len = 0;
    } else if (array->current_len >= array->total_len) {
        unsigned int new_len = array->total_len * 2;
        char **grown = NULL;

        /* skip the allocation if doubling wrapped around or the byte count
           would not fit in size_t */
        if (new_len > array->total_len && new_len <= (size_t) -1 / sizeof *grown) {
            /* realloc keeps the old block valid on failure */
            grown = realloc(array->array, new_len * sizeof *grown);
        }

        if (grown == NULL) {
            printf("Temporary array malloc failed\n");
            return 1;
        }

        array->array = grown;
        array->total_len = new_len;
    }

    char *new_str = malloc((size_t) char_len + 1);

    if (new_str == NULL) {
        printf("String alloc failed\n");
        return 1;
    }

    /* strncpy stops at a NUL in the source and zero-fills the remainder, so a
       char_len that runs past the end of str does not read out of bounds */
    strncpy(new_str, str + char_offset, char_len);
    new_str[char_len] = '\0';

    array->array[array->current_len++] = new_str;

    return 0;
}
	#include <stdio.h>
	#include <regex.h>
	#include <string.h>
	#include <stdlib.h>

	/* Loop bound used when walking the tokendefs table; must equal the number of entries in it. */
	#define NUM_TOKENS_DEFS 10
	/* Token name given to the rules whose matches main() consumes without recording. */
	#define WHITESPACE "WHITESPACE"

	/* One lexer rule: a regular-expression pattern plus the token name printed when it matches. */
	typedef struct tokendef {
	/* regex source text; compile_regexes() compiles it with REG_EXTENDED */
	char *pattern;
	/* token name reported for a match */
	char *token;
	/* compiled form of pattern, filled in by compile_regexes() and used by regexec() in main() */
	regex_t comp_regex;
	} tokendef;

	/* Growable list of heap-allocated C strings; add_token_value() appends to it. */
	typedef struct char_array {
	/* pointer storage; add_token_value() allocates it when total_len is 0 */
	char **array;
	/* number of pointer slots currently allocated in array */
	unsigned int total_len;
	/* number of slots in use (index of the next free slot) */
	unsigned int current_len;
	} char_array;

	/*
	 * Lexer rules. The table must hold exactly NUM_TOKENS_DEFS entries.
	 *
	 * Order matters to the scanner in main(): the keyword rules are listed before
	 * the general VAR rule so that a keyword is not reported as VAR.
	 *
	 * Every pattern is anchored with '^' because the scanner only accepts matches
	 * that start at the current scan position; anchoring lets regexec() give up
	 * immediately instead of searching the rest of the input.
	 *
	 * comp_regex is deliberately left out of the initializers: omitted members are
	 * zero-initialized, and compile_regexes() fills the field in.
	 */
	tokendef tokendefs[] = {
	    /* runs of blanks and tabs */
	    { .pattern = "^[ \t]+", .token = WHITESPACE },

	    /* keywords */
	    { .pattern = "^int",    .token = "INT" },
	    { .pattern = "^float",  .token = "FLOAT" },
	    { .pattern = "^str",    .token = "STR" },
	    { .pattern = "^class",  .token = "CLASS" },
	    { .pattern = "^end",    .token = "END" },

	    /* assignment operator */
	    { .pattern = "^=",      .token = "=" },

	    /*
	     * Single- or double-quoted literal with an optional L prefix.
	     * "\\\\." in C source is the regex "\\." (a backslash followed by any
	     * character), so escapes such as \" are accepted inside the literal.
	     * Inside a bracket expression a backslash is an ordinary character, so
	     * [^\"] means "anything except a backslash or a double quote".
	     */
	    {
	        .pattern = "^L?(\"(\\\\.|[^\\\"])*\"|'(\\\\.|[^\\'])*')",
	        .token = "STRING_LITERAL"
	    },

	    /* identifiers: letters and underscores only */
	    { .pattern = "^[A-Za-z_]+", .token = "VAR" },

	    /* line endings are skipped like other whitespace */
	    { .pattern = "^(\r\n|\r|\n)", .token = WHITESPACE },
	};

	/* Compiles each tokendefs pattern into its comp_regex field. */
	void compile_regexes();
	/* Appends a NUL-terminated copy of char_len characters of str, starting at char_offset, to array. Returns 0 on success, 1 if an allocation fails. */
	char add_token_value(char_array *array, char *str, unsigned int char_offset, unsigned int char_len);

	/*
	 * Tokenize a hard-coded sample program and print each non-whitespace token.
	 *
	 * At every scan position all rules in tokendefs are tried and the longest
	 * match that starts exactly at that position wins; when two rules match the
	 * same length the one listed first wins. This keeps a keyword rule from
	 * consuming the prefix of a longer identifier.
	 *
	 * Returns EXIT_SUCCESS when the whole input was tokenized, EXIT_FAILURE on a
	 * syntax error, a regexec() failure or an allocation failure.
	 */
	int main(int argc, char **argv) {
	    (void) argc;
	    (void) argv;

	    char *str = "class Foo\n\tint bax = \"foobar\"end";
	    size_t str_len = strlen(str);
	    size_t char_offset = 0;
	    int status = EXIT_SUCCESS;
	    char_array token_values = {
	        .array = NULL,
	        .total_len = 0,
	        .current_len = 0
	    };

	    compile_regexes();

	    while (char_offset < str_len) {
	        regmatch_t matches[1];
	        int best_index = -1;
	        size_t best_len = 0;
	        int exec_err = 0;

	        for (int i = 0; i < NUM_TOKENS_DEFS; i++) {
	            exec_err = regexec(&tokendefs[i].comp_regex, str + char_offset, 1, matches, 0);

	            if (exec_err == REG_NOMATCH) {
	                exec_err = 0;
	                continue;
	            }
	            if (exec_err) {
	                break;
	            }

	            /* accept only non-empty matches that begin at the scan position;
	               the strict '>' keeps the earliest rule on equal lengths */
	            if (matches[0].rm_so == 0 && (size_t) matches[0].rm_eo > best_len) {
	                best_len = (size_t) matches[0].rm_eo;
	                best_index = i;
	            }
	        }

	        if (exec_err) {
	            printf("Parsing error: Ran out of memory. Quiting...\n");
	            status = EXIT_FAILURE;
	            break;
	        }

	        if (best_index < 0) {
	            printf("Syntax error starting at: %s\n", str + char_offset);
	            status = EXIT_FAILURE;
	            break;
	        }

	        /* compare token names by content; identical string literals are not
	           guaranteed to share an address */
	        if (strcmp(tokendefs[best_index].token, WHITESPACE) != 0) {
	            if (add_token_value(&token_values, str, (unsigned int) char_offset, (unsigned int) best_len) != 0) {
	                /* add_token_value already printed the reason */
	                status = EXIT_FAILURE;
	                break;
	            }
	            printf(" [%s] matches: <%s>\n", tokendefs[best_index].token, token_values.array[token_values.current_len - 1]);
	        }

	        char_offset += best_len;
	    }

	    /* release the token copies and the compiled patterns */
	    for (unsigned int i = 0; i < token_values.current_len; i++) {
	        free(token_values.array[i]);
	    }
	    free(token_values.array);

	    for (int i = 0; i < NUM_TOKENS_DEFS; i++) {
	        regfree(&tokendefs[i].comp_regex);
	    }

	    return status;
	}

	/*
	 * Compile every pattern in tokendefs into its comp_regex field using POSIX
	 * extended syntax, printing one progress line per rule.
	 *
	 * A pattern that fails to compile leaves its regex_t unusable, so the failure
	 * is reported on stderr (with the regerror() text) and the program exits with
	 * EXIT_FAILURE instead of letting regexec() run on it later.
	 */
	void compile_regexes(void) {
	    printf("Compiling Regexes... \n");

	    for (int i = 0; i < NUM_TOKENS_DEFS; i++) {
	        tokendef *t = &tokendefs[i];
	        int comp_err = regcomp(&t->comp_regex, t->pattern, REG_EXTENDED);

	        if (comp_err != 0) {
	            char msg[128];

	            regerror(comp_err, &t->comp_regex, msg, sizeof msg);
	            fprintf(stderr, "Failed to compile pattern for %s: %s\n", t->token, msg);
	            exit(EXIT_FAILURE);
	        }

	        printf(" [%i] > %s\n", i, t->token);
	    }

	    printf("\n-----\n\n");
	}

	/*
	 * Append a NUL-terminated copy of part of str to array.
	 *
	 * array        growable string list. Its storage is allocated on first use
	 *              (total_len == 0, room for two entries) and doubled whenever it
	 *              is full.
	 * str          text the token is copied from.
	 * char_offset  index in str of the first character to copy.
	 * char_len     number of characters to copy.
	 *
	 * Returns 0 on success. Returns 1 if array or str is NULL or an allocation
	 * fails; in that case current_len and the existing entries are untouched.
	 * The caller owns the stored strings and the pointer array and must free them.
	 */
	char add_token_value(char_array *array, char *str, unsigned int char_offset, unsigned int char_len) {
	    if (array == NULL || str == NULL) {
	        return 1;
	    }

	    if (array->total_len == 0) {
	        /* first use: publish the fields only once the allocation succeeded */
	        char **initial = malloc(2 * sizeof *initial);

	        if (initial == NULL) {
	            printf("Initial array malloc failed\n");
	            return 1;
	        }

	        array->array = initial;
	        array->total_len = 2;
	        array->current_len = 0;
	    } else if (array->current_len >= array->total_len) {
	        unsigned int new_len = array->total_len * 2;
	        char **grown = NULL;

	        /* skip the allocation if doubling wrapped around or the byte count
	           would not fit in size_t */
	        if (new_len > array->total_len && new_len <= (size_t) -1 / sizeof *grown) {
	            /* realloc keeps the old block valid on failure */
	            grown = realloc(array->array, new_len * sizeof *grown);
	        }

	        if (grown == NULL) {
	            printf("Temporary array malloc failed\n");
	            return 1;
	        }

	        array->array = grown;
	        array->total_len = new_len;
	    }

	    char *new_str = malloc((size_t) char_len + 1);

	    if (new_str == NULL) {
	        printf("String alloc failed\n");
	        return 1;
	    }

	    /* strncpy stops at a NUL in the source and zero-fills the remainder, so a
	       char_len that runs past the end of str does not read out of bounds */
	    strncpy(new_str, str + char_offset, char_len);
	    new_str[char_len] = '\0';

	    array->array[array->current_len++] = new_str;

	    return 0;
	}