
@sternenseemann
Last active November 24, 2016 23:16
Apparently a lisp tokenizer
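For example, fed a file containing "(+ 1 2)", the program below first echoes the input and then prints one token per line: the opening paren, the atoms +, 1 and 2, and the closing paren.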
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <ctype.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#define TOKENS_CHUNK 50
/* Tokenizer state */
enum tokenizer_state {
    NOT_IN_TOKEN,
    IN_TOKEN
};
/* Tokens */
enum token_kind {
    OPENING_PAREN,
    CLOSING_PAREN,
    ATOM
};
typedef struct token {
    enum token_kind type;
    /* index into the input where the token starts */
    size_t offset;
} token;
/* Tokenization result */
enum tokenize_status {
    ALLOC_ERROR,
    PARSE_ERROR,
    SUCCESS
};
typedef struct tokenize_result {
    enum tokenize_status status;
    /* dynamically grown token array and the number of tokens in use */
    token *tokens;
    size_t length;
} tokenize_result;
/* append a token; the caller ensures there is space left in res->tokens */
void append_token(tokenize_result *res, enum token_kind type, size_t offset) {
    res->tokens[res->length].type = type;
    res->tokens[res->length].offset = offset;
    res->length += 1;
}
tokenize_result tokenize(char *input) {
    /* current char index */
    size_t i = 0;
    enum tokenizer_state state = NOT_IN_TOKEN;
    tokenize_result result = {0};
    /* allocated size of the array */
    size_t length = TOKENS_CHUNK;
    /* length of the used part */
    result.length = 0;
    result.status = SUCCESS;
    result.tokens = malloc(sizeof (token) * length);
    if(result.tokens == NULL) {
        result.status = ALLOC_ERROR;
        return result;
    }
    while (input[i] != '\0') {
        if(result.length >= length) {
            length = length + TOKENS_CHUNK;
            /* grow via a temporary so the old block is not leaked if realloc fails */
            token *grown = realloc(result.tokens, sizeof (token) * length);
            if(grown == NULL) {
                result.status = ALLOC_ERROR;
                return result;
            }
            result.tokens = grown;
        }
        switch(input[i]) {
            case '(':
                append_token(&result, OPENING_PAREN, i);
                state = NOT_IN_TOKEN;
                break;
            case ')':
                append_token(&result, CLOSING_PAREN, i);
                state = NOT_IN_TOKEN;
                break;
            case ';':
                /* comment: skip to the end of the line, stopping just before the
                   terminator so the i++ below cannot run past the input */
                while(input[i + 1] != '\0' && input[i + 1] != '\n') {
                    i++;
                }
                state = NOT_IN_TOKEN;
                break;
            case '\t':
            case '\r':
            case '\n':
            case ' ':
                state = NOT_IN_TOKEN;
                break;
            default:
                /* any other character starts or continues an atom */
                if(state == NOT_IN_TOKEN) {
                    append_token(&result, ATOM, i);
                }
                state = IN_TOKEN;
                break;
        }
        i++;
    }
    return result;
}
int main(int argc, char *argv[]) {
    if(argc != 2) {
        fprintf(stderr, "Usage: %s <file>\n", argv[0]);
        return 1;
    }
    int fd;
    char *input;
    struct stat stat;
    fd = open(argv[1], O_RDONLY);
    if(fd == -1) {
        fprintf(stderr, "Could not open file \"%s\"\n", argv[1]);
        return 1;
    }
    if (fstat(fd, &stat) == -1) {
        fprintf(stderr, "Could not stat file \"%s\"\n", argv[1]);
        return 1;
    }
    /* mmap zero-fills the mapped page beyond the end of the file, so the input
       is NUL-terminated unless the file size is an exact multiple of the page size */
    input = mmap(NULL, stat.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
    if (input == MAP_FAILED) {
        fprintf(stderr, "Could not read file \"%s\"\n", argv[1]);
    } else {
        printf("-- input --\n %s\n-- input --\n\n", input);
        tokenize_result res = tokenize(input);
        if(res.status == SUCCESS) {
            for(size_t i = 0; i < res.length; i++) {
                switch(res.tokens[i].type) {
                    case ATOM:
                        /* an atom only stores its start offset, so print
                           characters until the next delimiter */
                        for(size_t j = res.tokens[i].offset;
                            input[j] != '\0' && input[j] != '(' && input[j] != ')' && !isspace(input[j]);
                            j++) {
                            printf("%c", input[j]);
                        }
                        printf("\n");
                        break;
                    default:
                        printf("%c\n", input[res.tokens[i].offset]);
                }
            }
            free(res.tokens);
        } else {
            fprintf(stderr, "An error occurred: %u\n", res.status);
            if(res.tokens != NULL) {
                free(res.tokens);
            }
        }
    }
}
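For quick experiments, tokenize() can also be driven from a string literal instead of an mmapped file. A minimal sketch of such a driver, not part of the gist above (it would replace main()):

/* Sketch of an alternative main: feed tokenize() a string literal and dump
   the raw token records. Assumes the declarations above are in scope. */
int main(void) {
    tokenize_result res = tokenize("(+ 1 2) ; a comment\n(foo)");
    if(res.status != SUCCESS) {
        fprintf(stderr, "tokenize failed: %u\n", res.status);
        free(res.tokens);
        return 1;
    }
    for(size_t i = 0; i < res.length; i++) {
        printf("token %zu: kind %d, offset %zu\n",
               i, res.tokens[i].type, res.tokens[i].offset);
    }
    free(res.tokens);
    return 0;
}

For this input it reports eight tokens (two pairs of parentheses and the atoms +, 1, 2 and foo); everything after the semicolon up to the newline is skipped as a comment.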