/*
 * Created August 18, 2016 16:49
 * Save vurtun/ceaea79807b6d57da849781130d7076f to your computer and use it in GitHub Desktop.
 * This file contains bidirectional Unicode text that may be interpreted or
 * compiled differently than what appears below. To review, open the file in
 * an editor that reveals hidden Unicode characters.
 * Learn more about bidirectional Unicode characters
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
/* Short aliases for the fixed-width integer types from <stdint.h>. */
typedef uint8_t  u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;
typedef int8_t   i8;
typedef int16_t  i16;
typedef int32_t  i32;
typedef int64_t  i64;
/*
 * Kinds of token produced by the tokenizer.
 *
 * The *_BEGIN / *_END enumerators look like range markers around the literal
 * and operator kinds; nothing in this file reads them. TOKEN_COUNT is the
 * number of enumerators that precede it.
 */
enum Token_Kind {
    TOKEN_UNKOWN,           /* historical spelling, kept for existing users */
    TOKEN_INVALID,
    TOKEN_EOF,
    TOKEN_STRING,
    TOKEN_LITERAL_BEGIN,
    TOKEN_IDENTIFIER,
    TOKEN_LITERAL_END,
    TOKEN_OPERATOR_BEGIN,
    TOKEN_DIVIDE,
    TOKEN_SEMICOLON,
    TOKEN_COMMA,
    TOKEN_ASTERISK,
    TOKEN_PAREN_OPEN,
    TOKEN_PAREN_CLOSE,
    TOKEN_BRACKET_OPEN,
    TOKEN_BRACKET_CLOSE,
    TOKEN_BRACE_OPEN,
    TOKEN_BRACE_CLOSE,
    TOKEN_OPERATOR_END,
    TOKEN_COUNT,
    /* Correctly spelled alias; placed last so no existing value changes. */
    TOKEN_UNKNOWN = TOKEN_UNKOWN
};
/* Integer type used to carry an enum Token_Kind value in tokens and signatures. */
typedef uint64_t TokenType;
struct Token { | |
TokenType kind; | |
const char *str; | |
int len; | |
}; | |
/* Cursor over a NUL-terminated input string. */
struct Tokenizer {
    const char *remaining_character;    /* next byte that has not been read yet */
    char current_character;             /* byte most recently read */
};
/*
 * Reads the whole file at `path` into a newly allocated buffer and appends a
 * terminating '\0' so the contents can be used as a C string.
 *
 * path: file to read.
 * size: out parameter; receives the number of bytes read, not counting the
 *       terminator. Set to 0 on failure.
 *
 * Returns the buffer, or 0 when the file cannot be opened, inspected or read,
 * or when memory cannot be allocated. The caller owns the buffer and releases
 * it with free().
 */
static void*
ReadFileAndTerminate(const char *path, size_t *size)
{
    char *data = 0;
    struct stat buf;
    size_t total = 0;
    size_t used = 0;
    int fd;

    *size = 0;
    fd = open(path, O_RDONLY);
    if (fd < 0) return 0;
    if (fstat(fd, &buf) < 0 || buf.st_size < 0) {
        close(fd);
        return 0;
    }
    total = (size_t)buf.st_size;

    /* A read-only mapping of exactly the file's length has no writable byte
       for the terminator (and cannot be created at all for an empty file),
       so the contents are copied into heap memory with one spare byte. */
    data = malloc(total + 1);
    if (!data) {
        close(fd);
        return 0;
    }

    /* read() may deliver fewer bytes than requested; keep going until done. */
    while (used < total) {
        ssize_t got = read(fd, data + used, total - used);
        if (got < 0) {
            free(data);
            close(fd);
            return 0;
        }
        if (got == 0) break;    /* file is shorter than fstat reported */
        used += (size_t)got;
    }
    close(fd);

    data[used] = '\0';
    *size = used;
    return data;
}
static int | |
TokenEquals(const struct Token *tok, const char *match) | |
{ | |
int index = 0; | |
const char *at = match; | |
for (index = 0; index < tok->len; ++index, ++at) { | |
if (*at == 0 || tok->str[index] != *at) | |
return 0; | |
} | |
return (*at == 0); | |
} | |
static void | |
ReadCharacter(struct Tokenizer *t) | |
{ | |
t->current_character = *t->remaining_character; | |
t->remaining_character++; | |
} | |
static void | |
UnreadCharacter(struct Tokenizer *t) | |
{ | |
t->remaining_character--; | |
t->current_character = *t->remaining_character; | |
} | |
/*
 * Returns non-zero when `c` is an ASCII letter ('a'-'z', 'A'-'Z') or an
 * underscore, i.e. a character that may start an identifier.
 */
static int
IsAlpha(int c)
{
    /* upper bounds must be <=; with >= the ranges rejected 'A'-'Y' and
       accepted punctuation such as '[' and '{' */
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c == '_');
}
/* Returns non-zero when `c` is an ASCII decimal digit ('0'-'9'). */
static int
IsNumeric(int c)
{
    return (c >= '0' && c <= '9');
}
static TokenType | |
ReadToken(struct Tokenizer *t, struct Token *tok) | |
{ | |
tok->len = 1; | |
tok->kind = TOKEN_UNKOWN; | |
retry: | |
tok->str = t->remaining_character-1; | |
switch (t->current_character) { | |
case 0: tok->kind = TOKEN_EOF; break; | |
case '\n': case '\t': case ' ': | |
ReadCharacter(t); | |
goto retry; | |
case '"': | |
tok->kind = TOKEN_STRING; | |
tok->str = t->remaining_character; | |
ReadCharacter(t); | |
while (t->current_character != '"') | |
ReadCharacter(t); | |
tok->len = (int)(t->remaining_character - tok->str); | |
ReadCharacter(t); | |
break; | |
case '/': | |
ReadCharacter(t); | |
if (t->current_character == '*') { | |
/* multiline C comment */ | |
ReadCharacter(t); | |
while (t->current_character != '*' && *t->remaining_character != '/' && t->current_character) | |
ReadCharacter(t); | |
ReadCharacter(t); | |
ReadCharacter(t); | |
goto retry; | |
} else if (t->current_character == '/') { | |
/* single line C++ comment */ | |
ReadCharacter(t); | |
while (t->current_character != '\n' && t->current_character) | |
ReadCharacter(t); | |
goto retry; | |
} else tok->kind = TOKEN_DIVIDE; | |
break; | |
case '{': tok->kind = TOKEN_BRACE_OPEN; ReadCharacter(t); break; | |
case '}': tok->kind = TOKEN_BRACE_CLOSE; ReadCharacter(t); break; | |
case '(': tok->kind = TOKEN_PAREN_OPEN; ReadCharacter(t); break; | |
case ')': tok->kind = TOKEN_PAREN_CLOSE; ReadCharacter(t); break; | |
case '[': tok->kind = TOKEN_BRACKET_OPEN; ReadCharacter(t); break; | |
case ']': tok->kind = TOKEN_BRACKET_CLOSE; ReadCharacter(t); break; | |
case ';': tok->kind = TOKEN_SEMICOLON; ReadCharacter(t); break; | |
case ',': tok->kind = TOKEN_COMMA; ReadCharacter(t); break; | |
case '*': tok->kind = TOKEN_ASTERISK; ReadCharacter(t); break; | |
default: | |
if (IsAlpha(t->current_character)) { | |
tok->kind = TOKEN_IDENTIFIER; | |
tok->str = t->remaining_character-1; | |
while ( IsAlpha(t->current_character) || | |
IsNumeric(t->current_character) || | |
t->current_character == '_') | |
ReadCharacter(t); | |
tok->len = (int)((t->remaining_character-1) - tok->str); | |
} else tok->kind = TOKEN_UNKOWN; | |
break; | |
} | |
return tok->kind; | |
} | |
static int | |
RequireToken(struct Tokenizer *t, TokenType type) | |
{ | |
struct Token tok; | |
ReadToken(t, &tok); | |
return tok.kind == type; | |
} | |
static void | |
ParseMember(struct Tokenizer *t, struct Token *name_token, struct Token *type_token) | |
{ | |
int cont = 1; | |
int is_pointer = 0; | |
if (TokenEquals(type_token, "struct")) | |
ReadToken(t, type_token); | |
while (cont) { | |
struct Token tok; | |
ReadToken(t, &tok); | |
switch (tok.kind) { | |
case TOKEN_IDENTIFIER: { | |
printf(" {&type_%.*s, \"%.*s\", offsetof(struct %.*s, %.*s), 1},\n", | |
(is_pointer) ? 9: type_token->len, | |
(is_pointer) ? "uint8_ptr": type_token->str, | |
tok.len, tok.str, name_token->len, name_token->str, | |
tok.len, tok.str); | |
is_pointer = 0; | |
} break; | |
case TOKEN_ASTERISK: | |
is_pointer = 1; | |
break; | |
case TOKEN_EOF: | |
case TOKEN_SEMICOLON: | |
cont = 0; | |
break; | |
} | |
} | |
} | |
static void | |
ParseStruct(struct Tokenizer *t) | |
{ | |
int fields = 0; | |
struct Token tok; | |
ReadToken(t, &tok); | |
assert(tok.kind == TOKEN_IDENTIFIER); | |
if (!RequireToken(t, TOKEN_BRACE_OPEN)) | |
return; | |
printf("const struct type_field type_%.*s_fields[] = {\n", tok.len, tok.str); | |
while (1) { | |
struct Token member; | |
ReadToken(t, &member); | |
if (member.kind != TOKEN_BRACE_CLOSE) { | |
ParseMember(t, &tok, &member); | |
fields++; | |
} else break; | |
} | |
printf("};\n"); | |
printf("const struct type_definition %.*s_type = {UI_AGGREGATE, \"%.*s\", sizeof(struct %.*s}. 0, {%d, type_%.*s_fields}};\n\n\n", | |
tok.len, tok.str, tok.len, tok.str, tok.len, tok.str, fields, tok.len, tok.str); | |
} | |
static void | |
InitTokenizer(struct Tokenizer *t, const char *text) | |
{ | |
t->remaining_character = text; | |
ReadCharacter(t); | |
} | |
int main(void) | |
{ | |
struct Token tok; | |
struct Tokenizer t; | |
const char *text = "introspect struct test {uint32 a; float b; void *data;};"; | |
InitTokenizer(&t, text); | |
while (ReadToken(&t, &tok) != TOKEN_EOF) { | |
switch (tok.kind) { | |
case TOKEN_IDENTIFIER: | |
if (TokenEquals(&tok, "introspect")) { | |
ReadToken(&t, &tok); | |
assert(tok.kind == TOKEN_IDENTIFIER); | |
assert(TokenEquals(&tok, "struct")); | |
ParseStruct(&t); | |
} | |
break; | |
} | |
} | |
return 0; | |
} |
/*
 * Sign up for free to join this conversation on GitHub.
 * Already have an account? Sign in to comment
 */