Created
January 7, 2015 19:05
-
-
Save yorickpeterse/8c5343dd3c2674b63b13 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <juno/lexer.hpp> | |
using namespace Juno::Lexing; | |
// Helper macros for the scanner actions below. They deliberately operate on
// the locals declared in Juno::Lexer::lex (`ts`, `te`, `line`, `column`,
// `tokens`) and must therefore only be expanded inside that function.
// Each macro expands to a single statement so it is safe to use under an
// unbraced `if`/`else`.

// Moves the column counter past the text that was just matched (`ts`..`te`).
#define ADVANCE_COLUMN (column += static_cast<size_t>(te - ts))

// Starts a new line. Columns are 1-based (lex() starts at column 1), so the
// counter is reset to 1 rather than 0 to keep positions consistent across
// lines.
#define ADVANCE_LINE \
    do { \
        ++line; \
        column = 1; \
    } while (0)

// Pushes a token of the given type for the current match, positioned at the
// current line/column, and then advances the column past the match.
#define ADD_TOKEN(TYPE) \
    do { \
        tokens->push(this->create_token((TYPE), ts, te, line, column)); \
        ADVANCE_COLUMN; \
    } while (0)
%%{
    machine juno_lexer;

    # Reserved words of the language.
    keyword = 'class' | 'def' | 'mixin' | 'use' | 'end';

    semicolon = ';';

    # Only Unix style linebreaks are allowed.
    newline = '\n';

    # The built-in `space` machine doesn't play nice with incrementing line
    # numbers, so horizontal whitespace is matched separately from newlines.
    whitespace = [ \t];

    # Although treated as method calls these methods are considered syntax
    # sugar so that they can be used without parenthesis. This allows
    # `10 + 10` instead of `10.+(10)`.
    #
    # Spelled out as an alternation of literals on purpose: inside a
    # character class the sequence `+-/` is a range that also matches `,`
    # and `.`.
    operator = '+' | '-' | '/' | '*' | '%' | '|' | '&';

    # Optional sign shared by the numeric literals.
    sign = '+' | '-';

    # Floats come in the usual format (e.g. `10.5`): digits, a single dot,
    # digits. They can be prefixed with a + or - to indicate a positive or
    # negative float.
    float = sign? digit+ '.' digit+;

    # Integers are the same as floats except that they can not include a
    # dot (since then it's a float).
    integer = sign? digit+;

    # Comments are written in the form `# ...` where `...` is anything but a
    # newline. NUL is excluded as well: the machine is run with `noend`, so
    # a comment that is not terminated by a newline must not be allowed to
    # run past the end of a NUL-terminated input.
    comment = '#' [^\n\0]* newline;

    # Constants come in the format of `FooBar`. They *must* start with a
    # capital and can then include any alpha numerical character. A single
    # capital letter (e.g. `A`) is a valid constant.
    constant = upper alnum*;

    main := |*
        operator   => { ADD_TOKEN(Token::OPERATOR); };
        integer    => { ADD_TOKEN(Token::INTEGER); };
        float      => { ADD_TOKEN(Token::FLOAT); };
        keyword    => { ADD_TOKEN(Token::KEYWORD); };
        constant   => { ADD_TOKEN(Token::CONSTANT); };

        # The comment token includes its terminating newline, hence the line
        # counter is bumped right after the token is emitted.
        comment    => { ADD_TOKEN(Token::COMMENT); ADVANCE_LINE; };

        semicolon  => { ADD_TOKEN(Token::SEMICOLON); };
        whitespace => { ADVANCE_COLUMN; };
        newline    => { ADVANCE_LINE; };
    *|;
}%%

%% write data;
/**
 * Runs the `juno_lexer` scanner over the given input and returns the list of
 * tokens that were produced.
 *
 * The machine is executed with `noend`, i.e. without an end-of-buffer
 * pointer; scanning stops once the scanner hits input that no pattern
 * accepts. NOTE(review): this presumably relies on `p` being NUL-terminated
 * -- confirm against callers.
 *
 * @param p Start of the input to lex. A null pointer yields an empty list.
 * @return A TokenList allocated with `new`; nothing in this function frees
 *         it, so releasing it is left to the caller.
 */
TokenList *Juno::Lexer::lex(const char *p)
{
    auto *tokens = new ::TokenList();

    // The generated scanner dereferences `p` unconditionally, so bail out
    // before running it when there is no input at all.
    if (p == nullptr) {
        return tokens;
    }

    // The names below are fixed: the Ragel-generated code refers to `p`,
    // `ts`, `te`, `eof`, `act` and `cs`, and the ADD_TOKEN/ADVANCE_* macros
    // refer to `tokens`, `ts`, `te`, `line` and `column`.
    const char *ts = nullptr;
    const char *te = nullptr;
    const char *eof = nullptr;

    // Positions are 1-based.
    size_t line = 1;
    size_t column = 1;

    int act = 0;
    int cs = 0;

    %% write init;
    %% write exec noend;

    return tokens;
}
/**
 * Creates a token whose value is the text in the half-open range
 * [start, stop).
 *
 * @param type   Type of the token to create.
 * @param start  Pointer to the first character of the token text.
 * @param stop   Pointer one past the last character of the token text.
 * @param line   Line number to store in the token.
 * @param column Column number to store in the token.
 * @return A Token allocated with `new`; nothing in this function frees it.
 */
Token *Juno::Lexer::create_token(
    Token::TOKEN_TYPE type,
    const char *start,
    const char *stop,
    size_t line,
    size_t column
)
{
    const size_t length = static_cast<size_t>(stop - start);

    // Copy exactly `length` characters. Building a std::string from `start`
    // alone would first copy everything up to the terminating NUL (the whole
    // remaining input) for every single token.
    std::string value(start, length);

    return new Token(type, value, line, column);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment