Skip to content

Instantly share code, notes, and snippets.

@yorickpeterse
Created January 7, 2015 19:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yorickpeterse/8c5343dd3c2674b63b13 to your computer and use it in GitHub Desktop.
Save yorickpeterse/8c5343dd3c2674b63b13 to your computer and use it in GitHub Desktop.
#include <juno/lexer.hpp>
using namespace Juno::Lexing;
// Helper macros used by the scanner actions. They expand inside
// Juno::Lexer::lex() and rely on its locals: `ts`/`te` (start/end of the
// current match), `line`, `column` and `tokens`.

// Moves the column counter forward by the length of the current match.
#define ADVANCE_COLUMN column = column + (te - ts)

// Starts a new line. lex() initialises `column` to 1, so the counter is reset
// to 1 (not 0) to keep column numbers 1-based on every line.
#define ADVANCE_LINE do { line++; column = 1; } while (0)

// Appends a token of the given type for the current match, positioned at the
// current line/column, then advances the column past it. Wrapped in
// do/while(0) so the macro behaves as a single statement.
#define ADD_TOKEN(TYPE) \
    do { \
        tokens->push(this->create_token(TYPE, ts, te, line, column)); \
        ADVANCE_COLUMN; \
    } while (0)
%%{
    machine juno_lexer;

    # Reserved words of the language.
    keyword = 'class' | 'def' | 'mixin' | 'use' | 'end';

    semicolon = ';';

    # Only Unix style linebreaks are allowed.
    newline = '\n';

    # The built-in `space` machine doesn't play nice with incrementing line
    # numbers.
    whitespace = [ \t];

    # Although treated as method calls these methods are considered syntax
    # sugar so that they can be used without parentheses. This allows
    # `10 + 10` instead of `10.+(10)`.
    #
    # The `-` has to be escaped: an unescaped `+-/` is the character range
    # from `+` to `/`, which would also match `,` and `.`.
    operator = [+\-/*%|&];

    # Floats come in the usual format: digits, a dot, digits (e.g. `10.5`).
    # They can be prefixed with a single + or - to indicate a positive or
    # negative float. A lone `.` or input such as `1.2.3` is not one float.
    float = ('+'|'-')? [0-9]+ '.' [0-9]+;

    # Integers are the same as floats except that they can not include a dot.
    integer = ('+'|'-')? [0-9]+;

    # Comments are written in the form `# ...` where `...` is anything but a
    # newline. The NUL byte is excluded as well: the scanner runs without an
    # end pointer, so matching NUL would read past the end of the input when
    # the last line is a comment without a trailing newline. For the same
    # reason the trailing newline is optional.
    comment = '#' [^\n\0]* newline?;

    # Constants come in the format of `FooBar`. They *must* start with a
    # capital and can then include any alphanumerical character, so a single
    # capital letter is a valid constant too.
    constant = upper alnum*;

    # When two patterns match the same length the one listed first wins, so
    # the order of the rules below matters.
    main := |*
        operator   => { ADD_TOKEN(Token::OPERATOR); };
        integer    => { ADD_TOKEN(Token::INTEGER); };
        float      => { ADD_TOKEN(Token::FLOAT); };
        keyword    => { ADD_TOKEN(Token::KEYWORD); };
        constant   => { ADD_TOKEN(Token::CONSTANT); };

        # Only count a new line when the comment really ended in a newline.
        comment    => {
            ADD_TOKEN(Token::COMMENT);
            if (te[-1] == '\n') { ADVANCE_LINE; }
        };

        semicolon  => { ADD_TOKEN(Token::SEMICOLON); };
        whitespace => { ADVANCE_COLUMN; };
        newline    => { ADVANCE_LINE; };
    *|;
}%%

%% write data;
/**
 * Runs the `juno_lexer` scanner over the given input and collects the tokens
 * produced by its actions.
 *
 * The machine is executed with `noend`, i.e. without an end-of-buffer
 * pointer, so scanning only stops once the machine halts on its own.
 * NOTE(review): this presumably requires `p` to be NUL-terminated and relies
 * on the NUL byte not being accepted by the grammar -- confirm. Input the
 * grammar rejects also ends lexing silently; no error is reported here.
 *
 * @param p Input to lex. A null pointer yields an empty token list.
 * @return A token list allocated with `new` and returned as a raw pointer;
 *         presumably the caller is responsible for releasing it.
 */
TokenList *Juno::Lexer::lex(const char *p)
{
    auto *tokens = new ::TokenList();

    // Nothing to scan; avoid handing a null pointer to the generated code.
    if (p == nullptr) {
        return tokens;
    }

    // The names below are referenced by the Ragel generated code and by the
    // ADD_TOKEN / ADVANCE_* macros, so they must not be renamed.
    const char *ts = nullptr;
    const char *te = nullptr;
    const char *eof = nullptr;

    // Position of the next token; both counters start at 1.
    size_t line = 1;
    size_t column = 1;

    int act = 0;
    int cs = 0;

    %% write init;
    %% write exec noend;

    return tokens;
}
/**
 * Creates a token whose value is the text in the range [start, stop).
 *
 * @param type   Type of the token to create.
 * @param start  Pointer to the first character of the token text.
 * @param stop   Pointer one past the last character of the token text.
 * @param line   Line number passed on to the token.
 * @param column Column number passed on to the token.
 * @return A token allocated with `new` and returned as a raw pointer.
 */
Token *Juno::Lexer::create_token(
    Token::TOKEN_TYPE type,
    const char *start,
    const char *stop,
    size_t line,
    size_t column
)
{
    const size_t length = static_cast<size_t>(stop - start);

    // Copy only the matched characters. Building a string from `start` alone
    // would first copy everything up to the terminating NUL for every token.
    std::string value(start, length);

    return new Token(type, value, line, column);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment