yorickpeterse/juno_lexer.rl

## juno_lexer.rl
#include <juno/lexer.hpp>

using namespace Juno::Lexing;

// Position-tracking helpers used by the scanner actions. They expand in a
// scope where `ts`/`te` (start/end of the current match), `line`, `column` and
// `tokens` are local variables. Each macro is wrapped in `do { } while (0)` so
// that it acts as one statement when followed by `;`, including under an
// unbraced `if`.

// Moves the column forward by the length of the text that was just matched.
#define ADVANCE_COLUMN \
    do { column += static_cast<size_t>(te - ts); } while (0)

// Starts a new line. Columns are 1-based (`lex` starts counting at 1), so the
// first character of the new line is column 1 as well.
#define ADVANCE_LINE \
    do { line++; column = 1; } while (0)

// Pushes a token of type TYPE for the current match, recording the position at
// which the match started, and then advances the column past it.
#define ADD_TOKEN(TYPE) \
    do { \
        tokens->push(this->create_token(TYPE, ts, te, line, column)); \
        ADVANCE_COLUMN; \
    } while (0)

%%{
    machine juno_lexer;

    keyword   = 'class' | 'def' | 'mixin' | 'use' | 'end';
    semicolon = ';';

    # Only Unix style linebreaks are allowed.
    newline = '\n';

    # The built-in `space` machine also matches newlines, which would bypass
    # the line counting done for `newline` below, so only spaces and tabs are
    # treated as whitespace.
    whitespace = [ \t];

    # Although treated as method calls these methods are considered syntax
    # sugar so that they can be used without parentheses. This allows `10 + 10`
    # instead of `10.+(10)`.
    #
    # The dash is escaped on purpose: an unescaped `+-/` inside a character
    # class is the range from `+` to `/`, which also matches `,` and `.`.
    operator = [+\-/*%|&];

    # Floats come in the usual format (e.g. `10.5`): digits, a single dot and
    # more digits. They can be prefixed with one + or - to indicate a positive
    # or negative float.
    float = ('+'|'-')? [0-9]+ '.' [0-9]+;

    # Integers are the same as floats except that they can not include a dot.
    integer = ('+'|'-')? [0-9]+;

    # Comments are written in the form `# ...` where `...` is anything but a
    # newline. NUL is excluded as well: the input is scanned without an end
    # pointer, so a comment missing its final newline must stop at the
    # terminator instead of running past the end of the input.
    comment = '#' [^\n\0]* newline;

    # Constants come in the format of `FooBar`. They *must* start with a
    # capital and can then include any number of alphanumerical characters
    # (including none, so `A` is a valid constant).
    constant = upper alnum*;

    # The scanner picks the longest match; when two patterns match the same
    # length the one listed first wins.
    main := |*
        operator => { ADD_TOKEN(Token::OPERATOR); };
        integer  => { ADD_TOKEN(Token::INTEGER); };
        float    => { ADD_TOKEN(Token::FLOAT); };
        keyword  => { ADD_TOKEN(Token::KEYWORD); };
        constant => { ADD_TOKEN(Token::CONSTANT); };

        # A comment token includes its trailing newline, hence the line bump.
        comment  => { ADD_TOKEN(Token::COMMENT); ADVANCE_LINE; };

        semicolon  => { ADD_TOKEN(Token::SEMICOLON); };
        whitespace => { ADVANCE_COLUMN; };
        newline    => { ADVANCE_LINE; };
    *|;
}%%

%% write data;

// Runs the `juno_lexer` scanner over the input `p` and returns the tokens it
// produced.
//
// `p` is expected to be NUL-terminated: no length is passed in and the machine
// is executed with `noend`, i.e. without an end-of-buffer pointer.
//
// The returned list is allocated with `new` and is not freed here.
// NOTE(review): presumably the caller takes ownership -- confirm against the
// call sites.
//
// NOTE(review): the final machine state (`cs`) is not inspected after the run,
// so input the scanner cannot match is not reported from this function.
TokenList *Juno::Lexer::lex(const char *p)
{
    auto *tokens = new ::TokenList();

    // Start and end of the current match. These are read by the ADD_TOKEN and
    // ADVANCE_COLUMN macros used in the scanner actions.
    const char *ts, *te;

    // No end-of-file position is supplied.
    // NOTE(review): presumably the generated code only needs the name to
    // exist -- confirm against the Ragel output.
    char *eof = 0;

    // Position of the next token; both counters start at 1.
    size_t line   = 1;
    size_t column = 1;

    // Scanner bookkeeping (`act`) and current state (`cs`) used by the
    // Ragel-generated code below.
    int act       = 0;
    int cs        = 0;

    // The local names above must stay as they are: the code Ragel emits for
    // the two directives below refers to them by name.
    %% write init;
    %% write exec noend;

    return tokens;
}

// Builds a token from the matched input range [start, stop).
//
// type   - the kind of token that was matched.
// start  - pointer to the first character of the match.
// stop   - pointer one past the last character of the match; must not precede
//          `start`.
// line   - forwarded unchanged to the Token constructor.
// column - forwarded unchanged to the Token constructor.
//
// Returns a token allocated with `new`; it is neither stored nor freed here.
Token *Juno::Lexer::create_token(
    Token::TOKEN_TYPE type,
    const char *start,
    const char *stop,
    size_t line,
    size_t column
)
{
    // Copy exactly the matched characters. Building a std::string from `start`
    // alone would first copy everything up to the terminating NUL, for every
    // single token.
    std::string value(start, static_cast<size_t>(stop - start));

    return new Token(type, value, line, column);
}
	#include <juno/lexer.hpp>

	using namespace Juno::Lexing;

	// Position-tracking helpers used by the scanner actions. They expand in a
	// scope where `ts`/`te` (start/end of the current match), `line`, `column`
	// and `tokens` are local variables. Each macro is wrapped in
	// `do { } while (0)` so that it acts as one statement when followed by `;`,
	// including under an unbraced `if`.

	// Moves the column forward by the length of the text that was just matched.
	#define ADVANCE_COLUMN \
	    do { column += static_cast<size_t>(te - ts); } while (0)

	// Starts a new line. Columns are 1-based (`lex` starts counting at 1), so
	// the first character of the new line is column 1 as well.
	#define ADVANCE_LINE \
	    do { line++; column = 1; } while (0)

	// Pushes a token of type TYPE for the current match, recording the position
	// at which the match started, and then advances the column past it.
	#define ADD_TOKEN(TYPE) \
	    do { \
	        tokens->push(this->create_token(TYPE, ts, te, line, column)); \
	        ADVANCE_COLUMN; \
	    } while (0)

	%%{
	    machine juno_lexer;

	    keyword   = 'class' | 'def' | 'mixin' | 'use' | 'end';
	    semicolon = ';';

	    # Only Unix style linebreaks are allowed.
	    newline = '\n';

	    # The built-in `space` machine also matches newlines, which would bypass
	    # the line counting done for `newline` below, so only spaces and tabs
	    # are treated as whitespace.
	    whitespace = [ \t];

	    # Although treated as method calls these methods are considered syntax
	    # sugar so that they can be used without parentheses. This allows
	    # `10 + 10` instead of `10.+(10)`.
	    #
	    # The dash is escaped on purpose: an unescaped `+-/` inside a character
	    # class is the range from `+` to `/`, which also matches `,` and `.`.
	    operator = [+\-/*%|&];

	    # Floats come in the usual format (e.g. `10.5`): digits, a single dot
	    # and more digits. They can be prefixed with one + or - to indicate a
	    # positive or negative float.
	    float = ('+'|'-')? [0-9]+ '.' [0-9]+;

	    # Integers are the same as floats except that they can not include a
	    # dot.
	    integer = ('+'|'-')? [0-9]+;

	    # Comments are written in the form `# ...` where `...` is anything but a
	    # newline. NUL is excluded as well: the input is scanned without an end
	    # pointer, so a comment missing its final newline must stop at the
	    # terminator instead of running past the end of the input.
	    comment = '#' [^\n\0]* newline;

	    # Constants come in the format of `FooBar`. They must start with a
	    # capital and can then include any number of alphanumerical characters
	    # (including none, so `A` is a valid constant).
	    constant = upper alnum*;

	    # The scanner picks the longest match; when two patterns match the same
	    # length the one listed first wins.
	    main := |*
	        operator => { ADD_TOKEN(Token::OPERATOR); };
	        integer  => { ADD_TOKEN(Token::INTEGER); };
	        float    => { ADD_TOKEN(Token::FLOAT); };
	        keyword  => { ADD_TOKEN(Token::KEYWORD); };
	        constant => { ADD_TOKEN(Token::CONSTANT); };

	        # A comment token includes its trailing newline, hence the line bump.
	        comment  => { ADD_TOKEN(Token::COMMENT); ADVANCE_LINE; };

	        semicolon  => { ADD_TOKEN(Token::SEMICOLON); };
	        whitespace => { ADVANCE_COLUMN; };
	        newline    => { ADVANCE_LINE; };
	    *|;
	}%%

	%% write data;

	// Runs the `juno_lexer` scanner over the input `p` and returns the tokens
	// it produced.
	//
	// `p` is expected to be NUL-terminated: no length is passed in and the
	// machine is executed with `noend`, i.e. without an end-of-buffer pointer.
	//
	// The returned list is allocated with `new` and is not freed here.
	// NOTE(review): presumably the caller takes ownership -- confirm against
	// the call sites.
	TokenList *Juno::Lexer::lex(const char *p)
	{
	    auto *tokens = new ::TokenList();

	    // Start and end of the current match. These are read by the ADD_TOKEN
	    // and ADVANCE_COLUMN macros used in the scanner actions, so they have
	    // to be pointers into the input.
	    const char *ts, *te;

	    // No end-of-file position is supplied.
	    char *eof = 0;

	    // Position of the next token; both counters start at 1.
	    size_t line   = 1;
	    size_t column = 1;

	    // Scanner bookkeeping (`act`) and current state (`cs`) used by the
	    // Ragel-generated code below.
	    int act       = 0;
	    int cs        = 0;

	    // The local names above must stay as they are: the code Ragel emits
	    // for the two directives below refers to them by name.
	    %% write init;
	    %% write exec noend;

	    return tokens;
	}

	// Builds a token from the matched input range [start, stop).
	//
	// type   - the kind of token that was matched.
	// start  - pointer to the first character of the match.
	// stop   - pointer one past the last character of the match; must not
	//          precede `start`.
	// line   - forwarded unchanged to the Token constructor.
	// column - forwarded unchanged to the Token constructor.
	//
	// Returns a token allocated with `new`; it is neither stored nor freed
	// here.
	Token *Juno::Lexer::create_token(
	    Token::TOKEN_TYPE type,
	    const char *start,
	    const char *stop,
	    size_t line,
	    size_t column
	)
	{
	    // Copy exactly the matched characters. Building a std::string from
	    // `start` alone would first copy everything up to the terminating NUL,
	    // for every single token.
	    std::string value(start, static_cast<size_t>(stop - start));

	    return new Token(type, value, line, column);
	}