Skip to content

Instantly share code, notes, and snippets.

@khayrov
Forked from lucassmagal/Lexer.java
Created August 28, 2012 16:25
Show Gist options
  • Save khayrov/3499946 to your computer and use it in GitHub Desktop.
Save khayrov/3499946 to your computer and use it in GitHub Desktop.
Ragel with Java
// line 1 "Lexer.rl"
/**
 * Table-driven scanner for the {@code simple_lexer} machine, generated by Ragel from Lexer.rl
 * (the {@code // line N "Lexer.rl"} markers point back at that specification).
 *
 * <p>{@link #main} runs the machine over a fixed sample string and prints one token name per
 * line through {@link #emit}. If the machine reaches state 0 the loop just stops; nothing is
 * reported to the caller.
 *
 * <p>NOTE(review): as generated output, hand edits here would presumably be overwritten the next
 * time Lexer.rl is compiled.
 */
public class Lexer {
// line 19 "Lexer.rl"
// line 10 "Lexer.java"
// Flattened action lists: each list is a count followed by that many action ids. The other
// tables refer to a list by the index of its count; index 0 holds a count of 0 (no actions).
private static byte[] init__simple_lexer_actions_0()
{
return new byte [] {
0, 1, 0, 1, 1, 1, 2, 1, 3, 1, 4, 1,
5, 1, 6, 1, 7, 1, 8
};
}
private static final byte _simple_lexer_actions[] = init__simple_lexer_actions_0();
// Per state: index in _simple_lexer_trans_keys where that state's keys begin.
private static byte[] init__simple_lexer_key_offsets_0()
{
return new byte [] {
0, 0, 2, 4, 16, 19, 21
};
}
private static final byte _simple_lexer_key_offsets[] = init__simple_lexer_key_offsets_0();
// Key characters (as char codes). For each state the single keys come first, followed by
// (low, high) pairs describing inclusive ranges; both groups are binary-searched by the driver.
private static char[] init__simple_lexer_trans_keys_0()
{
return new char [] {
48, 57, 48, 57, 32, 43, 45, 61, 9, 13, 48, 57,
65, 90, 97, 122, 46, 48, 57, 48, 57, 95, 65, 90,
97, 122, 0
};
}
private static final char _simple_lexer_trans_keys[] = init__simple_lexer_trans_keys_0();
// Per state: how many single keys it has.
private static byte[] init__simple_lexer_single_lengths_0()
{
return new byte [] {
0, 0, 0, 4, 1, 0, 1
};
}
private static final byte _simple_lexer_single_lengths[] = init__simple_lexer_single_lengths_0();
// Per state: how many (low, high) range pairs it has.
private static byte[] init__simple_lexer_range_lengths_0()
{
return new byte [] {
0, 1, 1, 4, 1, 1, 2
};
}
private static final byte _simple_lexer_range_lengths[] = init__simple_lexer_range_lengths_0();
// Per state: index of its first transition in _simple_lexer_trans_targs/_trans_actions.
// A state's transitions are laid out as: one per single key, one per range, then the slot
// used when no key matches.
private static byte[] init__simple_lexer_index_offsets_0()
{
return new byte [] {
0, 0, 2, 4, 13, 16, 18
};
}
private static final byte _simple_lexer_index_offsets[] = init__simple_lexer_index_offsets_0();
// Per transition: the state the machine moves to.
private static byte[] init__simple_lexer_trans_targs_0()
{
return new byte [] {
4, 0, 5, 3, 3, 1, 1, 3, 3, 4, 6, 6,
0, 2, 4, 3, 5, 3, 6, 6, 6, 3, 3, 3,
3, 3, 0
};
}
private static final byte _simple_lexer_trans_targs[] = init__simple_lexer_trans_targs_0();
// Per transition: index of its action list in _simple_lexer_actions (0 = no actions).
private static byte[] init__simple_lexer_trans_actions_0()
{
return new byte [] {
5, 0, 0, 17, 9, 0, 0, 7, 9, 5, 0, 0,
0, 0, 5, 11, 0, 13, 0, 0, 0, 15, 17, 11,
13, 15, 0
};
}
private static final byte _simple_lexer_trans_actions[] = init__simple_lexer_trans_actions_0();
// Per state: action list run right after a transition lands in that state.
private static byte[] init__simple_lexer_to_state_actions_0()
{
return new byte [] {
0, 0, 0, 1, 0, 0, 0
};
}
private static final byte _simple_lexer_to_state_actions[] = init__simple_lexer_to_state_actions_0();
// Per state: action list run before a character is matched while in that state.
private static byte[] init__simple_lexer_from_state_actions_0()
{
return new byte [] {
0, 0, 0, 3, 0, 0, 0
};
}
private static final byte _simple_lexer_from_state_actions[] = init__simple_lexer_from_state_actions_0();
// Per state: 1 + index of the transition to take when input ends in that state, or 0 for none.
private static byte[] init__simple_lexer_eof_trans_0()
{
return new byte [] {
0, 0, 23, 0, 24, 25, 26
};
}
private static final byte _simple_lexer_eof_trans[] = init__simple_lexer_eof_trans_0();
// State-number constants. Only simple_lexer_start is referenced in this class; the driver
// tests for the error state with the literal 0 rather than simple_lexer_error.
static final int simple_lexer_start = 3;
static final int simple_lexer_first_final = 3;
static final int simple_lexer_error = 0;
static final int simple_lexer_en_main = 3;
// line 22 "Lexer.rl"
/**
 * Reports one recognised token by printing its name on its own line to standard output.
 *
 * @param token name of the token kind that was matched
 */
public static void emit(String token) {
System.out.println(token);
}
/**
 * Scans the fixed sample string {@code "x = 22 yz = 11.46"} and emits the name of every token
 * found, in input order.
 *
 * @param args not used
 */
public static void main(String[] args) {
int cs; /* state number */
char[] data = "x = 22 yz = 11.46".toCharArray(); /* input */
int p = 0, /* start of input */
pe = data.length, /* end of input */
eof = pe,
ts, /* token start */
te, /* token end */
act /* used for scanner backtracking */;
// line 152 "Lexer.java"
// Machine initialisation. act is set here but never read anywhere in this file.
{
cs = simple_lexer_start;
ts = -1;
te = -1;
act = 0;
}
// line 38 "Lexer.rl"
// line 163 "Lexer.java"
// Machine execution. _goto_targ selects the switch case to resume at, and "continue _goto"
// jumps there; without a jump the cases fall through in source order (0, 1, 3, 2, 4, 5).
{
int _klen;
int _trans = 0;
int _acts;
int _nacts;
int _keys;
int _goto_targ = 0;
_goto: while (true) {
switch ( _goto_targ ) {
// Entry: go straight to end-of-input handling for empty input, or quit if already in state 0.
case 0:
if ( p == pe ) {
_goto_targ = 4;
continue _goto;
}
if ( cs == 0 ) {
_goto_targ = 5;
continue _goto;
}
// Per-character step: run the current state's from-state actions, then look up data[p].
case 1:
_acts = _simple_lexer_from_state_actions[cs];
_nacts = (int) _simple_lexer_actions[_acts++];
while ( _nacts-- > 0 ) {
switch ( _simple_lexer_actions[_acts++] ) {
case 1:
// line 1 "NONE"
// Remember where the token being scanned starts.
{ts = p;}
break;
// line 192 "Lexer.java"
}
}
// Find the transition index for data[p]; "break _match" leaves as soon as a key matches.
_match: do {
_keys = _simple_lexer_key_offsets[cs];
_trans = _simple_lexer_index_offsets[cs];
_klen = _simple_lexer_single_lengths[cs];
// Binary search over this state's single keys.
if ( _klen > 0 ) {
int _lower = _keys;
int _mid;
int _upper = _keys + _klen - 1;
while (true) {
if ( _upper < _lower )
break;
_mid = _lower + ((_upper-_lower) >> 1);
if ( data[p] < _simple_lexer_trans_keys[_mid] )
_upper = _mid - 1;
else if ( data[p] > _simple_lexer_trans_keys[_mid] )
_lower = _mid + 1;
else {
_trans += (_mid - _keys);
break _match;
}
}
// No single key matched: skip past the single keys and their transitions.
_keys += _klen;
_trans += _klen;
}
_klen = _simple_lexer_range_lengths[cs];
// Binary search over this state's (low, high) pairs; "& ~1" keeps _mid on a pair's low bound.
if ( _klen > 0 ) {
int _lower = _keys;
int _mid;
int _upper = _keys + (_klen<<1) - 2;
while (true) {
if ( _upper < _lower )
break;
_mid = _lower + (((_upper-_lower) >> 1) & ~1);
if ( data[p] < _simple_lexer_trans_keys[_mid] )
_upper = _mid - 2;
else if ( data[p] > _simple_lexer_trans_keys[_mid+1] )
_lower = _mid + 2;
else {
_trans += ((_mid - _keys)>>1);
break _match;
}
}
// No range matched either: _trans now indexes the slot that follows the range transitions.
_trans += _klen;
}
} while (false);
// Take transition _trans (also the re-entry point for end-of-input transitions).
case 3:
cs = _simple_lexer_trans_targs[_trans];
if ( _simple_lexer_trans_actions[_trans] != 0 ) {
_acts = _simple_lexer_trans_actions[_trans];
_nacts = (int) _simple_lexer_actions[_acts++];
while ( _nacts-- > 0 )
{
switch ( _simple_lexer_actions[_acts++] )
{
case 2:
// line 1 "NONE"
// Record the end of the match so far (one past the current character).
{te = p+1;}
break;
// Cases 3 and 4: the current character completes the token.
case 3:
// line 14 "Lexer.rl"
{te = p+1;{ emit("assignment"); }}
break;
case 4:
// line 16 "Lexer.rl"
{te = p+1;{ emit("space"); }}
break;
// Cases 5 to 7: the token ended just before the current character. p is stepped back so
// that, after the ++p in the next phase, the same character is looked at again.
case 5:
// line 12 "Lexer.rl"
{te = p;p--;{ emit("integer"); }}
break;
case 6:
// line 13 "Lexer.rl"
{te = p;p--;{ emit("float"); }}
break;
case 7:
// line 15 "Lexer.rl"
{te = p;p--;{ emit("identifier"); }}
break;
// Case 8: rewind p to the last recorded token end (te) and emit the integer matched up to it.
case 8:
// line 12 "Lexer.rl"
{{p = ((te))-1;}{ emit("integer"); }}
break;
// line 283 "Lexer.java"
}
}
}
// After the transition: run the new state's to-state actions, then advance to the next character.
case 2:
_acts = _simple_lexer_to_state_actions[cs];
_nacts = (int) _simple_lexer_actions[_acts++];
while ( _nacts-- > 0 ) {
switch ( _simple_lexer_actions[_acts++] ) {
case 0:
// line 1 "NONE"
// Clear the token start marker.
{ts = -1;}
break;
// line 297 "Lexer.java"
}
}
// State 0 ends the scan immediately, without any report.
if ( cs == 0 ) {
_goto_targ = 5;
continue _goto;
}
if ( ++p != pe ) {
_goto_targ = 1;
continue _goto;
}
// End of input: if the current state has an end-of-input transition, take it via case 3.
case 4:
if ( p == eof )
{
if ( _simple_lexer_eof_trans[cs] > 0 ) {
_trans = _simple_lexer_eof_trans[cs] - 1;
_goto_targ = 3;
continue _goto;
}
}
// Exit point of the machine.
case 5:
}
break; }
}
// line 40 "Lexer.rl"
}
}
/**
 * Ragel specification of a small demo scanner ({@code ragel -J Lexer.rl -o Lexer.java}).
 *
 * <p>The scanner splits its input into integers, floats, assignment signs, identifiers and
 * whitespace characters, and prints the name of each token on its own line.
 */
public class Lexer {

    %%{
        machine simple_lexer;

        # Token patterns. Numbers may carry a leading sign; identifiers start with a
        # letter and continue with letters or underscores (no digits).
        integer    = ('+'|'-')?[0-9]+;
        float      = ('+'|'-')?[0-9]+'.'[0-9]+;
        assignment = '=';
        identifier = [a-zA-Z][a-zA-Z_]*;

        # Scanner: repeatedly takes the longest match among the patterns below and
        # runs the attached action.
        main := |*
            integer    => { emit("integer"); };
            float      => { emit("float"); };
            assignment => { emit("assignment"); };
            identifier => { emit("identifier"); };
            space      => { emit("space"); };
        *|;
    }%%

    %% write data;

    /**
     * Reports one recognised token by printing its name on its own line to standard output.
     *
     * @param token name of the token kind that was matched
     */
    public static void emit(String token) {
        System.out.println(token);
    }

    /**
     * Scans a string and emits the name of every token found, in input order.
     *
     * <p>If the input cannot be tokenised completely, the tokens recognised so far are still
     * printed, then a message with the failing offset is written to standard error and the
     * process exits with status 1.
     *
     * @param args optional; {@code args[0]} is the text to scan. Without arguments the
     *             built-in sample {@code "x = 22 yz = 11.46"} is scanned.
     */
    public static void main(String[] args) {
        String input = args.length > 0 ? args[0] : "x = 22 yz = 11.46";

        int cs; /* state number */
        char[] data = input.toCharArray(); /* input */
        int p = 0, /* start of input */
            pe = data.length, /* end of input */
            eof = pe,
            ts, /* token start */
            te, /* token end */
            act /* used for scanner backtracking */;

        %% write init;
        %% write exec;

        // The generated loop stops without any signal when it hits a character no pattern
        // accepts, or when input ends in the middle of a token (for example a lone sign).
        // In both cases cs is left below the first final state and p at the failing offset.
        if (cs < simple_lexer_first_final) {
            System.err.println("lexical error at offset " + p + " in \"" + input + "\"");
            System.exit(1);
        }
    }
}
$ ragel -J Lexer.rl -o Lexer.java && javac Lexer.java && java Lexer
identifier
space
assignment
space
integer
space
identifier
space
assignment
space
float
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment