public
Last active

Simple lexical analyzer for Rust

  • Download Gist
lex.rs
Rust
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224
/// Measures the run of decimal digits (`0`-`9`) at the start of `s`.
///
/// Returns `Some(n)`, where `n` is the length of the longest all-digit
/// prefix (the part `uint::from_str` is expected to accept), or `None`
/// when `s` is empty or does not begin with a digit.
pub pure fn scan_uint(s: &str) -> Option<uint> {
    // Position of the first non-digit character. A string consisting
    // solely of digits has no such character, so its full length is used.
    let digits = match str::find(s, |c| c < '0' || c > '9') {
        Some(idx) => idx,
        None => s.len()
    };
    // A zero-length run means there is no number here at all.
    if digits > 0u { Some(digits) } else { None }
}
 
/// Returns a length of the longest prefix of given string, which
/// `int::from_str` accepts without a failure, if any.
pub pure fn scan_int(s: &str) -> Option<uint> {
if s.starts_with(~"-") || s.starts_with(~"+") {
scan_uint(s.slice_to_end(1u)).map(|pos| pos + 1u)
} else {
scan_uint(s)
}
}
 
/// Measures the decimal real number at the start of `s`.
///
/// The accepted syntax is an optionally signed integer (see `scan_int`),
/// optionally followed by `.` and one or more digits. Exponents are not
/// recognized. Returns the length of the longest such prefix (the part
/// `float::from_str` is expected to accept), or `None` when `s` does not
/// start with an integer.
///
/// A `.` that is not followed by a digit is not consumed: for `"12.x"` the
/// result is `Some(2)`. The integer part alone is still a valid number, so
/// a dangling `.` must not turn a successful scan into a failure.
pub pure fn scan_float(s: &str) -> Option<uint> {
    do scan_int(s).chain_ref |&pos| {
        if s.len() > pos && s.char_at(pos) == '.' {
            match scan_uint(s.slice_to_end(pos + 1u)) {
                // Integer part + `.` + fractional digits.
                Some(frac) => Some(pos + frac + 1u),
                // No digit after the `.`: fall back to the integer part
                // and leave the `.` unconsumed.
                None => Some(pos)
            }
        } else {
            Some(pos)
        }
    }
}
 
/// Extensions to `str`: slicing to the end of the string and the numeric
/// prefix scanners.
pub trait StrUtil {
/// Returns the part of the string that starts at `begin` and runs to the
/// end of the string.
///
/// # Failure
///
/// Fails if `begin` does not point to a valid character or lies beyond
/// the last character of the string.
pure fn slice_to_end(self, begin: uint) -> ~str;
 
/// Returns the length of the longest prefix of the string that
/// `uint::from_str` accepts without a failure, or `None` if there is no
/// such prefix.
pure fn scan_uint(self) -> Option<uint>;
 
/// Returns the length of the longest prefix of the string that
/// `int::from_str` accepts without a failure, or `None` if there is no
/// such prefix.
pure fn scan_int(self) -> Option<uint>;
 
/// Returns the length of the longest prefix of the string that
/// `float::from_str` accepts without a failure, or `None` if there is no
/// such prefix.
pure fn scan_float(self) -> Option<uint>;
}
 
/// Implements `StrUtil` for borrowed strings. The scanners forward to the
/// free functions of the same name, so both call styles behave identically.
pub impl StrUtil for &str {
// Slices from `begin` up to `self.len()`, i.e. through the end of the string.
pure fn slice_to_end(self, begin: uint) -> ~str {
self.slice(begin, self.len())
}
// Method-call forms of the free functions `scan_uint`, `scan_int` and
// `scan_float`.
pure fn scan_uint(self) -> Option<uint> { scan_uint(self) }
pure fn scan_int(self) -> Option<uint> { scan_int(self) }
pure fn scan_float(self) -> Option<uint> { scan_float(self) }
}
 
/// A trait which provides the `prefix_shifted` method. Similar to
/// `str::starts_with`, but with `self` and the argument swapped: `self` is
/// the prefix being looked for and the argument is the string to search.
pub trait ShiftablePrefix {
/// If `s` starts with `self`, returns the rest of `s` with that one
/// leading occurrence of `self` removed; otherwise returns `None`.
pure fn prefix_shifted(&self, s: &str) -> Option<~str>;
}
 
/// Lets a single character act as a prefix to strip.
pub impl ShiftablePrefix for char {
    /// Returns the remainder of `s` after its first character when that
    /// character equals `self`; returns `None` when `s` is empty or starts
    /// with a different character.
    pure fn prefix_shifted(&self, s: &str) -> Option<~str> {
        // An empty string has no first character to compare against.
        if s.is_empty() {
            return None;
        }
        // Decode the first character; `next` is the offset just past it.
        let str::CharRange {ch, next} = str::char_range_at(s, 0u);
        if ch != *self {
            return None;
        }
        Some(s.slice_to_end(next))
    }
}
 
/// Lets a string act as a prefix to strip.
pub impl ShiftablePrefix for &str {
    /// Returns the remainder of `s` after a leading `self` when `s` starts
    /// with `self`; returns `None` otherwise.
    pure fn prefix_shifted(&self, s: &str) -> Option<~str> {
        if !s.starts_with(*self) {
            return None;
        }
        // Skip exactly as much of `s` as the prefix is long.
        let skip = self.len();
        Some(s.slice_to_end(skip))
    }
}
 
// A lexer barely powerful enough to parse BMS format. Comparable to
// C's `sscanf`.
//
// `lex!(e; fmt1, fmt2, ..., fmtN)` expands to an expression that evaluates
// to true if and only if every format specification has been consumed. The
// format specifications (analogous to `sscanf`'s `%`-string) are as follows:
//
// - `ws`: Consumes one or more whitespace characters. (C: `%*[ \t\r\n]` or
// similar)
// - `ws*`: Consumes zero or more whitespace characters. (C: ` `)
// - `int [-> e2]`: Consumes an integer and optionally saves it to `e2`.
// (C: `%d` and `%*d`, but does not consume preceding whitespace)
// The integer syntax is slightly limited compared to `sscanf`.
// - `float [-> e2]`: Consumes a real number and optionally saves it to
// `e2`. (C: `%f` etc.) Again, the real number syntax is slightly
// limited; especially an exponent support is missing.
// - `str [-> e2]`: Consumes the remaining input as a string and optionally
// saves it to `e2`. (C: `%s` etc.) Implies `!`.
// - `!`: Ensures that the entire string has been consumed. Should be the
// last format specification.
// - `"foo"` etc.: An ordinary expression is treated as a literal string
// or literal character.
//
// Rust: - there is no `libc::sscanf` due to the varargs. maybe regex
// support will make this useless in the future, but not now.
// - multiple statements do not expand correctly. (#4375)
// - it is desirable to have a matcher that only accepts an integer
// literal or string literal, not a generic expression.
// - no hygienic macro yet. possibly observable names from `$e`
// should be escaped for now.
// - it would be more useful to generate bindings for parsed result.
// this is related to many issues in general.
macro_rules! lex(
// Terminal cases: an empty specification list always succeeds, and `!`
// succeeds only when nothing is left of the input.
($e:expr; ) => (true);
($e:expr; !) => ($e.is_empty());
 
// Numeric specifications with a destination. Each one measures the numeric
// prefix with the matching `scan_*` method, parses that prefix, assigns the
// value to `$dst` and then matches the remaining specifications against the
// rest of the input. Either `map_default(false)` turns a failed scan or a
// failed parse into an overall `false`. Note that `$dst` is assigned before
// the tail is tried, so it keeps the parsed value even if a later
// specification fails.
($e:expr; int -> $dst:expr, $($tail:tt)*) => ({
let _line: &str = $e;
// Rust: num::from_str_bytes_common does not recognize a number
// followed by garbage, so we need to find where the number ends ourselves
// and hand only that prefix to `from_str`.
do _line.scan_int().map_default(false) |&_endpos| {
let _prefix = _line.slice(0, _endpos);
do int::from_str(_prefix).map_default(false) |&_value| {
$dst = _value;
lex!(_line.slice_to_end(_endpos); $($tail)*)
}
}
});
($e:expr; uint -> $dst:expr, $($tail:tt)*) => ({
let _line: &str = $e;
// Rust: same workaround as in the `int` case above.
do _line.scan_uint().map_default(false) |&_endpos| {
let _prefix = _line.slice(0, _endpos);
do uint::from_str(_prefix).map_default(false) |&_value| {
$dst = _value;
lex!(_line.slice_to_end(_endpos); $($tail)*)
}
}
});
($e:expr; float -> $dst:expr, $($tail:tt)*) => ({
let _line: &str = $e;
// Rust: same workaround as in the `int` case above.
do _line.scan_float().map_default(false) |&_endpos| {
let _prefix = _line.slice(0, _endpos);
do float::from_str(_prefix).map_default(false) |&_value| {
$dst = _value;
lex!(_line.slice_to_end(_endpos); $($tail)*)
}
}
});
// `str -> dst` stores a copy of everything that is left in `$dst`.
($e:expr; str -> $dst:expr, $($tail:tt)*) => ({
let _line: &str = $e;
$dst = _line.to_owned();
lex!(""; $($tail)*) // optimization: the whole input was taken, so the tail sees an empty string
});
 
// `ws` requires the input to start with a whitespace character and then
// drops all leading whitespace; `ws*` drops leading whitespace without
// requiring any.
($e:expr; ws, $($tail:tt)*) => ({
let _line: &str = $e;
if !_line.is_empty() && char::is_whitespace(_line.char_at(0)) {
lex!(str::trim_left(_line); $($tail)*)
} else {
false
}
});
($e:expr; ws*, $($tail:tt)*) => ({
let _line: &str = $e;
lex!(str::trim_left(_line); $($tail)*)
});
// Numeric specifications without a destination: parse into a throwaway
// local by delegating to the `-> $dst` forms above.
($e:expr; int, $($tail:tt)*) => ({
let mut _dummy: int = 0;
lex!($e; int -> _dummy, $($tail)*)
});
($e:expr; uint, $($tail:tt)*) => ({
let mut _dummy: uint = 0;
lex!($e; uint -> _dummy, $($tail)*)
});
($e:expr; float, $($tail:tt)*) => ({
let mut _dummy: float = 0.0;
lex!($e; float -> _dummy, $($tail)*)
});
// `str` without a destination discards the rest of the input.
($e:expr; str, $($tail:tt)*) => ({
lex!(""; $($tail)*) // optimization: `$e` is not even evaluated, the tail sees an empty string
});
// Any other expression is a literal prefix (string or character): strip it
// with `prefix_shifted` and continue, or fail if the input does not start
// with it. This arm must stay after the keyword arms above.
($e:expr; $lit:expr, $($tail:tt)*) => ({
do $lit.prefix_shifted($e).map_default(false) |&_line| {
lex!(_line; $($tail)*)
}
});
 
// A final specification written without a trailing comma is rewritten to
// the comma-terminated form with an empty tail, so each specification is
// implemented only once.
($e:expr; int -> $dst:expr) => (lex!($e; int -> $dst, ));
($e:expr; uint -> $dst:expr) => (lex!($e; uint -> $dst, ));
($e:expr; float -> $dst:expr) => (lex!($e; float -> $dst, ));
($e:expr; str -> $dst:expr) => (lex!($e; str -> $dst, ));
($e:expr; ws) => (lex!($e; ws, ));
($e:expr; ws*) => (lex!($e; ws*, ));
($e:expr; int) => (lex!($e; int, ));
($e:expr; uint) => (lex!($e; uint, ));
($e:expr; float) => (lex!($e; float, ));
($e:expr; str) => (lex!($e; str, ));
($e:expr; $lit:expr) => (lex!($e; $lit, ))
)
 
fn main() {
let mut x = 0, y = 0.0, s = ~"";
if lex!(~" +42 0 hello -5.4 @ Hello, world!";
ws*, int -> x, ws*, uint, ws*, "hello ", float -> y, ws, '@', str -> s, !) {
io::println(fmt!("[%?][%?][%?]", x, y, s));
} else {
io::println(~"fail");
}
}

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.