Skip to content

Instantly share code, notes, and snippets.

@lifthrasiir
Last active December 14, 2015 09:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save lifthrasiir/5063680 to your computer and use it in GitHub Desktop.
Save lifthrasiir/5063680 to your computer and use it in GitHub Desktop.
Simple lexical analyzer for Rust
/// Measures the leading run of ASCII digits in `s`.
///
/// Returns the length of the longest prefix of `s` which `uint::from_str`
/// accepts without a failure, or `None` when `s` does not start with
/// a digit (this includes the empty string).
pub pure fn scan_uint(s: &str) -> Option<uint> {
    // Index of the first non-digit; when every character is a digit,
    // the run covers the whole string.
    let digits = match str::find(s, |c| c < '0' || c > '9') {
        Some(first) => first,
        None => s.len()
    };
    if digits > 0u { Some(digits) } else { None }
}
/// Measures an optionally signed run of ASCII digits at the start of `s`.
///
/// Returns the length of the longest prefix of `s` which `int::from_str`
/// accepts without a failure, or `None` when there is no such prefix.
/// A single leading `-` or `+` is allowed and counts towards the length.
pub pure fn scan_int(s: &str) -> Option<uint> {
    let signed = s.starts_with(~"-") || s.starts_with(~"+");
    if !signed {
        return scan_uint(s);
    }
    // Scan the digits after the sign, then account for the sign itself.
    match scan_uint(s.slice_to_end(1u)) {
        Some(digits) => Some(digits + 1u),
        None => None
    }
}
/// Returns the length of the longest prefix of the given string which
/// `float::from_str` accepts without a failure, if any.
///
/// The recognized syntax is an optionally signed integer part (see
/// `scan_int`), optionally followed by `.` and one or more digits.
/// Exponents are not recognized.
///
/// A `.` which is not followed by a digit is left unconsumed, so the
/// integer part alone is reported: `"5.x"` yields `Some(1)`. Returns
/// `None` only when the string does not start with an integer at all.
pub pure fn scan_float(s: &str) -> Option<uint> {
    do scan_int(s).chain_ref |&pos| {
        if s.len() > pos && s.char_at(pos) == '.' {
            match scan_uint(s.slice_to_end(pos + 1u)) {
                // Integer part, the dot, and the fractional digits.
                Some(pos2) => Some(pos + pos2 + 1u),
                // No fractional digits: the integer part is still
                // a valid prefix, so do not reject the whole input.
                None => Some(pos)
            }
        } else {
            Some(pos)
        }
    }
}
/// Extension methods for `str`: slicing to the end of the string and
/// measuring numeric prefixes.
pub trait StrUtil {
/// Returns the part of the given string from `begin` up to the end
/// of the string.
///
/// # Failure
///
/// Fails if `begin` does not point to a valid character or points
/// beyond the last character of the string.
pure fn slice_to_end(self, begin: uint) -> ~str;
/// Returns the length of the longest prefix of the given string which
/// `uint::from_str` accepts without a failure, if any.
pure fn scan_uint(self) -> Option<uint>;
/// Returns the length of the longest prefix of the given string which
/// `int::from_str` accepts without a failure, if any.
pure fn scan_int(self) -> Option<uint>;
/// Returns the length of the longest prefix of the given string which
/// `float::from_str` accepts without a failure, if any.
pure fn scan_float(self) -> Option<uint>;
}
/// Implements `StrUtil` for string slices; the `scan_*` methods simply
/// forward to the free functions of the same name.
pub impl StrUtil for &str {
// Slices from `begin` to `self.len()`, i.e. through the end of the string.
pure fn slice_to_end(self, begin: uint) -> ~str {
self.slice(begin, self.len())
}
// Method-call forms of the free functions `scan_uint`, `scan_int`
// and `scan_float`.
pure fn scan_uint(self) -> Option<uint> { scan_uint(self) }
pure fn scan_int(self) -> Option<uint> { scan_int(self) }
pure fn scan_float(self) -> Option<uint> { scan_float(self) }
}
/// A trait which provides the `prefix_shifted` method. Similar to
/// `str::starts_with`, but with `self` and the argument swapped:
/// `self` is the prefix and the argument is the string to inspect.
pub trait ShiftablePrefix {
/// If `s` starts with `self`, returns the rest of `s` with that
/// prefix stripped exactly once; returns `None` otherwise.
pure fn prefix_shifted(&self, s: &str) -> Option<~str>;
}
/// A single character used as a prefix.
pub impl ShiftablePrefix for char {
    /// Strips `self` from the front of `s` when it is the first character
    /// of `s`; yields `None` for an empty `s` or a different first character.
    pure fn prefix_shifted(&self, s: &str) -> Option<~str> {
        if s.is_empty() {
            return None;
        }
        // `next` is the index just past the first character of `s`.
        let str::CharRange {ch, next} = str::char_range_at(s, 0u);
        if ch != *self {
            return None;
        }
        Some(s.slice_to_end(next))
    }
}
/// A string slice used as a prefix.
pub impl ShiftablePrefix for &str {
    /// Strips `self` from the front of `s` when `s` starts with it;
    /// yields `None` otherwise.
    pure fn prefix_shifted(&self, s: &str) -> Option<~str> {
        match s.starts_with(*self) {
            true => Some(s.slice_to_end(self.len())),
            false => None
        }
    }
}
// A lexer barely powerful enough to parse BMS format. Comparable to
// C's `sscanf`.
//
// `lex!(e; fmt1, fmt2, ..., fmtN)` returns an expression that evaluates
// to true if and only if all format specifications are matched. The format
// specifications (analogous to `sscanf`'s `%`-string) are as follows:
//
// - `ws`: Consumes one or more whitespace. (C: `%*[ \t\r\n]` or similar)
// - `ws*`: Consumes zero or more whitespace. (C: ` `)
// - `int [-> e2]`: Consumes an integer and optionally saves it to `e2`.
// (C: `%d` and `%*d`, but does not consume preceding whitespace)
// The integer syntax is slightly limited compared to `sscanf`.
// - `float [-> e2]`: Consumes a real number and optionally saves it to
// `e2`. (C: `%f` etc.) Again, the real number syntax is slightly
// limited; especially an exponent support is missing.
// - `str [-> e2]`: Consumes a remaining input as a string and optionally
// saves it to `e2`. (C: `%s` etc.) Implies `!`.
// - `!`: Ensures that the entire string has been consumed. Should be the
// last format specification.
// - `"foo"` etc.: An ordinary expression is treated as a literal string
// or literal character.
//
// Rust: - there is no `libc::sscanf` due to the varargs. maybe regex
// support will make this useless in the future, but not now.
// - multiple statements do not expand correctly. (#4375)
// - it is desirable to have a matcher that only accepts an integer
// literal or string literal, not a generic expression.
// - no hygienic macro yet. possibly observable names from `$e`
// should be escaped for now.
// - it would be more useful to generate bindings for parsed result.
// this is related to many issues in general.
// Each arm matches the first format specification, consumes the matching
// prefix of `$e` and recurses on the remaining input with `$($tail)*`.
// Every arm yields a boolean: `false` as soon as one specification fails.
macro_rules! lex(
// Base case: no specification is left, so the match succeeded.
($e:expr; ) => (true);
// `!`: succeeds only if no input remains.
($e:expr; !) => ($e.is_empty());
// `int -> dst`: measures the integer prefix, parses it and stores the
// value to `$dst`. Yields `false` if scanning or parsing gives `None`.
($e:expr; int -> $dst:expr, $($tail:tt)*) => ({
let _line: &str = $e;
// Rust: num::from_str_bytes_common does not recognize a number
// followed by garbage, so the extent of the number is determined
// here first and only that prefix is handed to the parser.
do _line.scan_int().map_default(false) |&_endpos| {
let _prefix = _line.slice(0, _endpos);
do int::from_str(_prefix).map_default(false) |&_value| {
$dst = _value;
lex!(_line.slice_to_end(_endpos); $($tail)*)
}
}
});
// `uint -> dst`: same as `int -> dst` but for unsigned integers.
($e:expr; uint -> $dst:expr, $($tail:tt)*) => ({
let _line: &str = $e;
// Rust: same workaround as in the `int -> dst` arm.
do _line.scan_uint().map_default(false) |&_endpos| {
let _prefix = _line.slice(0, _endpos);
do uint::from_str(_prefix).map_default(false) |&_value| {
$dst = _value;
lex!(_line.slice_to_end(_endpos); $($tail)*)
}
}
});
// `float -> dst`: same as `int -> dst` but for real numbers.
($e:expr; float -> $dst:expr, $($tail:tt)*) => ({
let _line: &str = $e;
// Rust: same workaround as in the `int -> dst` arm.
do _line.scan_float().map_default(false) |&_endpos| {
let _prefix = _line.slice(0, _endpos);
do float::from_str(_prefix).map_default(false) |&_value| {
$dst = _value;
lex!(_line.slice_to_end(_endpos); $($tail)*)
}
}
});
// `str -> dst`: stores an owned copy of the whole remaining input
// to `$dst`.
($e:expr; str -> $dst:expr, $($tail:tt)*) => ({
let _line: &str = $e;
$dst = _line.to_owned();
lex!(""; $($tail)*) // the whole input was taken, so the remainder is empty
});
// `ws`: requires the first character to be whitespace, then drops all
// leading whitespace.
($e:expr; ws, $($tail:tt)*) => ({
let _line: &str = $e;
if !_line.is_empty() && char::is_whitespace(_line.char_at(0)) {
lex!(str::trim_left(_line); $($tail)*)
} else {
false
}
});
// `ws*`: drops leading whitespace, if there is any.
($e:expr; ws*, $($tail:tt)*) => ({
let _line: &str = $e;
lex!(str::trim_left(_line); $($tail)*)
});
// `int`, `uint` and `float` without a destination: reuse the `-> dst`
// arms with a throwaway local as the destination.
($e:expr; int, $($tail:tt)*) => ({
let mut _dummy: int = 0;
lex!($e; int -> _dummy, $($tail)*)
});
($e:expr; uint, $($tail:tt)*) => ({
let mut _dummy: uint = 0;
lex!($e; uint -> _dummy, $($tail)*)
});
($e:expr; float, $($tail:tt)*) => ({
let mut _dummy: float = 0.0;
lex!($e; float -> _dummy, $($tail)*)
});
// `str` without a destination: the remaining input is discarded, and
// `$e` itself does not appear in the expansion.
($e:expr; str, $($tail:tt)*) => ({
lex!(""; $($tail)*) // the rest of the input is skipped, so the remainder is empty
});
// Any other expression is a literal string or character which has to
// appear at the start of the input. This arm is listed after the keyword
// arms above.
($e:expr; $lit:expr, $($tail:tt)*) => ({
do $lit.prefix_shifted($e).map_default(false) |&_line| {
lex!(_line; $($tail)*)
}
});
// Forms without a trailing comma: forward to the comma-terminated arms
// above with an empty tail.
($e:expr; int -> $dst:expr) => (lex!($e; int -> $dst, ));
($e:expr; uint -> $dst:expr) => (lex!($e; uint -> $dst, ));
($e:expr; float -> $dst:expr) => (lex!($e; float -> $dst, ));
($e:expr; str -> $dst:expr) => (lex!($e; str -> $dst, ));
($e:expr; ws) => (lex!($e; ws, ));
($e:expr; ws*) => (lex!($e; ws*, ));
($e:expr; int) => (lex!($e; int, ));
($e:expr; uint) => (lex!($e; uint, ));
($e:expr; float) => (lex!($e; float, ));
($e:expr; str) => (lex!($e; str, ));
($e:expr; $lit:expr) => (lex!($e; $lit, ))
)
fn main() {
let mut x = 0, y = 0.0, s = ~"";
if lex!(~" +42 0 hello -5.4 @ Hello, world!";
ws*, int -> x, ws*, uint, ws*, "hello ", float -> y, ws, '@', str -> s, !) {
io::println(fmt!("[%?][%?][%?]", x, y, s));
} else {
io::println(~"fail");
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment