iwillspeak/.gitignore

## .gitignore
target

## Cargo.lock
[root]
name = "lexer_example"
version = "0.0.1"


## Cargo.toml
[package]

name = "lexer_example"
version = "0.0.1"
authors = ["Will Speak <will@willspeak.me>"]

[[bin]]
    name = "lexer_example"
    path = "main.rs"

## main.rs
use std::io;
use std::io::prelude::*;

#[derive(Debug,PartialEq)]
pub enum Tok {
    Var(String),
    Num(i32),
    Plus
}

#[derive(PartialEq)]
enum State {
    S, A, B, C, D
}

pub struct Tokeniser {
    ts: usize,
    chars: String
}

impl Tokeniser {

    pub fn new_from_str(string: &str) -> Tokeniser {
        let bytes: Vec<_> = string.bytes().collect();
        let mut temp_reader = io::Cursor::new(bytes);

        Tokeniser::new(&mut temp_reader)
    }

    pub fn new(reader: &mut Read) -> Tokeniser {

        let mut buf = String::new();

        reader.read_to_string(&mut buf).unwrap();

        Tokeniser {
            ts: 0,
            chars: buf
        }
    }

    fn next_match(&mut self) -> Option<Tok> {

        loop {

            let mut state = State::S;
            let mut te = self.ts;

            for c in self.chars[self.ts..].chars() {

                // find the next transition in the state machine
                let next = match state {
                    State::S => match c {
                        ' ' | '\t' => Some(State::A),
                        '+' => Some(State::B),
                        '0'...'9' => Some(State::C),
                        'a'...'z' => Some(State::D),
                        _ => None
                    },
                    State::A => match c {
                        ' ' | '\t' => Some(State::A),
                        _ => None
                    },
                    State::B => None,
                    State::C => match c {
                        '0'...'9' => Some(State::C),
                        _ => None
                    },
                    State::D => match c {
                        'a'...'z' | '0'...'9' => Some(State::D),
                        _ => None
                    }
                };

                // If we found a transition then consume the character
                // and move to that state
                if let Some(next_state) = next {
                    state = next_state;
                    te += 1;
                } else {
                    break;
                }
            }

            // once we can no longer match any more characters we
            // decide what token to return
            let token_str = &self.chars[self.ts..te];

            self.ts = te;

            // If we recognised some whitespace, look for the next
            // token instead
            if state == State::A {
                continue;
            }

            // Depending on which state we're in we know which token
            // we have just accepted
            return match state {
                State::B => Some(Tok::Plus),
                State::C => Some(Tok::Num(token_str.parse().unwrap())),
                State::D => Some(Tok::Var(token_str.to_string())),
                _ => None
            };
        }
    }

    pub fn matches(&mut self) -> Vec<Tok> {
        let mut v = vec![];
        while let Some(tok) = self.next_match() {
            v.push(tok);
        }
        v
    }
}

#[cfg(not(test))]
fn dump_tokens(s: &str) {

    println!("Tokenising: '{}'", s);
    for tok in Tokeniser::new_from_str(s).matches() {
        println!("{0:?}", tok);
    }
}

#[cfg(not(test))]
fn main() {

    dump_tokens("some tokens + 123 things");

    let stdin = io::stdin();
    for line in stdin.lock().lines() {
        dump_tokens(line.unwrap().as_ref());
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn var_matches_returns_var() {
        let matched = Tokeniser::new_from_str("some vars").matches();
        assert_eq!(Tok::Var("some".to_string()), matched[0]);
        assert_eq!(Tok::Var("vars".to_string()), matched[1]);
    }

    #[test]
    fn num_matches_returns_num() {
        let matched = Tokeniser::new_from_str("13337 1701 123").matches();
        assert_eq!(Tok::Num(13337), matched[0]);
        assert_eq!(Tok::Num(1701), matched[1]);
        assert_eq!(Tok::Num(123), matched[2]);
    }

    #[test]
    fn mixed_string_returns_tokens() {
        let matched = Tokeniser::new_from_str("some + var + 1337").matches();
        assert_eq!(Tok::Var("some".to_string()), matched[0]);
        assert_eq!(Tok::Plus, matched[1]);
        assert_eq!(Tok::Var("var".to_string()), matched[2]);
        assert_eq!(Tok::Plus, matched[3]);
        assert_eq!(Tok::Num(1337), matched[4]);
    }

    #[test]
    fn without_spaces_all_identifiers_are_returned() {
        let matched = Tokeniser::new_from_str("123v123").matches();
        assert_eq!(Tok::Num(123), matched[0]);
        assert_eq!(Tok::Var("v123".to_string()), matched[1]);
    }
}
	[package]

	name = "lexer_example"
	version = "0.0.1"
	authors = ["Will Speak <will@willspeak.me>"]

	[[bin]]
	name = "lexer_example"
	path = "main.rs"
	use std::io;
	use std::io::prelude::*;

	#[derive(Debug,PartialEq)]
	pub enum Tok {
	Var(String),
	Num(i32),
	Plus
	}

	#[derive(PartialEq)]
	enum State {
	S, A, B, C, D
	}

	pub struct Tokeniser {
	ts: usize,
	chars: String
	}

	impl Tokeniser {

	pub fn new_from_str(string: &str) -> Tokeniser {
	let bytes: Vec<_> = string.bytes().collect();
	let mut temp_reader = io::Cursor::new(bytes);

	Tokeniser::new(&mut temp_reader)
	}

	pub fn new(reader: &mut Read) -> Tokeniser {

	let mut buf = String::new();

	reader.read_to_string(&mut buf).unwrap();

	Tokeniser {
	ts: 0,
	chars: buf
	}
	}

	fn next_match(&mut self) -> Option<Tok> {

	loop {

	let mut state = State::S;
	let mut te = self.ts;

	for c in self.chars[self.ts..].chars() {

	// find the next transition in the state machine
	let next = match state {
	State::S => match c {
	' ' \| '\t' => Some(State::A),
	'+' => Some(State::B),
	'0'...'9' => Some(State::C),
	'a'...'z' => Some(State::D),
	_ => None
	},
	State::A => match c {
	' ' \| '\t' => Some(State::A),
	_ => None
	},
	State::B => None,
	State::C => match c {
	'0'...'9' => Some(State::C),
	_ => None
	},
	State::D => match c {
	'a'...'z' \| '0'...'9' => Some(State::D),
	_ => None
	}
	};

	// If we found a transition then consume the character
	// and move to that state
	if let Some(next_state) = next {
	state = next_state;
	te += 1;
	} else {
	break;
	}
	}

	// once we can no longer match any more characters we
	// decide what token to return
	let token_str = &self.chars[self.ts..te];

	self.ts = te;

	// If we recognised some whitespace, look for the next
	// token instead
	if state == State::A {
	continue;
	}

	// Depending on which state we're in we know which token
	// we have just accepted
	return match state {
	State::B => Some(Tok::Plus),
	State::C => Some(Tok::Num(token_str.parse().unwrap())),
	State::D => Some(Tok::Var(token_str.to_string())),
	_ => None
	};
	}
	}

	pub fn matches(&mut self) -> Vec<Tok> {
	let mut v = vec![];
	while let Some(tok) = self.next_match() {
	v.push(tok);
	}
	v
	}
	}

	#[cfg(not(test))]
	fn dump_tokens(s: &str) {

	println!("Tokenising: '{}'", s);
	for tok in Tokeniser::new_from_str(s).matches() {
	println!("{0:?}", tok);
	}
	}

	#[cfg(not(test))]
	fn main() {

	dump_tokens("some tokens + 123 things");

	let stdin = io::stdin();
	for line in stdin.lock().lines() {
	dump_tokens(line.unwrap().as_ref());
	}
	}

	#[cfg(test)]
	mod tests {
	use super::*;

	#[test]
	fn var_matches_returns_var() {
	let matched = Tokeniser::new_from_str("some vars").matches();
	assert_eq!(Tok::Var("some".to_string()), matched[0]);
	assert_eq!(Tok::Var("vars".to_string()), matched[1]);
	}

	#[test]
	fn num_matches_returns_num() {
	let matched = Tokeniser::new_from_str("13337 1701 123").matches();
	assert_eq!(Tok::Num(13337), matched[0]);
	assert_eq!(Tok::Num(1701), matched[1]);
	assert_eq!(Tok::Num(123), matched[2]);
	}

	#[test]
	fn mixed_string_returns_tokens() {
	let matched = Tokeniser::new_from_str("some + var + 1337").matches();
	assert_eq!(Tok::Var("some".to_string()), matched[0]);
	assert_eq!(Tok::Plus, matched[1]);
	assert_eq!(Tok::Var("var".to_string()), matched[2]);
	assert_eq!(Tok::Plus, matched[3]);
	assert_eq!(Tok::Num(1337), matched[4]);
	}

	#[test]
	fn without_spaces_all_identifiers_are_returned() {
	let matched = Tokeniser::new_from_str("123v123").matches();
	assert_eq!(Tok::Num(123), matched[0]);
	assert_eq!(Tok::Var("v123".to_string()), matched[1]);
	}
	}