// Gist by @amorphobia (gist id 96284562aa015a78473cd03c5698fadd), last active April 4, 2022 06:36.
// The GitHub page chrome captured with this source ("Skip to content", "Show Gist options",
// "Save ... to your computer and use it in GitHub Desktop") is not part of the program.
use crate::lexer::Lexer;
/// Token types: numbers, words (identifiers and reserved words) and single-byte symbols.
pub mod token {
    /// Token tags and shorthand constants for the payload-free ones.
    pub mod tag {
        /// Identifies the kind of a token.
        ///
        /// Equality and hashing are derived so that callers can write
        /// `token.tag() == Tag::Num` or key a map by tag.
        #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
        pub enum Tag {
            /// A single byte carried verbatim.
            Char(u8),
            /// An integer literal (see `Num`).
            Num,
            /// An identifier.
            Id,
            /// The word `true`.
            True,
            /// The word `false`.
            False,
        }

        /// Shorthand for [`Tag::Num`].
        pub const NUM: Tag = Tag::Num;
        /// Shorthand for [`Tag::Id`].
        pub const ID: Tag = Tag::Id;
        /// Shorthand for [`Tag::True`].
        pub const TRUE: Tag = Tag::True;
        /// Shorthand for [`Tag::False`].
        pub const FALSE: Tag = Tag::False;
    }

    use std::fmt::Debug;

    pub use self::tag::*;

    /// Behaviour shared by every token.
    ///
    /// `Debug` is a supertrait so that a `Box<dyn Token>` can be printed with
    /// `{:?}`, including the payload of the concrete token behind it.
    pub trait Token: Debug {
        /// Returns the tag identifying what kind of token this is.
        fn tag(&self) -> Tag;
    }

    /// An integer literal.
    #[derive(Clone, Copy, Debug, PartialEq, Eq)]
    pub struct Num {
        tag: Tag,
        value: i32,
    }

    impl Token for Num {
        fn tag(&self) -> Tag {
            self.tag
        }
    }

    impl Num {
        /// Creates a number token tagged [`Tag::Num`] holding `value`.
        pub fn new(value: i32) -> Self {
            Self { tag: NUM, value }
        }

        /// Returns the numeric value of the literal.
        pub fn value(&self) -> i32 {
            self.value
        }
    }

    /// A token that carries its text: an identifier or a reserved word.
    #[derive(Clone, Debug, PartialEq, Eq)]
    pub struct Word {
        tag: Tag,
        lexeme: String,
    }

    impl Token for Word {
        fn tag(&self) -> Tag {
            self.tag
        }
    }

    impl Word {
        /// Creates a word with the given `tag`, storing an owned copy of `lexeme`.
        pub fn new(tag: Tag, lexeme: &str) -> Self {
            Self {
                tag,
                lexeme: lexeme.to_string(),
            }
        }

        /// Returns the text of the word.
        pub fn lexeme(&self) -> &str {
            &self.lexeme
        }
    }

    /// A token that consists of nothing but its tag.
    #[derive(Clone, Copy, Debug, PartialEq, Eq)]
    pub struct Other {
        tag: Tag,
    }

    impl Token for Other {
        fn tag(&self) -> Tag {
            self.tag
        }
    }

    impl Other {
        /// Creates a token that carries only `tag`.
        pub fn new(tag: Tag) -> Self {
            Self { tag }
        }
    }
}
/// Byte-oriented lexer that splits an input stream into tokens.
mod lexer {
    use std::{
        collections::HashMap,
        convert::TryFrom,
        io::{self, Bytes, Error, ErrorKind, Read, Stdin},
        iter::Peekable,
    };

    pub use crate::token::*;

    /// Lexer over any byte source; reads standard input by default.
    ///
    /// Input is processed one byte at a time and every byte is mapped to a
    /// `char` with `char::from`, so multi-byte UTF-8 sequences are not decoded.
    #[derive(Debug)]
    pub struct Lexer<R: Read = Stdin> {
        /// Number of line breaks consumed so far (starts at 0).
        pub line: usize,
        /// Look-ahead character. A blank means "nothing pending": it is skipped
        /// like any other whitespace on the next call to `scan`.
        peek: char,
        /// Reserved words and every identifier seen so far, keyed by lexeme.
        words: HashMap<String, Word>,
        /// Remaining input, with one byte of look-ahead.
        input: Peekable<Bytes<R>>,
    }

    impl Lexer<Stdin> {
        /// Creates a lexer that reads from standard input.
        pub fn new() -> Self {
            Self::from_reader(io::stdin())
        }
    }

    impl<R: Read> Lexer<R> {
        /// Creates a lexer over `reader` with `true` and `false` reserved.
        pub fn from_reader(reader: R) -> Self {
            let mut lexer = Self {
                line: 0,
                peek: ' ',
                words: HashMap::new(),
                input: reader.bytes().peekable(),
            };
            lexer.reserve(Word::new(TRUE, "true"));
            lexer.reserve(Word::new(FALSE, "false"));
            lexer
        }

        /// Registers `token` in the word table under its own lexeme.
        fn reserve(&mut self, token: Word) {
            self.words.insert(token.lexeme().to_owned(), token);
        }

        /// Returns the next input byte without consuming it.
        ///
        /// End of input and read errors both yield `None`.
        fn peek_char(&mut self) -> Option<u8> {
            self.input
                .peek()
                .and_then(|byte| byte.as_ref().ok().copied())
        }

        /// Consumes and returns the next input byte.
        ///
        /// End of input and read errors both yield `None`.
        fn next_char(&mut self) -> Option<u8> {
            self.input.next().and_then(Result::ok)
        }

        /// Loads the next input byte into `self.peek`.
        ///
        /// At end of input `self.peek` is reset to a blank and `false` is
        /// returned, so stale characters are never tokenised twice.
        fn advance(&mut self) -> bool {
            match self.next_char() {
                Some(byte) => {
                    self.peek = char::from(byte);
                    true
                }
                None => {
                    self.peek = ' ';
                    false
                }
            }
        }

        /// Skips blanks, line breaks and comments.
        ///
        /// The current look-ahead is examined *before* any new byte is read, so
        /// a character left pending by the previous token is not lost. On return
        /// `self.peek` holds the first significant character, or a blank if the
        /// input is exhausted.
        fn skip_blank_and_comments(&mut self) {
            loop {
                let current = self.peek;
                match current {
                    ' ' | '\t' => {}
                    '\r' | '\n' => self.skip_new_line(),
                    '/' if matches!(self.peek_char(), Some(b'/') | Some(b'*')) => {
                        self.skip_comments()
                    }
                    // Anything else (including a lone '/') starts a token.
                    _ => break,
                }
                if !self.advance() {
                    break;
                }
            }
        }

        /// Counts the line break held in `self.peek`.
        ///
        /// A `\r\n` pair counts as one break: the `\n` is consumed only when it
        /// really follows the `\r`.
        fn skip_new_line(&mut self) {
            if self.peek == '\r' && self.peek_char() == Some(b'\n') {
                self.next_char();
            }
            self.line += 1;
        }

        /// Skips a `//` or `/* ... */` comment.
        ///
        /// Must be called with `self.peek == '/'` and the next byte being `/` or
        /// `*`. A block comment that is still open at end of input simply ends
        /// there.
        fn skip_comments(&mut self) {
            match self.peek_char() {
                Some(b'/') => self.skip_line(),
                Some(b'*') => {
                    // Consume the '*' of the opener so that "/*/" is not seen as closed.
                    self.next_char();
                    while let Some(byte) = self.next_char() {
                        let current = char::from(byte);
                        self.peek = current;
                        match current {
                            '\r' | '\n' => self.skip_new_line(),
                            '*' if self.peek_char() == Some(b'/') => {
                                self.next_char();
                                break;
                            }
                            _ => {}
                        }
                    }
                }
                _ => unreachable!("skip_comments called without a comment opener"),
            }
        }

        /// Consumes input up to and including the next line break.
        fn skip_line(&mut self) {
            while let Some(byte) = self.next_char() {
                self.peek = char::from(byte);
                if matches!(self.peek, '\r' | '\n') {
                    self.skip_new_line();
                    break;
                }
            }
        }

        /// Reads a run of decimal digits starting at `self.peek`.
        ///
        /// The whole run is consumed even when the value is out of range, so the
        /// following call does not start in the middle of a literal.
        fn scan_number(&mut self) -> Result<Num, Error> {
            let mut value: Option<u32> = Some(0);
            while let Some(digit) = self.peek.to_digit(10) {
                value = value
                    .and_then(|acc| acc.checked_mul(10))
                    .and_then(|acc| acc.checked_add(digit));
                self.advance();
            }
            value
                .and_then(|acc| i32::try_from(acc).ok())
                .map(Num::new)
                .ok_or_else(|| {
                    Error::new(
                        ErrorKind::InvalidData,
                        "integer literal does not fit in i32",
                    )
                })
        }

        /// Reads a run of ASCII alphanumerics starting at `self.peek`.
        ///
        /// Returns the stored word if the lexeme is already in the word table
        /// (reserved word or previously seen identifier); otherwise records and
        /// returns a new identifier.
        fn scan_word(&mut self) -> Word {
            let mut lexeme = String::new();
            while self.peek.is_ascii_alphanumeric() {
                lexeme.push(self.peek);
                self.advance();
            }
            if let Some(known) = self.words.get(&lexeme) {
                return known.clone();
            }
            let word = Word::new(ID, &lexeme);
            self.words.insert(lexeme, word.clone());
            word
        }

        /// Returns the next token from the input.
        ///
        /// Digits produce a `Num`, a letter followed by letters/digits produces
        /// a `Word`, and any other byte produces an `Other` tagged
        /// `Tag::Char(byte)`. Once the input is exhausted every call returns
        /// `Tag::Char(b' ')`.
        ///
        /// # Errors
        ///
        /// Returns an error when an integer literal does not fit in `i32`, or
        /// when the pending character cannot be represented as a single byte.
        ///
        /// # Panics
        ///
        /// Tokens starting with `!`, `<`, `=` or `>` are not implemented yet and
        /// panic via `todo!`.
        pub fn scan(&mut self) -> Result<Box<dyn Token>, Error> {
            self.skip_blank_and_comments();
            let current = self.peek;
            let token: Box<dyn Token> = match current {
                c if c.is_ascii_digit() => Box::new(self.scan_number()?),
                c if c.is_ascii_alphabetic() => Box::new(self.scan_word()),
                '!' | '<' | '=' | '>' => {
                    todo!("tokens starting with `{}` are not implemented", current)
                }
                symbol => {
                    let byte = u8::try_from(symbol)
                        .map_err(|err| Error::new(ErrorKind::Other, err))?;
                    // The symbol is consumed; leave nothing pending.
                    self.peek = ' ';
                    Box::new(Other::new(Tag::Char(byte)))
                }
            };
            Ok(token)
        }
    }
}
/// Scans a single token from standard input, then prints the lexer state and
/// the tag of that token.
///
/// # Errors
///
/// Propagates the error returned by `Lexer::scan` instead of panicking, so a
/// lexing failure is reported by the runtime and the process exits with a
/// non-zero status.
fn main() -> std::io::Result<()> {
    let mut lexer = Lexer::new();
    let result = lexer.scan()?;
    println!("{:?}", lexer);
    println!("{:?}", result.tag());
    Ok(())
}
// GitHub page footer captured with this source (not part of the program):
// "Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment"