Created
August 31, 2019 20:34
-
-
Save DrMetallius/4684af63559f3400b7b437c4759cd317 to your computer and use it in GitHub Desktop.
S-expression parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::io; | |
use std::io::{Bytes, Read, Error, ErrorKind}; | |
use std::str; | |
use std::fmt; | |
use std::fmt::{Display, Write}; | |
use std::str::FromStr; | |
fn main() { | |
let result = ExpressionReader::read(io::stdin()).unwrap(); | |
println!("Result: {:?}", result); | |
for (pos, data) in result.iter().enumerate() { | |
if pos > 0 { print!(" "); } | |
print!("{}", data); | |
} | |
} | |
struct ExpressionReader<R: Read> { | |
bytes: Bytes<R>, | |
root: Vec<Data>, | |
parents: Vec<Vec<Data>>, | |
cur_element: ElementType, | |
cur_element_buf: String, | |
add_next: bool | |
} | |
impl<R: Read> ExpressionReader<R> { | |
fn read(source: R) -> Result<Vec<Data>, Error> { | |
let mut reader = ExpressionReader { | |
bytes: source.bytes(), | |
root: Vec::new(), | |
parents: Vec::new(), | |
cur_element: ElementType::List, | |
cur_element_buf: String::new(), | |
add_next: false | |
}; | |
loop { | |
let next_char_result = reader.read_char()?; | |
let next_char = match next_char_result { | |
None => break, | |
Some(ch) => ch | |
}; | |
reader.process_char(next_char)?; | |
} | |
reader.terminate_element()?; | |
Ok(reader.root) | |
} | |
fn read_char(&mut self) -> Result<Option<char>, Error> { | |
let first_byte = match self.bytes.next() { | |
None => return Ok(None), | |
Some(Ok(b)) => b, | |
Some(Err(e)) => return Err(e), | |
}; | |
let width = get_utf8_char_width(first_byte)?; | |
if width == 1 { return Ok(Some(first_byte as char)) } | |
let mut buf = [first_byte, 0, 0, 0]; | |
for position in 1..width { | |
let next_byte = match self.bytes.next() { | |
None => return Err(Error::new(ErrorKind::InvalidData, "Unexpected end of UTF-8 stream")), | |
Some(Err(e)) => return Err(e), | |
Some(Ok(b)) => b | |
}; | |
buf[position] = next_byte; | |
} | |
match str::from_utf8(&buf[..width]) { | |
Ok(result) => Ok(Some(result.chars().next().unwrap())), | |
Err(err) => Err(Error::new(ErrorKind::InvalidData, err)) | |
} | |
} | |
fn process_char(&mut self, next_char: char) -> Result<(), Error> { | |
match self.cur_element { | |
ElementType::List => | |
match next_char { | |
'(' => { | |
self.parents.push(Vec::new()); | |
self.cur_element = ElementType::List; | |
}, | |
')' => { | |
match self.parents.pop() { | |
Some(list_vec) => { | |
let list = Data::List(list_vec); | |
self.put_to_topmost_list(list); | |
}, | |
None => return Err(Error::new(ErrorKind::InvalidData, "Unexpected end of list")) | |
} | |
}, | |
'"' => { | |
self.cur_element = ElementType::String; | |
self.cur_element_buf.clear(); | |
self.add_next = false; | |
}, | |
_ if next_char.is_whitespace() => (), | |
_ => { | |
self.cur_element = if next_char == '-' || next_char.is_digit(10) { | |
ElementType::Number | |
} else { | |
ElementType::Symbol | |
}; | |
self.cur_element_buf.clear(); | |
self.cur_element_buf.push(next_char); | |
self.add_next = false; | |
} | |
}, | |
ElementType::String | ElementType::Symbol => | |
if self.add_next { | |
self.cur_element_buf.push(next_char); | |
self.add_next = false; | |
} else { | |
match next_char { | |
'\\' => self.add_next = true, | |
'"' if self.cur_element == ElementType::String => { | |
let string = Data::String(self.cur_element_buf.clone()); | |
self.put_to_topmost_list(string); | |
self.cur_element = ElementType::List; | |
}, | |
_ if next_char.is_whitespace() && self.cur_element == ElementType::Symbol => { | |
let symbol = Data::Symbol(self.cur_element_buf.clone()); | |
self.put_to_topmost_list(symbol); | |
self.cur_element = ElementType::List; | |
} | |
_ => self.cur_element_buf.push(next_char) | |
} | |
}, | |
ElementType::Number => | |
if next_char.is_digit(10) || next_char == '.' || next_char == 'e' || next_char == 'E' { | |
self.cur_element_buf.push(next_char); | |
} else { | |
self.terminate_element()?; | |
self.cur_element = ElementType::List; | |
return self.process_char(next_char); | |
} | |
}; | |
Ok(()) | |
} | |
fn put_to_topmost_list(&mut self, element: Data) { | |
if self.parents.is_empty() { | |
self.root.push(element); | |
} else { | |
let last_index = self.parents.len() - 1; | |
self.parents[last_index].push(element); | |
} | |
} | |
fn terminate_element(&mut self) -> Result<(), Error> { | |
if let ElementType::Number = self.cur_element { | |
if let Ok(number) = i64::from_str(&*self.cur_element_buf) { | |
self.put_to_topmost_list(Data::Integer(number)); | |
} else if let Ok(number) = f64::from_str(&*self.cur_element_buf) { | |
self.put_to_topmost_list(Data::Float(number)); | |
} else { | |
let symbol = Data::Symbol(self.cur_element_buf.clone()); | |
self.put_to_topmost_list(symbol); | |
} | |
self.cur_element = ElementType::List; | |
} else if !self.parents.is_empty() || self.cur_element != ElementType::List { | |
return Err(Error::new(ErrorKind::InvalidData, "Unexpected end of input")) | |
} | |
Ok(()) | |
} | |
} | |
/// Returns the length in bytes (1 to 4) of the UTF-8 sequence announced by
/// its leading byte.
///
/// # Errors
///
/// Returns an `InvalidData` error when `first_byte` cannot begin a sequence,
/// i.e. it is a continuation byte (`10xxxxxx`) or has five or more leading
/// one bits.
fn get_utf8_char_width(first_byte: u8) -> Result<usize, Error> {
    match first_byte {
        // 0xxxxxxx
        0x00..=0x7F => Ok(1),
        // 110xxxxx
        0xC0..=0xDF => Ok(2),
        // 1110xxxx
        0xE0..=0xEF => Ok(3),
        // 11110xxx
        0xF0..=0xF7 => Ok(4),
        _ => Err(Error::new(ErrorKind::InvalidData, "Invalid first UTF-8 byte")),
    }
}
/// A parsed S-expression node.
#[derive(Debug)]
enum Data {
    /// A parenthesized sequence of nested elements.
    List(Vec<Data>),
    /// A bare, unquoted token.
    Symbol(String),
    /// A double-quoted string, stored without the quotes and with escapes resolved.
    String(String),
    /// A numeric token that parsed as a signed 64-bit integer.
    Integer(i64),
    /// A numeric token that parsed as a 64-bit float but not as an integer.
    Float(f64),
}
impl Display for Data { | |
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { | |
match self { | |
&Data::List(ref value) => { | |
formatter.write_char('(')?; | |
for (pos, data) in value.iter().enumerate() { | |
if pos > 0 { formatter.write_char(' ')? } | |
data.fmt(formatter)?; | |
} | |
formatter.write_char(')')?; | |
}, | |
&Data::Symbol(ref value) => value.replace(' ', r#"\ "#).fmt(formatter)?, | |
&Data::String(ref value) => { | |
formatter.write_char('"')?; | |
value.replace('"', r#"\""#).fmt(formatter)?; | |
formatter.write_char('"')?; | |
}, | |
&Data::Integer(value) => value.fmt(formatter)?, | |
&Data::Float(value) => value.fmt(formatter)? | |
}; | |
Ok(()) | |
} | |
} | |
/// Kind of token the parser is currently reading.
#[derive(PartialEq)]
enum ElementType {
    /// Between tokens, at list level.
    List,
    /// Inside a bare symbol.
    Symbol,
    /// Inside a double-quoted string.
    String,
    /// Inside a numeric token.
    Number,
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment