Skip to content

Instantly share code, notes, and snippets.

@DrMetallius
Created August 31, 2019 20:34
Show Gist options
  • Save DrMetallius/4684af63559f3400b7b437c4759cd317 to your computer and use it in GitHub Desktop.
Save DrMetallius/4684af63559f3400b7b437c4759cd317 to your computer and use it in GitHub Desktop.
S-expression parser
use std::io;
use std::io::{Bytes, Read, Error, ErrorKind};
use std::str;
use std::fmt;
use std::fmt::{Display, Write};
use std::str::FromStr;
/// Reads S-expressions from standard input, prints the parsed tree in debug form and then
/// prints every top-level element in its textual form, separated by single spaces.
///
/// A parse or I/O failure is reported on standard error and the process exits with
/// status 1 instead of panicking.
fn main() {
    // Lock stdin once up front so the byte-by-byte reader does not re-acquire the lock
    // for every byte it pulls.
    let stdin = io::stdin();
    let result = match ExpressionReader::read(stdin.lock()) {
        Ok(result) => result,
        Err(err) => {
            eprintln!("Failed to parse input: {}", err);
            std::process::exit(1);
        }
    };
    println!("Result: {:?}", result);
    for (pos, data) in result.iter().enumerate() {
        if pos > 0 {
            print!(" ");
        }
        print!("{}", data);
    }
    // Finish the output line so the shell prompt does not end up glued to the last element.
    println!();
}
/// Incremental S-expression reader: consumes a byte stream one character at a time and
/// builds up the tree of parsed elements.
struct ExpressionReader<R>
where
    R: Read,
{
    /// Byte iterator over the input being parsed.
    bytes: Bytes<R>,
    /// Finished elements that are not nested inside any list.
    root: Vec<Data>,
    /// Stack of lists that have been opened but not yet closed; the innermost one is last.
    parents: Vec<Vec<Data>>,
    /// Kind of element currently being read (`List` while between tokens).
    cur_element: ElementType,
    /// Text collected so far for the string, symbol or number being read.
    cur_element_buf: String,
    /// Set after a backslash: the next character is taken literally.
    add_next: bool,
}
impl<R: Read> ExpressionReader<R> {
fn read(source: R) -> Result<Vec<Data>, Error> {
let mut reader = ExpressionReader {
bytes: source.bytes(),
root: Vec::new(),
parents: Vec::new(),
cur_element: ElementType::List,
cur_element_buf: String::new(),
add_next: false
};
loop {
let next_char_result = reader.read_char()?;
let next_char = match next_char_result {
None => break,
Some(ch) => ch
};
reader.process_char(next_char)?;
}
reader.terminate_element()?;
Ok(reader.root)
}
fn read_char(&mut self) -> Result<Option<char>, Error> {
let first_byte = match self.bytes.next() {
None => return Ok(None),
Some(Ok(b)) => b,
Some(Err(e)) => return Err(e),
};
let width = get_utf8_char_width(first_byte)?;
if width == 1 { return Ok(Some(first_byte as char)) }
let mut buf = [first_byte, 0, 0, 0];
for position in 1..width {
let next_byte = match self.bytes.next() {
None => return Err(Error::new(ErrorKind::InvalidData, "Unexpected end of UTF-8 stream")),
Some(Err(e)) => return Err(e),
Some(Ok(b)) => b
};
buf[position] = next_byte;
}
match str::from_utf8(&buf[..width]) {
Ok(result) => Ok(Some(result.chars().next().unwrap())),
Err(err) => Err(Error::new(ErrorKind::InvalidData, err))
}
}
fn process_char(&mut self, next_char: char) -> Result<(), Error> {
match self.cur_element {
ElementType::List =>
match next_char {
'(' => {
self.parents.push(Vec::new());
self.cur_element = ElementType::List;
},
')' => {
match self.parents.pop() {
Some(list_vec) => {
let list = Data::List(list_vec);
self.put_to_topmost_list(list);
},
None => return Err(Error::new(ErrorKind::InvalidData, "Unexpected end of list"))
}
},
'"' => {
self.cur_element = ElementType::String;
self.cur_element_buf.clear();
self.add_next = false;
},
_ if next_char.is_whitespace() => (),
_ => {
self.cur_element = if next_char == '-' || next_char.is_digit(10) {
ElementType::Number
} else {
ElementType::Symbol
};
self.cur_element_buf.clear();
self.cur_element_buf.push(next_char);
self.add_next = false;
}
},
ElementType::String | ElementType::Symbol =>
if self.add_next {
self.cur_element_buf.push(next_char);
self.add_next = false;
} else {
match next_char {
'\\' => self.add_next = true,
'"' if self.cur_element == ElementType::String => {
let string = Data::String(self.cur_element_buf.clone());
self.put_to_topmost_list(string);
self.cur_element = ElementType::List;
},
_ if next_char.is_whitespace() && self.cur_element == ElementType::Symbol => {
let symbol = Data::Symbol(self.cur_element_buf.clone());
self.put_to_topmost_list(symbol);
self.cur_element = ElementType::List;
}
_ => self.cur_element_buf.push(next_char)
}
},
ElementType::Number =>
if next_char.is_digit(10) || next_char == '.' || next_char == 'e' || next_char == 'E' {
self.cur_element_buf.push(next_char);
} else {
self.terminate_element()?;
self.cur_element = ElementType::List;
return self.process_char(next_char);
}
};
Ok(())
}
fn put_to_topmost_list(&mut self, element: Data) {
if self.parents.is_empty() {
self.root.push(element);
} else {
let last_index = self.parents.len() - 1;
self.parents[last_index].push(element);
}
}
fn terminate_element(&mut self) -> Result<(), Error> {
if let ElementType::Number = self.cur_element {
if let Ok(number) = i64::from_str(&*self.cur_element_buf) {
self.put_to_topmost_list(Data::Integer(number));
} else if let Ok(number) = f64::from_str(&*self.cur_element_buf) {
self.put_to_topmost_list(Data::Float(number));
} else {
let symbol = Data::Symbol(self.cur_element_buf.clone());
self.put_to_topmost_list(symbol);
}
self.cur_element = ElementType::List;
} else if !self.parents.is_empty() || self.cur_element != ElementType::List {
return Err(Error::new(ErrorKind::InvalidData, "Unexpected end of input"))
}
Ok(())
}
}
/// Determines how many bytes a UTF-8 encoded character occupies from its first byte.
///
/// Only the leading bit pattern is inspected; the returned width is between 1 and 4.
///
/// # Errors
///
/// Returns `ErrorKind::InvalidData` when `first_byte` matches none of the four leading
/// patterns (for instance a continuation byte `10xxxxxx`).
fn get_utf8_char_width(first_byte: u8) -> Result<usize, Error> {
    let width = match first_byte {
        0x00..=0x7F => 1, // 0xxxxxxx
        0xC0..=0xDF => 2, // 110xxxxx
        0xE0..=0xEF => 3, // 1110xxxx
        0xF0..=0xF7 => 4, // 11110xxx
        _ => {
            return Err(Error::new(ErrorKind::InvalidData, "Invalid first UTF-8 byte"));
        }
    };
    Ok(width)
}
/// A parsed S-expression value.
#[derive(Debug, Clone, PartialEq)]
enum Data {
    /// A parenthesised sequence of nested values.
    List(Vec<Data>),
    /// A bare token that is neither a string literal nor a number.
    Symbol(String),
    /// The contents of a double-quoted string literal, with escapes already resolved.
    String(String),
    /// A numeric token that fits an `i64`.
    Integer(i64),
    /// A numeric token that parses as an `f64` but not as an `i64`.
    Float(f64),
}

/// Writes `text`, putting a backslash in front of every character for which
/// `needs_escape` returns true.
fn write_escaped<F>(formatter: &mut fmt::Formatter, text: &str, needs_escape: F) -> fmt::Result
where
    F: Fn(char) -> bool,
{
    for ch in text.chars() {
        if needs_escape(ch) {
            formatter.write_char('\\')?;
        }
        formatter.write_char(ch)?;
    }
    Ok(())
}

/// Renders the value as S-expression text.
///
/// Characters that would otherwise end or alter a token are backslash-escaped, and a
/// whole-valued float keeps a trailing `.0`, so the text does not change meaning when it
/// is parsed again.
impl Display for Data {
    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            Data::List(ref items) => {
                formatter.write_char('(')?;
                for (pos, item) in items.iter().enumerate() {
                    if pos > 0 {
                        formatter.write_char(' ')?;
                    }
                    Display::fmt(item, formatter)?;
                }
                formatter.write_char(')')
            }
            // Any whitespace ends a symbol, a backslash starts an escape, and parentheses
            // and quotes are structural, so all of them must be escaped.
            Data::Symbol(ref name) => write_escaped(formatter, name, |ch| {
                ch.is_whitespace() || ch == '\\' || ch == '(' || ch == ')' || ch == '"'
            }),
            Data::String(ref text) => {
                formatter.write_char('"')?;
                // The backslash must be escaped too, otherwise it would swallow the
                // character after it (or the closing quote) when read back.
                write_escaped(formatter, text, |ch| ch == '\\' || ch == '"')?;
                formatter.write_char('"')
            }
            Data::Integer(value) => Display::fmt(&value, formatter),
            Data::Float(value) => {
                Display::fmt(&value, formatter)?;
                // `1.0` displays as `1`, which would read back as an integer. Skipped
                // when the caller asked for an explicit precision.
                if formatter.precision().is_none() && value.is_finite() && value.fract() == 0.0
                {
                    formatter.write_str(".0")?;
                }
                Ok(())
            }
        }
    }
}
/// What the reader is currently in the middle of.
#[derive(PartialEq)]
enum ElementType {
    /// Between tokens: the next character may open or close a list or start a new token.
    List,
    /// Collecting the characters of a symbol.
    Symbol,
    /// Collecting the contents of a double-quoted string literal.
    String,
    /// Collecting the characters of a number-like token.
    Number,
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment