Skip to content

Instantly share code, notes, and snippets.

@Mr4k
Created January 6, 2020 16:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Mr4k/56d84c987a0316172fcd598749ec3288 to your computer and use it in GitHub Desktop.
Save Mr4k/56d84c987a0316172fcd598749ec3288 to your computer and use it in GitHub Desktop.
Toy Lisp Interpreter
use std::env;
const EMPTY_NODE_ID: usize = 0;
// an intermediate representation for symbol data during the parse step
#[derive(Clone, Debug, PartialEq)]
struct ParseSymbol {
string: String,
debug_program_position: usize,
}
#[derive(Clone, Debug, PartialEq)]
enum ASTIdOrSymbol {
ASTId(usize),
Symbol(Symbol),
}
#[derive(Clone, Debug, PartialEq)]
struct Symbol {
data: Primitives,
debug_program_position: usize,
}
#[derive(Clone, Debug, PartialEq)]
enum Primitives {
Integer { value: i32 },
AddOp,
}
#[derive(Clone, Debug, PartialEq)]
struct AST {
// the AST does not need a debug_program_position because parenthesis are parsed away
// and therefore we cannot error at one during runtime
node_id: usize,
parent_node_id: usize,
args: Vec<ASTIdOrSymbol>,
}
impl AST {
pub fn new(node_id: usize, parent_node_id: usize) -> Self {
Self {
node_id,
parent_node_id,
args: Vec::new(),
}
}
}
fn parse_symbols_from_program_string(program_string: &str, parsed_symbols: &mut Vec<ParseSymbol>) {
let mut current_symbol_chars: Vec<char> = Vec::new();
for (pos, chr) in program_string.chars().enumerate() {
match chr {
'(' | ')' => {
// flush any previous symbol
if current_symbol_chars.len() > 0 {
let symbol_string: String = current_symbol_chars.drain(..).collect();
parsed_symbols.push(ParseSymbol {
debug_program_position: pos - symbol_string.len(),
string: symbol_string,
});
}
// push the parenthesis as it's own symbol
parsed_symbols.push(ParseSymbol {
string: chr.to_string(),
debug_program_position: pos,
});
}
chr if chr.is_whitespace() => {
// flush any previous symbol
if current_symbol_chars.len() > 0 {
let symbol_string: String = current_symbol_chars.drain(..).collect();
parsed_symbols.push(ParseSymbol {
debug_program_position: pos - symbol_string.len(),
string: symbol_string,
});
}
}
_ => {
current_symbol_chars.push(chr);
}
}
}
// flush any remaining symbols which have not been completed
// note this techinically shouldn't be needed since these programs should end with ')'
// but deciding if the program is valid is not the resposibility of this function
if current_symbol_chars.len() > 0 {
let symbol_string: String = current_symbol_chars.drain(..).collect();
parsed_symbols.push(ParseSymbol {
debug_program_position: program_string.len() - symbol_string.len(),
string: symbol_string,
});
}
}
fn construct_ast_from_parse_symbols(program_symbols: Vec<ParseSymbol>, program_ast: &mut Vec<AST>) {
let mut max_node_id = 0;
// index 0 is reserved for the EMPTY_NODE_ID
// this sets it aside although maybe not in the best way
let mut current_ast_id = 0;
program_ast.push(AST::new(EMPTY_NODE_ID, EMPTY_NODE_ID));
for parse_symbol in program_symbols.iter() {
let current_tree = program_ast.get_mut(current_ast_id).unwrap();
match &*parse_symbol.string {
"(" => {
let parent_node_id = current_ast_id;
max_node_id += 1;
let new_node_id = max_node_id;
current_tree.args.push(ASTIdOrSymbol::ASTId(new_node_id));
program_ast.push(AST::new(new_node_id, parent_node_id));
current_ast_id = max_node_id;
}
")" => {
// assert here that we have an open subtree to close
if current_ast_id == 0 {
panic!(format!(
"Parse Error: Invalid Close Parenthesis at position {}",
parse_symbol.debug_program_position
));
}
current_ast_id = current_tree.parent_node_id;
}
// operatators and keywords
"+" => {
current_tree.args.push(ASTIdOrSymbol::Symbol(Symbol {
debug_program_position: parse_symbol.debug_program_position,
data: Primitives::AddOp,
}));
}
// numbers (currently only integers)
num if num.parse::<i32>().is_ok() => {
current_tree.args.push(ASTIdOrSymbol::Symbol(Symbol {
debug_program_position: parse_symbol.debug_program_position,
data: Primitives::Integer {
value: num.parse::<i32>().unwrap(),
},
}));
}
unknown => panic!(format!(
"Parse Error: Invalid Symbol {} at position {}",
unknown, parse_symbol.debug_program_position
)),
}
}
if current_ast_id != 0 {
panic!("Parse Error: Error Parenthesis Left Open at end of program");
}
}
fn main() {
let args: Vec<String> = env::args().collect();
let program = match args.get(1) {
Some(arg) => arg,
None => "",
};
if program == "" {
println!("Warning program is empty.");
}
let mut program_ast: Vec<AST> = Vec::new();
{
println!("Parsing Text...");
let mut program_symbols: Vec<ParseSymbol> = Vec::new();
parse_symbols_from_program_string(program, &mut program_symbols);
println!("Constructing AST...");
construct_ast_from_parse_symbols(program_symbols, &mut program_ast);
}
println!("AST {:?}", program_ast);
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn parse_symbols_from_program_string_handles_symbols_cutoffs_with_whitespace() {
let test_string = "aa bbc d2";
let mut parse_symbols: Vec<ParseSymbol> = Vec::new();
parse_symbols_from_program_string(test_string, &mut parse_symbols);
assert_eq!(
parse_symbols,
vec![
ParseSymbol {
string: "aa".to_string(),
debug_program_position: 0
},
ParseSymbol {
string: "bbc".to_string(),
debug_program_position: 3
},
ParseSymbol {
string: "d2".to_string(),
debug_program_position: 9
},
]
);
}
#[test]
fn parse_symbols_from_program_string_handles_symbols_cutoffs_with_parenthesis() {
let test_string = "aa(bbc)d2";
let mut parse_symbols: Vec<ParseSymbol> = Vec::new();
parse_symbols_from_program_string(test_string, &mut parse_symbols);
assert_eq!(
parse_symbols,
vec![
ParseSymbol {
string: "aa".to_string(),
debug_program_position: 0
},
ParseSymbol {
string: "(".to_string(),
debug_program_position: 2
},
ParseSymbol {
string: "bbc".to_string(),
debug_program_position: 3
},
ParseSymbol {
string: ")".to_string(),
debug_program_position: 6
},
ParseSymbol {
string: "d2".to_string(),
debug_program_position: 7
},
]
);
}
#[test]
fn parse_symbols_from_program_string_handles_repeated_parenthesis_correctly() {
let test_string = "(()))";
let mut parse_symbols: Vec<ParseSymbol> = Vec::new();
parse_symbols_from_program_string(test_string, &mut parse_symbols);
assert_eq!(
parse_symbols,
vec![
ParseSymbol {
string: "(".to_string(),
debug_program_position: 0
},
ParseSymbol {
string: "(".to_string(),
debug_program_position: 1
},
ParseSymbol {
string: ")".to_string(),
debug_program_position: 2
},
ParseSymbol {
string: ")".to_string(),
debug_program_position: 3
},
ParseSymbol {
string: ")".to_string(),
debug_program_position: 4
},
]
);
}
#[test]
#[should_panic(expected = "Parse Error: Error Parenthesis Left Open at end of program")]
fn construct_ast_from_parse_symbols_errors_on_unmatched_open_parenthesis() {
let test_string = "((+ 1 2)";
let mut program_ast: Vec<AST> = Vec::new();
let mut parse_symbols: Vec<ParseSymbol> = Vec::new();
parse_symbols_from_program_string(test_string, &mut parse_symbols);
construct_ast_from_parse_symbols(parse_symbols, &mut program_ast);
}
#[test]
#[should_panic(expected = "Parse Error: Invalid Close Parenthesis at position 11")]
fn construct_ast_from_parse_symbols_errors_on_unmatched_closed_parenthesis() {
let test_string = "((+ 1 2) 5)) (7))";
let mut program_ast: Vec<AST> = Vec::new();
let mut parse_symbols: Vec<ParseSymbol> = Vec::new();
parse_symbols_from_program_string(test_string, &mut parse_symbols);
construct_ast_from_parse_symbols(parse_symbols, &mut program_ast);
}
#[test]
#[should_panic(expected = "Parse Error: Invalid Symbol $ at position 11")]
fn construct_ast_from_parse_symbols_errors_on_invalid_symbol() {
let test_string = "(+ (+ 1 2) $ 7)";
let mut program_ast: Vec<AST> = Vec::new();
let mut parse_symbols: Vec<ParseSymbol> = Vec::new();
parse_symbols_from_program_string(test_string, &mut parse_symbols);
construct_ast_from_parse_symbols(parse_symbols, &mut program_ast);
}
#[test]
fn construct_ast_from_parse_symbols_simple_expression_correctly() {
let test_string = "(+ 1 2)";
let mut program_ast: Vec<AST> = Vec::new();
let mut parse_symbols: Vec<ParseSymbol> = Vec::new();
parse_symbols_from_program_string(test_string, &mut parse_symbols);
construct_ast_from_parse_symbols(parse_symbols, &mut program_ast);
assert_eq!(
program_ast,
vec![
AST {
node_id: 0,
parent_node_id: 0,
args: vec![ASTIdOrSymbol::ASTId(1)]
},
AST {
node_id: 1,
parent_node_id: 0,
args: vec![
ASTIdOrSymbol::Symbol(Symbol {
data: Primitives::AddOp,
debug_program_position: 1
}),
ASTIdOrSymbol::Symbol(Symbol {
data: Primitives::Integer { value: 1 },
debug_program_position: 3
}),
ASTIdOrSymbol::Symbol(Symbol {
data: Primitives::Integer { value: 2 },
debug_program_position: 5
})
]
}
]
);
}
#[test]
fn construct_ast_from_parse_symbols_works_with_simple_subtrees() {
let test_string = "(+ (+ 1 3) 2)";
let mut program_ast: Vec<AST> = Vec::new();
let mut parse_symbols: Vec<ParseSymbol> = Vec::new();
parse_symbols_from_program_string(test_string, &mut parse_symbols);
construct_ast_from_parse_symbols(parse_symbols, &mut program_ast);
assert_eq!(
program_ast,
vec![
AST {
node_id: 0,
parent_node_id: 0,
args: vec![ASTIdOrSymbol::ASTId(1)]
},
AST {
node_id: 1,
parent_node_id: 0,
args: vec![
ASTIdOrSymbol::Symbol(Symbol {
data: Primitives::AddOp,
debug_program_position: 1
}),
ASTIdOrSymbol::ASTId(2),
ASTIdOrSymbol::Symbol(Symbol {
data: Primitives::Integer { value: 2 },
debug_program_position: 11
})
]
},
AST {
node_id: 2,
parent_node_id: 1,
args: vec![
ASTIdOrSymbol::Symbol(Symbol {
data: Primitives::AddOp,
debug_program_position: 4
}),
ASTIdOrSymbol::Symbol(Symbol {
data: Primitives::Integer { value: 1 },
debug_program_position: 6
}),
ASTIdOrSymbol::Symbol(Symbol {
data: Primitives::Integer { value: 3 },
debug_program_position: 8
})
]
}
]
);
}
}
@Mr4k
Copy link
Author

Mr4k commented Jan 6, 2020

This program currently parses a simple lisp string and turns it into an AST

Example usage:
input:
toy_lisp (+ 1 (+ 2 3))
output:

Parsing Text...
Constructing AST...
AST [AST { node_id: 0, parent_node_id: 0, args: [ASTId(1)] }, AST { node_id: 1, parent_node_id: 0, args: [Symbol(Symbol { data: AddOp, debug_program_position: 2 }), Symbol(Symbol { data: Integer { value: 1 }, debug_program_position: 4 }), ASTId(2)] }, AST { node_id: 2, parent_node_id: 1, args: [Symbol(Symbol { data: AddOp, debug_program_position: 7 }), Symbol(Symbol { data: Integer { value: 2 }, debug_program_position: 9 }), Symbol(Symbol { data: Integer { value: 3 }, debug_program_position: 11 })] }]

It will also check for two simple kinds of malformed programs:
Imbalanced Parenthesis:
input:
toy_lisp (+ 1 (+ 2 3)
output:

Parsing Text...
Constructing AST...
thread 'main' panicked at 'Parse Error: Error Parenthesis Left Open at end of program', src/main.rs:149:9
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace.

input:
toy_lisp (+ 1 (+ 2 3)))
output:

Parsing Text...
Constructing AST...
thread 'main' panicked at 'Parse Error: Invalid Close Parenthesis at position 14', src/main.rs:118:21
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace.

Invalid Symbol:
input:
toy_lisp (+ 1 (u 2 3)))
output:

Parsing Text...
Constructing AST...
thread 'main' panicked at 'Parse Error: Invalid Symbol u at position 7', src/main.rs:141:24
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace.

TODO
Actually interpret lisp

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment