Skip to content

Instantly share code, notes, and snippets.

@StephenWakely
Created August 11, 2020 08:50
Show Gist options
  • Save StephenWakely/f7d811bf3a35168cd8840b872c3bd18c to your computer and use it in GitHub Desktop.
Save StephenWakely/f7d811bf3a35168cd8840b872c3bd18c to your computer and use it in GitHub Desktop.
use std::collections::HashMap;
use std::env;
// Alias some types
type Count = HashMap<String, usize>;
type ParseError = String;
/// Take a list of tuples of strings and counts and
/// convert them into a hash table.
fn make_count(tpls: Vec<(String, usize)>) -> Count {
tpls.iter().cloned().collect::<Count>()
}
/// Eats a single atom, or a grouped set of atoms (within parens) from the string.
/// Returns a Result (Ok for a successful parse or Err(ParseError) for a failed parse)
/// of (String, Count) a tuple of the remaining yet to be parsed string
/// and Count - a Hashmap of atoms to counts.
fn eat_atom(molecule: &str) -> Result<(String, Count), ParseError> {
if molecule.len() == 0 {
return Err("Empty molecule".to_string());
}
let (head, tail) = molecule.split_at(1);
if matches!(head, "(" | "{" | "[") {
// We are in a paren, recursively parse all the inner contents of this.
let (rest, count) = parse_molecule(&tail)?;
if rest.len() > 0 {
let (head, tail) = rest.split_at(1);
// We should really match against the same paren that we opened with,
// but life is short..
if matches!(head, ")" | "}" | "]") {
Ok((tail.to_string(), count))
} else {
Err("No closing paren".to_string())
}
} else {
Err("No closing paren".to_string())
}
} else {
// Its just a single atom, which can be one upper case followed by n lowercase chars.
let tail = tail
.chars()
.take_while(|c| c.is_lowercase())
.collect::<String>();
let atom = format!("{}{}", head, tail);
let remaining = molecule.chars().skip(atom.len()).collect::<String>();
Ok((remaining, make_count(vec![(atom, 1)])))
}
}
/// Eats the mulitplier from the string.
/// The multiplier is any numeric characters.
/// We may not have any multiplier, so return None if there aren't any.
fn eat_multiplier(molecule: &str) -> Option<(String, usize)> {
let num = molecule
.chars()
.take_while(|c| c.is_numeric())
.collect::<String>();
let remaining = molecule.chars().skip(num.len()).collect::<String>();
match num.parse() {
Ok(num) => Some((remaining, num)),
Err(_) => None,
}
}
/// Multiply every element in our Count hash by the given multiplier.
/// Note count is mutable, so we are directly changing it's contents.
fn multiply_count(count: &mut Count, multiplier: usize) {
count.iter_mut().for_each(|(_, val)| *val *= multiplier);
}
/// Add all the counts from one hash table to another.
fn add_counts(count1: &mut Count, count2: Count) {
for (key, value) in count2 {
match count1.get_mut(&key) {
Some(val) => {
*val += value;
}
None => {
count1.insert(key, value);
}
}
}
}
/// Parse the atom and the count returning the hash table of the counts.
fn parse_atom(molecule: &str) -> Result<(String, Count), ParseError> {
let (mut remaining, mut count) = eat_atom(molecule)?;
if let Some((remaining_, multiplier)) = eat_multiplier(&remaining) {
multiply_count(&mut count, multiplier);
remaining = remaining_;
}
Ok((remaining, count))
}
/// Is this the end of a molecule?
/// Either the closing bracket or end of the string.
fn end_molecule(molecule: &str) -> bool {
match molecule.chars().nth(0) {
Some(m) => matches!(m, ')' | '}' | ']'),
None => true,
}
}
/// Parse the molecule - or part of if we are within a bracketed bit.
/// Returns the remaining unparsed string and the counts.
fn parse_molecule(molecule: &str) -> Result<(String, Count), ParseError> {
let mut count = HashMap::new();
let mut remaining = molecule.to_string();
while !end_molecule(&remaining) {
let (remaining_, count_) = parse_atom(&remaining)?;
add_counts(&mut count, count_);
remaining = remaining_;
}
Ok((remaining, count))
}
/// Parse the molecule, returning either the counts of the atoms, or an error if the parsing failed.
fn parse(molecule: &str) -> Result<Count, ParseError> {
let (remaining, count) = parse_molecule(molecule)?;
if remaining != "" {
// We should have parsed the entire string by the end.
Err(format!("Still some molecule remaining {}", remaining))
} else {
Ok(count)
}
}
fn main() {
for molecule in env::args().skip(1) {
println!("{:?}", parse(&molecule));
}
}
#[test]
fn test_h20() {
assert_eq!(
make_count(vec![("H".to_string(), 2), ("O".to_string(), 1)]),
parse("H2O").unwrap(),
);
}
#[test]
fn test_magnesium_hydroxide() {
assert_eq!(
make_count(vec![
("Mg".to_string(), 1),
("O".to_string(), 2),
("H".to_string(), 2)
]),
parse("Mg(OH)2").unwrap(),
);
}
#[test]
fn test_fremy_salt() {
assert_eq!(
make_count(vec![
("K".to_string(), 4),
("O".to_string(), 14),
("N".to_string(), 2),
("S".to_string(), 4),
]),
parse("K4[ON(SO3)2]2").unwrap(),
);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment