Skip to content

Instantly share code, notes, and snippets.

@pawarherschel
Last active October 13, 2023 13:41
Show Gist options
  • Save pawarherschel/5ea1ea64b84d6fab7cb9e5dd5e9e7b92 to your computer and use it in GitHub Desktop.
use std::collections::HashMap;
/// One lexical token produced by [`tokenize`]; borrows from the input
/// string for the lifetime `'a`.
#[derive(Debug, Clone)]
pub enum Token<'a> {
/// A run of non-delimiter characters; the `bool` is `true` when the
/// word was written inside double quotes.
Word(&'a str, bool),
/// A `:` separating a key from a value.
Colon,
/// A leading `-` marking a negated term or value.
Negation,
/// A single whitespace character.
Whitespace,
/// Zero-width token; used only for the end-of-input diagnostic.
Empty,
}
impl<'a> Token<'a> {
/// Returns the borrowed text of a `Word` token; `None` for every
/// other variant.
pub fn get_inner(&self) -> Option<&'a str> {
match self {
Token::Word(s, _) => Some(s),
_ => None,
}
}
/// `true` iff this token is a `Word` (quoted or not).
pub fn is_word(&self) -> bool {
matches!(self, Token::Word(_, _))
}
/// `true` for the tokens that separate word groups in [`parse`]:
/// `Whitespace` and `Empty`.
pub fn is_whitespace(&self) -> bool {
matches!(self, Token::Whitespace | Token::Empty)
}
/// `true` when the token spans zero bytes (only `Empty`).
pub fn is_empty(&self) -> bool {
self.len() == 0
}
/// Number of input bytes this token spans; the tokenizer uses this to
/// advance through the input.
///
/// NOTE(review): `Whitespace` is assumed to be exactly one byte and a
/// quoted `Word` is assumed to have both quotes present. A multi-byte
/// whitespace char (e.g. NBSP) or an unterminated quote makes this
/// count disagree with the real byte span — confirm inputs are ASCII
/// with terminated quotes before relying on it to slice the input.
pub fn len(&self) -> usize {
match self {
// +2 accounts for the surrounding pair of '"' characters.
Token::Word(s, quoted) => s.len() + if *quoted { 2 } else { 0 },
Token::Colon => 1,
Token::Negation => 1,
Token::Whitespace => 1,
Token::Empty => 0,
}
}
}
/// Splits `input` into a flat sequence of tokens.
///
/// Repeatedly pulls the leading token off the front of the string via
/// [`next_token`] until nothing remains, collecting the results.
pub fn tokenize(input: &str) -> Vec<Token> {
    let mut rest = input;
    std::iter::from_fn(|| {
        if rest.is_empty() {
            return None;
        }
        let (token, tail) = next_token(rest);
        rest = tail;
        Some(token)
    })
    .collect()
}
/// Consumes exactly one token from the front of `input` and returns it
/// together with the unread remainder of the string.
///
/// Quoted words honour backslash escapes (`\"` does not terminate the
/// quote); the escape characters are kept verbatim inside the word
/// slice. An unterminated quote yields everything after the opening
/// `"` as the word instead of panicking.
///
/// All offsets come from `char_indices`, i.e. they are byte offsets,
/// so slicing stays on UTF-8 boundaries even for non-ASCII input
/// (the previous version mixed `chars().enumerate()` char counts with
/// byte slicing and panicked on multi-byte characters).
///
/// # Panics
/// Panics when `input` is empty; [`tokenize`] never calls it that way.
pub fn next_token(input: &str) -> (Token, &str) {
    let mut chars = input.char_indices();
    match chars.next() {
        Some((_, ':')) => (Token::Colon, &input[1..]),
        Some((_, '-')) => (Token::Negation, &input[1..]),
        // Advance by the char's real byte length, not a fixed 1, so
        // multi-byte whitespace (e.g. NBSP) cannot split a sequence.
        Some((_, c)) if c.is_whitespace() => (Token::Whitespace, &input[c.len_utf8()..]),
        Some((_, '"')) => {
            let mut escaped = false;
            let mut close = None;
            // `idx` is the byte offset of `c` within `input`.
            for (idx, c) in chars {
                if escaped {
                    escaped = false;
                    continue;
                }
                if c == '\\' {
                    escaped = true;
                    continue;
                }
                if c == '"' {
                    close = Some(idx);
                    break;
                }
            }
            match close {
                // Word is the text between the quotes; the remainder
                // starts just past the closing quote.
                Some(idx) => (Token::Word(&input[1..idx], true), &input[idx + 1..]),
                // Unterminated quote: consume the rest of the input as
                // the word (the old code over-counted the token length
                // here and panicked while slicing the remainder).
                None => (Token::Word(&input[1..], true), ""),
            }
        }
        Some(_) => {
            // A bare word runs until the next delimiter (':' or any
            // whitespace) or the end of input.
            let end = chars
                .find(|&(_, c)| c == ':' || c.is_whitespace())
                .map(|(idx, _)| idx)
                .unwrap_or(input.len());
            (Token::Word(&input[..end], false), &input[end..])
        }
        None => panic!("{:#?}", Token::Empty),
    }
}
/// Result of [`parse`]: free-standing search terms plus key/value
/// conditions, all borrowing from the original query string.
#[derive(Debug, Clone)]
pub struct TermsConditions<'a> {
// Bare terms; the bool is `true` when the term was negated ("-term").
pub terms: Vec<(&'a str, bool)>,
// "key:value" conditions keyed by name; the bool is `true` for the
// negated form "key:-value". Later duplicates of a key overwrite
// earlier ones.
pub conditions: HashMap<&'a str, (&'a str, bool)>,
}
/// Parses a search query into terms and key/value conditions.
///
/// The token stream is split on whitespace tokens; each group is then
/// classified by its shape:
/// - `word`          → plain term
/// - `-word`         → negated term
/// - `key:value`     → condition
/// - `key:-value`    → negated condition
///
/// # Panics
/// Panics on any group that matches none of the shapes above.
pub fn parse(input: &str) -> TermsConditions {
    let tokens = tokenize(input);
    let mut terms = Vec::new();
    let mut conditions = HashMap::new();
    for group in tokens.split(Token::is_whitespace) {
        match group {
            // consecutive separators produce empty groups; ignore them
            [] => {}
            // simple term like "pizza"
            [Token::Word(w, _)] => terms.push((*w, false)),
            // negated term like "-tall"
            [Token::Negation, Token::Word(w, _)] => terms.push((*w, true)),
            // key-value pair like "name:Hayden"
            [Token::Word(k, _), Token::Colon, Token::Word(v, _)] => {
                conditions.insert(*k, (*v, false));
            }
            // negated key-value pair like "tags:-tall"
            [Token::Word(k, _), Token::Colon, Token::Negation, Token::Word(v, _)] => {
                conditions.insert(*k, (*v, true));
            }
            other => panic!("Invalid token: {:?}", other),
        }
    }
    TermsConditions { terms, conditions }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment