Last active
October 13, 2023 13:41
-
-
Save pawarherschel/5ea1ea64b84d6fab7cb9e5dd5e9e7b92 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::collections::HashMap; | |
/// A single lexical token of the search-query language.
///
/// `Word` borrows its text straight from the input string; the `bool`
/// records whether the word was written inside double quotes.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token<'a> {
    /// A run of word characters, or the body of a quoted string
    /// (surrounding quotes stripped). `true` = the word was quoted.
    Word(&'a str, bool),
    /// The `:` separating a condition key from its value.
    Colon,
    /// A leading `-` negating the following word.
    Negation,
    /// A single whitespace character.
    Whitespace,
    /// Zero-width token; used as a panic payload when the tokenizer is
    /// handed an empty string.
    Empty,
}
impl<'a> Token<'a> { | |
pub fn get_inner(&self) -> Option<&'a str> { | |
match self { | |
Token::Word(s, _) => Some(s), | |
_ => None, | |
} | |
} | |
pub fn is_word(&self) -> bool { | |
matches!(self, Token::Word(_, _)) | |
} | |
pub fn is_whitespace(&self) -> bool { | |
matches!(self, Token::Whitespace | Token::Empty) | |
} | |
pub fn is_empty(&self) -> bool { | |
self.len() == 0 | |
} | |
pub fn len(&self) -> usize { | |
match self { | |
Token::Word(s, quoted) => s.len() + if *quoted { 2 } else { 0 }, | |
Token::Colon => 1, | |
Token::Negation => 1, | |
Token::Whitespace => 1, | |
Token::Empty => 0, | |
} | |
} | |
} | |
pub fn tokenize(input: &str) -> Vec<Token> { | |
let mut tokens = Vec::new(); | |
let mut input = input; | |
while !input.is_empty() { | |
let (token, rest) = next_token(input); | |
tokens.push(token); | |
input = rest; | |
} | |
tokens | |
} | |
pub fn next_token(input: &str) -> (Token, &str) { | |
let mut chars = input.chars(); | |
let token = match chars.next() { | |
Some(':') => Token::Colon, | |
Some(c) if c.is_whitespace() => Token::Whitespace, | |
Some('-') => Token::Negation, | |
Some('"') => { | |
let mut skip = false; | |
let mut end = None; | |
for (idx, c) in chars.enumerate() { | |
if skip { | |
skip = false; | |
continue; | |
} | |
if c == '\\' { | |
skip = true; | |
continue; | |
} | |
if c == '"' { | |
end = Some(idx); | |
break; | |
} | |
} | |
let end = end.unwrap_or(input.len() - 1); | |
Token::Word(&input[1..=end], true) | |
} | |
Some(_) => { | |
let end = chars | |
.enumerate() | |
.find(|(_, c)| *c == ':' || c.is_whitespace()) | |
.unwrap_or((input.len() - 1, ' ')) | |
.0; | |
Token::Word(&input[..=end], false) | |
} | |
None => panic!("{:#?}", Token::Empty), | |
}; | |
let rest = &input[token.len()..]; | |
(token, rest) | |
} | |
/// The parsed form of a search query: free-standing terms plus
/// key-value conditions.
///
/// In both fields the `bool` is the negation flag: `true` when the
/// term/value was prefixed with `-`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct TermsConditions<'a> {
    /// Plain terms in query order, as `(text, negated)`.
    pub terms: Vec<(&'a str, bool)>,
    /// Conditions as `key -> (value, negated)`; a duplicate key keeps
    /// the last value (HashMap insert semantics).
    pub conditions: HashMap<&'a str, (&'a str, bool)>,
}
pub fn parse(input: &str) -> TermsConditions { | |
let tokens = tokenize(input); | |
let mut terms = Vec::new(); | |
let mut conditions = HashMap::new(); | |
tokens | |
.split(|x| x.is_whitespace()) | |
.for_each(|token_thing| match token_thing.len() { | |
0 => {} | |
// just simple terms like "pizza" | |
1 => match token_thing { | |
[Token::Word(w, _)] => { | |
terms.push((*w, false)); | |
} | |
_ => panic!("Invalid token: {:?}", token_thing), | |
}, | |
// negated terms like "-tall" | |
2 => match token_thing { | |
[Token::Negation, Token::Word(w, _)] => { | |
terms.push((w, true)); | |
} | |
_ => panic!("Invalid token: {:?}", token_thing), | |
}, | |
// key-value pairs like "name:Hayden" | |
3 => match token_thing { | |
[Token::Word(k, _), Token::Colon, Token::Word(v, _)] => { | |
conditions.insert(*k, (*v, false)); | |
} | |
_ => panic!("Invalid token: {:?}", token_thing), | |
}, | |
// negated key-value pairs like "tags:-tall" | |
4 => match token_thing { | |
[Token::Word(k, _), Token::Colon, Token::Negation, Token::Word(v, _)] => { | |
conditions.insert(*k, (v, true)); | |
} | |
_ => panic!("Invalid token: {:?}", token_thing), | |
}, | |
_ => panic!("Invalid token: {:?}", token_thing), | |
}); | |
TermsConditions { terms, conditions } | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment