-
-
Save skaunov/2e359a10b0414fc3ce54ea5a11132738 to your computer and use it in GitHub Desktop.
Snapshot of a work-in-progress solution to the lifetimekata "Finale" exercise, shared for discussion of the macrokata "Hygiene" exercise.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#![feature(iter_intersperse)]
use require_lifetimes::require_lifetimes;
/// One parsed element of a matcher expression.
///
/// Every string slice stored in a token borrows from the expression text the
/// matcher was built from, hence the `'t` lifetime.
#[derive(Debug, PartialEq, Eq, Clone)]
enum MatcherToken<'t> {
    /// Plain text that has to appear verbatim.
    RawText(&'t str),
    /// A choice between several literal strings. It is written as
    /// `(one|two|three)`, where `one`, `two` and `three` are the allowed
    /// strings.
    OneOfText(Vec<&'t str>),
    /// Accepts any single character. It is written as `.`
    WildCard,
}
#[derive(Debug, PartialEq, Eq)] | |
struct Matcher<'m> { | |
/// This is the actual text of the matcher | |
text: &'m str, | |
/// This is a vector of the tokens inside the expression. | |
tokens: Vec<MatcherToken<'m>>, | |
/// This keeps track of the most tokens that this matcher has matched. | |
most_tokens_matched: usize, | |
} | |
impl Matcher<'_> { | |
/// This should take a string reference, and return a `Matcher` which has parsed that reference. | |
#[require_lifetimes] | |
fn new<'text>(text: &'text str) -> Option<Matcher<'text>> { | |
// Now I know unmatche parenthesis shouldn't go into `RawText`. Any other surprises? https://github.com/tfpk/lifetimekata/issues/25 | |
// Still not clear for example what `Option` should be return for ")(d|e)" string. | |
let mut pretokens = Vec::default(); | |
let mut parse_to = text; | |
while parse_to.contains('(') { | |
match parse_to.find(')') { | |
Some(index_closing) => { | |
let index_opening = parse_to.find('(').unwrap(); | |
if index_closing < index_opening { | |
pretokens.push(MatcherToken::RawText(&parse_to[..=index_closing])); | |
parse_to = &parse_to[index_closing + 1..]; | |
} | |
else { | |
pretokens.push(MatcherToken::RawText(&parse_to[..index_opening])); | |
// we will need it sorted to be eager in `match_string` | |
let mut patterns_to_push = parse_to[index_opening + 1..index_closing].split('|').collect::<Vec<&str>>(); | |
patterns_to_push.sort_unstable_by(|a, b| b.len().partial_cmp(&a.len()).unwrap()); | |
pretokens.push(MatcherToken::OneOfText(patterns_to_push)); | |
parse_to = &parse_to[index_closing + 1..]; | |
} | |
} | |
None => { | |
pretokens.push(MatcherToken::RawText(parse_to)); | |
parse_to = "" | |
} | |
} | |
} | |
// glue consecutive `RawText`s? | |
// while pretokens.iter().peekable().peek()//(MatcherToken::RawText)..check_next(RawText) { | |
let glued_rawtexts = || { | |
for (ind, pretoken) in pretokens.iter().enumerate() { | |
if let MatcherToken::RawText(_) = pretoken {if let Some(MatcherToken::RawText(_)) = pretokens.get(ind + 1) {return false;}} | |
// else {return true;} | |
} | |
true | |
}; | |
// let mut glued_rawtexts = Default::default(); | |
while !glued_rawtexts() {todo!("find consecutive `RawText`s, concat 2nd to previous, remove the second");} | |
let mut tokens: Vec<MatcherToken> = Default::default(); | |
pretokens.into_iter().for_each(|pretoken| { | |
if let MatcherToken::RawText(string_to_process) = pretoken { | |
for elem in string_to_process.split('.').map(|x| MatcherToken::RawText(x)).intersperse(MatcherToken::WildCard) { | |
tokens.push(elem) | |
} | |
} | |
else {tokens.push(pretoken)} | |
}); // ~~TODO~~ *check that no need to flatten after* | |
Some(Matcher { text, tokens, most_tokens_matched: Default::default() }) | |
} | |
/// This should take a string, and return a vector of tokens, and the corresponding part of the given string. For examples, see the test cases below. | |
#[require_lifetimes] | |
fn match_string<'s>(&mut self, string: &'s str) -> Vec<(&'s MatcherToken, &'s str)> { | |
// can it be ambiguous, so I would need to go back to some point to find other matching variants? | |
// feels like if make it eager the problem would be minimized | |
// let's yet assume that if `RawText` can't be located it just `panic` | |
let mut result: Vec<(&MatcherToken, &str)> = Default::default(); | |
let mut position_current = Default::default(); | |
// self.tokens.sort_unstable_by(|a, b| b.len().p(a)); | |
for (token_index, token) in self.tokens.iter/* _mut */().enumerate() { | |
let string_remain = &string[position_current..]; | |
match token { | |
MatcherToken::RawText(exact) => { | |
if !string_remain.starts_with::<&str>(exact.as_ref()) {panic!("#result 'should we _starts_with_ or `find`?'")} | |
position_current += exact.len(); | |
result.push((&token, &string_remain[..position_current])); | |
} | |
MatcherToken::OneOfText(tokens_oneof) => { | |
// todo!("sort vec from longest"); | |
// done in `new` method | |
for exact in tokens_oneof {if string_remain.starts_with::<&str>(exact.as_ref()) { | |
position_current += exact.len(); | |
result.push((&token, &string_remain[..position_current])); | |
break; | |
}} | |
panic!("#result no variants suited") | |
} | |
MatcherToken::WildCard => { | |
let mut i = 1; | |
macro_rules! helper { | |
() => { | |
match string_remain.find(*exact) { | |
Some(wcard_len) => { | |
position_current += wcard_len; | |
result.push((&token, &string_remain[..position_current])); | |
true | |
} | |
None => false | |
} | |
}; | |
} | |
// let mut helper = |exact| { | |
// match string_remain.find(exact) { | |
// Some(wcard_len) => { | |
// position_current += wcard_len; | |
// result.push((&token, &string_remain[..position_current])); | |
// true | |
// } | |
// None => false | |
// } | |
// }; | |
// while let Some(token_after_wcard) = self.tokens.get(token_index + i) { | |
// while let token_after_wcard = self.tokens[token_index + i] { | |
while token_index + i < self.tokens.len() { | |
let token_after_wcard = &self.tokens[token_index + i]; | |
match token_after_wcard { | |
MatcherToken::WildCard => {} | |
MatcherToken::RawText(exact) => { | |
dbg!(exact); | |
let _ = helper!(); | |
panic!("#result") | |
} | |
MatcherToken::OneOfText(exacts) => { | |
// todo!("sort vec from longest"); | |
// done in `new` method | |
for exact in exacts { | |
// let found = helper!(); | |
dbg!(exact); | |
if helper!() {break;} | |
} | |
panic!("#result") | |
} | |
} | |
i += 1; | |
} | |
// if we got `None` entire `string_remain` matches `WildCard` | |
result.push((&token, &string_remain)); | |
break; // only `WildCard`s left in `self.tokens` | |
} | |
} | |
} | |
result | |
} | |
} | |
/// Placeholder entry point: the exercise is driven by the unit tests, so
/// running the binary just reports that nothing is implemented here.
fn main() {
    unimplemented!()
}
#[cfg(test)]
mod test {
    use super::{Matcher, MatcherToken};

    /// A mismatch stops matching early; a full match reports every token.
    #[test]
    fn simple_test() {
        let match_string = "abc(d|e|f).".to_string();
        let mut matcher = Matcher::new(&match_string).unwrap();

        assert_eq!(matcher.most_tokens_matched, 0);

        {
            let candidate1 = "abcge".to_string();
            let result = matcher.match_string(&candidate1);
            assert_eq!(result, vec![(&MatcherToken::RawText("abc"), "abc"),]);
            assert_eq!(matcher.most_tokens_matched, 1);
        }

        {
            let candidate1 = "abcde".to_string();
            let result = matcher.match_string(&candidate1);
            assert_eq!(
                result,
                vec![
                    (&MatcherToken::RawText("abc"), "abc"),
                    (&MatcherToken::OneOfText(vec!["d", "e", "f"]), "d"),
                    (&MatcherToken::WildCard, "e")
                ]
            );
            assert_eq!(matcher.most_tokens_matched, 3);
        }
    }

    /// The wildcard consumes one whole character, even a multi-byte one.
    #[test]
    fn unicode_wildcard() {
        let match_string = "abc(d|e|f).".to_string();
        let mut matcher = Matcher::new(&match_string).unwrap();

        let candidate = "abcd💪".to_string();
        let result = matcher.match_string(&candidate);
        assert_eq!(
            result,
            vec![
                (&MatcherToken::RawText("abc"), "abc"),
                (&MatcherToken::OneOfText(vec!["d", "e", "f"]), "d"),
                (&MatcherToken::WildCard, "💪")
            ]
        );
        assert_eq!(matcher.most_tokens_matched, 3);
    }

    /// An opening parenthesis that is never closed is not a valid matcher.
    #[test]
    fn broken_matcher() {
        let match_string = "abc(d|e|f.".to_string();
        let matcher = Matcher::new(&match_string);
        assert_eq!(matcher, None);
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment