Skip to content

Instantly share code, notes, and snippets.

@skaunov

skaunov/main.rs Secret

Created June 7, 2023 10:28
Show Gist options
  • Save skaunov/2e359a10b0414fc3ce54ea5a11132738 to your computer and use it in GitHub Desktop.
Save skaunov/2e359a10b0414fc3ce54ea5a11132738 to your computer and use it in GitHub Desktop.
snapshot of lifetimekata (Finale) solution process for discussion on macrokata Hygiene exercise
#![feature(iter_intersperse)]
use require_lifetimes::require_lifetimes;
#[derive(Debug, PartialEq, Eq, Clone)]
enum MatcherToken<'t> {
/// This is just text without anything special.
RawText(&'t str),
/// This is when text could be any one of multiple strings. It looks like `(one|two|three)`, where `one`, `two` or `three` are the allowed strings.
OneOfText(Vec<&'t str>),
/// This is when you're happy to accept any single character. It looks like `.`
WildCard,
}
#[derive(Debug, PartialEq, Eq)]
struct Matcher<'m> {
/// This is the actual text of the matcher
text: &'m str,
/// This is a vector of the tokens inside the expression.
tokens: Vec<MatcherToken<'m>>,
/// This keeps track of the most tokens that this matcher has matched.
most_tokens_matched: usize,
}
impl Matcher<'_> {
/// This should take a string reference, and return a `Matcher` which has parsed that reference.
#[require_lifetimes]
fn new<'text>(text: &'text str) -> Option<Matcher<'text>> {
// Now I know unmatche parenthesis shouldn't go into `RawText`. Any other surprises? https://github.com/tfpk/lifetimekata/issues/25
// Still not clear for example what `Option` should be return for ")(d|e)" string.
let mut pretokens = Vec::default();
let mut parse_to = text;
while parse_to.contains('(') {
match parse_to.find(')') {
Some(index_closing) => {
let index_opening = parse_to.find('(').unwrap();
if index_closing < index_opening {
pretokens.push(MatcherToken::RawText(&parse_to[..=index_closing]));
parse_to = &parse_to[index_closing + 1..];
}
else {
pretokens.push(MatcherToken::RawText(&parse_to[..index_opening]));
// we will need it sorted to be eager in `match_string`
let mut patterns_to_push = parse_to[index_opening + 1..index_closing].split('|').collect::<Vec<&str>>();
patterns_to_push.sort_unstable_by(|a, b| b.len().partial_cmp(&a.len()).unwrap());
pretokens.push(MatcherToken::OneOfText(patterns_to_push));
parse_to = &parse_to[index_closing + 1..];
}
}
None => {
pretokens.push(MatcherToken::RawText(parse_to));
parse_to = ""
}
}
}
// glue consecutive `RawText`s?
// while pretokens.iter().peekable().peek()//(MatcherToken::RawText)..check_next(RawText) {
let glued_rawtexts = || {
for (ind, pretoken) in pretokens.iter().enumerate() {
if let MatcherToken::RawText(_) = pretoken {if let Some(MatcherToken::RawText(_)) = pretokens.get(ind + 1) {return false;}}
// else {return true;}
}
true
};
// let mut glued_rawtexts = Default::default();
while !glued_rawtexts() {todo!("find consecutive `RawText`s, concat 2nd to previous, remove the second");}
let mut tokens: Vec<MatcherToken> = Default::default();
pretokens.into_iter().for_each(|pretoken| {
if let MatcherToken::RawText(string_to_process) = pretoken {
for elem in string_to_process.split('.').map(|x| MatcherToken::RawText(x)).intersperse(MatcherToken::WildCard) {
tokens.push(elem)
}
}
else {tokens.push(pretoken)}
}); // ~~TODO~~ *check that no need to flatten after*
Some(Matcher { text, tokens, most_tokens_matched: Default::default() })
}
/// This should take a string, and return a vector of tokens, and the corresponding part of the given string. For examples, see the test cases below.
#[require_lifetimes]
fn match_string<'s>(&mut self, string: &'s str) -> Vec<(&'s MatcherToken, &'s str)> {
// can it be ambiguous, so I would need to go back to some point to find other matching variants?
// feels like if make it eager the problem would be minimized
// let's yet assume that if `RawText` can't be located it just `panic`
let mut result: Vec<(&MatcherToken, &str)> = Default::default();
let mut position_current = Default::default();
// self.tokens.sort_unstable_by(|a, b| b.len().p(a));
for (token_index, token) in self.tokens.iter/* _mut */().enumerate() {
let string_remain = &string[position_current..];
match token {
MatcherToken::RawText(exact) => {
if !string_remain.starts_with::<&str>(exact.as_ref()) {panic!("#result 'should we _starts_with_ or `find`?'")}
position_current += exact.len();
result.push((&token, &string_remain[..position_current]));
}
MatcherToken::OneOfText(tokens_oneof) => {
// todo!("sort vec from longest");
// done in `new` method
for exact in tokens_oneof {if string_remain.starts_with::<&str>(exact.as_ref()) {
position_current += exact.len();
result.push((&token, &string_remain[..position_current]));
break;
}}
panic!("#result no variants suited")
}
MatcherToken::WildCard => {
let mut i = 1;
macro_rules! helper {
() => {
match string_remain.find(*exact) {
Some(wcard_len) => {
position_current += wcard_len;
result.push((&token, &string_remain[..position_current]));
true
}
None => false
}
};
}
// let mut helper = |exact| {
// match string_remain.find(exact) {
// Some(wcard_len) => {
// position_current += wcard_len;
// result.push((&token, &string_remain[..position_current]));
// true
// }
// None => false
// }
// };
// while let Some(token_after_wcard) = self.tokens.get(token_index + i) {
// while let token_after_wcard = self.tokens[token_index + i] {
while token_index + i < self.tokens.len() {
let token_after_wcard = &self.tokens[token_index + i];
match token_after_wcard {
MatcherToken::WildCard => {}
MatcherToken::RawText(exact) => {
dbg!(exact);
let _ = helper!();
panic!("#result")
}
MatcherToken::OneOfText(exacts) => {
// todo!("sort vec from longest");
// done in `new` method
for exact in exacts {
// let found = helper!();
dbg!(exact);
if helper!() {break;}
}
panic!("#result")
}
}
i += 1;
}
// if we got `None` entire `string_remain` matches `WildCard`
result.push((&token, &string_remain));
break; // only `WildCard`s left in `self.tokens`
}
}
}
result
}
}
fn main() {
unimplemented!()
}
#[cfg(test)]
mod test {
use super::{Matcher, MatcherToken};
#[test]
fn simple_test() {
let match_string = "abc(d|e|f).".to_string();
let mut matcher = Matcher::new(&match_string).unwrap();
assert_eq!(matcher.most_tokens_matched, 0);
{
let candidate1 = "abcge".to_string();
let result = matcher.match_string(&candidate1);
assert_eq!(result, vec![(&MatcherToken::RawText("abc"), "abc"),]);
assert_eq!(matcher.most_tokens_matched, 1);
}
{
// Change 'e' to '💪' if you want to test unicode.
let candidate1 = "abcde".to_string();
let result = matcher.match_string(&candidate1);
assert_eq!(
result,
vec![
(&MatcherToken::RawText("abc"), "abc"),
(&MatcherToken::OneOfText(vec!["d", "e", "f"]), "d"),
(&MatcherToken::WildCard, "e") // or '💪' TODO
]
);
assert_eq!(matcher.most_tokens_matched, 3);
}
}
#[test]
fn broken_matcher() {
let match_string = "abc(d|e|f.".to_string();
let matcher = Matcher::new(&match_string);
assert_eq!(matcher, None);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment