Skip to content

Instantly share code, notes, and snippets.

@gleicon
Created December 16, 2022 18:36
Show Gist options
  • Save gleicon/4155f45d6003bd13cac1085abd2a2516 to your computer and use it in GitHub Desktop.
Save gleicon/4155f45d6003bd13cac1085abd2a2516 to your computer and use it in GitHub Desktop.
rust and symspell
[package]
name = "aho-exploration"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
aho-corasick = "0.7.19"
symspell = "0.4.3"
use aho_corasick::AhoCorasickBuilder;
use symspell::{AsciiStringStrategy, SymSpell, Verbosity};
use std::time::{Duration, Instant};
/// Exploration of two crates: multi-pattern search with `aho-corasick` and
/// spelling correction / word segmentation with `symspell`.
///
/// Prints every pattern match found in a sample sentence, then loads the
/// SymSpell frequency dictionaries from the `data/` directory (relative to the
/// working directory) and prints the results of a single-word lookup, a
/// compound lookup and a word segmentation, followed by the time those three
/// queries took.
///
/// Exits with status 1 if either dictionary file cannot be loaded, because
/// every SymSpell query below depends on that data.
fn main() {
    let patterns = &["apple", "maple", "snapple"];
    // Run-together variant of the sentence "Nobody likes maple in their apple
    // flavored Snapple." — for that spaced sentence the expected matches are
    // (1, 13, 18), (0, 28, 33), (2, 43, 50); the offsets printed here differ
    // because "maple" and "apple" are adjacent in this haystack.
    let haystack = "Nobody likes mapleapple flavored Snapple.";

    // Case-insensitive so that "Snapple" is matched by the "snapple" pattern.
    let ac = AhoCorasickBuilder::new()
        .ascii_case_insensitive(true)
        .build(patterns);

    // Collect (pattern index, start, end) for every match.
    let mut matches = vec![];
    for mat in ac.find_iter(haystack) {
        println!(
            "{:?} - pattern {} -> haystack {}",
            mat,
            patterns[mat.pattern()],
            &haystack[mat.start()..mat.end()]
        );
        matches.push((mat.pattern(), mat.start(), mat.end()));
    }
    println!("matches: {:?}", matches);

    let mut symspell: SymSpell<AsciiStringStrategy> = SymSpell::default();

    // Both loaders return `false` on failure (per the symspell crate API);
    // ignoring that would leave the lookups below running against missing data.
    let dictionary_path = "data/frequency_dictionary_en_82_765.txt";
    if !symspell.load_dictionary(dictionary_path, 0, 1, " ") {
        eprintln!("failed to load dictionary: {}", dictionary_path);
        std::process::exit(1);
    }

    let bigram_path = "./data/frequency_bigramdictionary_en_243_342.txt";
    if !symspell.load_bigram_dictionary(bigram_path, 0, 2, " ") {
        eprintln!("failed to load bigram dictionary: {}", bigram_path);
        std::process::exit(1);
    }

    // Time only the queries, not the dictionary loading above.
    let start = Instant::now();

    let suggestions = symspell.lookup("roket", Verbosity::Top, 2);
    println!("sentence 0: {:?}", suggestions);

    let sentence = "whereis th elove hehad dated forImuch of thepast who couqdn'tread in sixtgrade and ins pired him";
    let compound_suggestions = symspell.lookup_compound(sentence, 2);
    println!("sentence 1: {:?}", compound_suggestions);

    let sentence = "whereisthelove";
    let segmented = symspell.word_segmentation(sentence, 2);
    println!("sentence 2: {:?}", segmented);

    let duration = start.elapsed();
    println!("Time elapsed in SymSpell lookups is: {:?}", duration);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment