https://en.wikipedia.org/wiki/Sørensen–Dice_coefficient
TODO: accept the two words as command-line arguments, using the syntax: sdcoeff <word1> <word2>
Original quick draft in Python:
def get_bigrams(word):
bigrams = [word[n:n+2] for n in range(len(word)-1)]
return bigrams
def sorenson_dice(word1, word2):
big_a = get_bigrams(word1)
big_b = get_bigrams(word2)
common = [bigram for bigram in big_a if bigram in big_b] # not sure if this is error-proof
return (2*len(common)) / (len(big_a)+len(big_b))
Final version in Rust:
use std::io;
/// Interactive entry point: asks for two words on stdin and prints their
/// Sørensen–Dice coefficient rounded to three decimals.
fn main() {
    // Print `prompt` on its own line, then read one raw line from stdin.
    fn prompt_line(prompt: &str) -> String {
        println!("{}", prompt);
        let mut buffer = String::new();
        io::stdin()
            .read_line(&mut buffer)
            .expect("Failed to read input.");
        buffer
    }

    let first = prompt_line("Insert first word:");
    let second = prompt_line("Insert second word:");

    // Surrounding whitespace (including the trailing newline) is not part of the word.
    let coeff: f32 = sd_coeff(first.trim(), second.trim());
    println!("Sorensen-Dice coefficient is {:.03}", coeff);
}
/// Calculates the Sørensen–Dice coefficient of two words.
///
/// The coefficient is `2 * common / (|A| + |B|)`, where `A` and `B` are the
/// bigram lists produced by `get_bigrams` and `common` is the count reported
/// by `compare_bigrams`.
///
/// When neither word yields a bigram (for example two single-character
/// words) the formula would be `0 / 0`, i.e. NaN. In that case the function
/// returns `1.0` if the words are identical and `0.0` otherwise.
fn sd_coeff(word_a: &str, word_b: &str) -> f32 {
    let bigrams_a = get_bigrams(word_a);
    let bigrams_b = get_bigrams(word_b);

    let total = bigrams_a.len() + bigrams_b.len();
    if total == 0 {
        // No bigrams at all: avoid 0.0 / 0.0 and fall back to plain equality.
        return if word_a == word_b { 1.0 } else { 0.0 };
    }

    let common = compare_bigrams(&bigrams_a, &bigrams_b);
    (2.0 * common as f32) / (total as f32)
}
/// Splits a word into its overlapping bigrams (pairs of adjacent characters).
///
/// Each bigram is a slice borrowed from `word`, in order of appearance.
/// Slicing is done on character boundaries, so multi-byte UTF-8 characters
/// (such as the `ø` in "Sørensen") are handled without panicking.
///
/// Words with fewer than two characters, including the empty string, yield an
/// empty vector.
fn get_bigrams(word: &str) -> Vec<&str> {
    // Byte offset at which every character starts, plus the end of the string,
    // so that three consecutive offsets delimit exactly two characters.
    let mut bounds: Vec<usize> = word.char_indices().map(|(offset, _)| offset).collect();
    bounds.push(word.len());

    bounds
        .windows(3)
        .map(|span| &word[span[0]..span[2]])
        .collect()
}
/// Counts how many bigrams two words have in common.
///
/// The lists are treated as multisets: every occurrence in `bigrams_b` can be
/// matched by at most one occurrence in `bigrams_a`, so the result never
/// exceeds the length of the shorter list and repeated bigrams are not
/// counted more than once each.
///
/// The return type is `u8`, so the count saturates at 255 instead of
/// overflowing.
fn compare_bigrams(bigrams_a: &[&str], bigrams_b: &[&str]) -> u8 {
    use std::collections::HashMap;

    // How many unmatched occurrences of each bigram are still available in `bigrams_b`.
    let mut remaining: HashMap<&str, usize> = HashMap::new();
    for &bigram in bigrams_b {
        *remaining.entry(bigram).or_insert(0) += 1;
    }

    let mut common: u8 = 0;
    for &bigram in bigrams_a {
        if let Some(left) = remaining.get_mut(bigram) {
            if *left > 0 {
                // Consume one occurrence so it cannot be matched again.
                *left -= 1;
                common = common.saturating_add(1);
            }
        }
    }
    common
}