Skip to content

Instantly share code, notes, and snippets.

@nebelgrau77
Last active July 8, 2020 09:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nebelgrau77/41d630a432c61e96f77a840b53d2aef8 to your computer and use it in GitHub Desktop.
Save nebelgrau77/41d630a432c61e96f77a840b53d2aef8 to your computer and use it in GitHub Desktop.
the Sørensen–Dice coefficient

the Sørensen–Dice coefficient calculator

https://en.wikipedia.org/wiki/Sørensen–Dice_coefficient

TO DO: convert to take words as command line arguments with the following syntax: sdcoeff <word1> <word2>

Original quick draft in Python:

def get_bigrams(word):
    """Return every pair of adjacent items in ``word`` as a list of slices.

    An input with fewer than two items yields an empty list.
    """
    pairs = []
    for start in range(len(word) - 1):
        # Each slice covers the item at ``start`` and its right-hand neighbour.
        pairs.append(word[start:start + 2])
    return pairs

def sorenson_dice(word1, word2):
    """Return the Sørensen–Dice coefficient of two words, based on their bigrams.

    The coefficient is ``2 * |A ∩ B| / (|A| + |B|)`` where ``A`` and ``B`` are the
    bigram multisets of the two words, so the result always lies in ``[0, 1]``.

    When neither word yields a bigram the ratio is undefined; in that case the
    words are scored 1.0 if they are equal and 0.0 otherwise.
    """
    from collections import Counter

    big_a = get_bigrams(word1)
    big_b = get_bigrams(word2)

    total = len(big_a) + len(big_b)
    if total == 0:
        # Avoid dividing by zero for inputs too short to contain a bigram.
        return 1.0 if word1 == word2 else 0.0

    # Multiset intersection: a repeated bigram is matched only as many times as
    # it occurs in BOTH words, which keeps the coefficient from exceeding 1.
    common = sum((Counter(big_a) & Counter(big_b)).values())
    return (2 * common) / total

Final version in Rust:

use std::io;

/// Interactive entry point: asks for two words on standard input and prints
/// their Sørensen–Dice coefficient with three decimal places.
fn main() {
    // Print a prompt, read one line from stdin and return it trimmed.
    let ask = |prompt: &str| -> String {
        println!("{}", prompt);

        let mut line = String::new();
        io::stdin()
            .read_line(&mut line)
            .expect("Failed to read input.");

        line.trim().to_string()
    };

    let first = ask("Insert first word:");
    let second = ask("Insert second word:");

    let score: f32 = sd_coeff(&first, &second);

    println!("Sorensen-Dice coefficient is {:.03}", score);
}


/// Calculates the Sørensen–Dice coefficient of two words from their bigrams.
///
/// The result is `2 * common / (bigrams in word_a + bigrams in word_b)`, where
/// `common` is the count reported by `compare_bigrams`.
///
/// When neither word yields a bigram the ratio would be `0 / 0` (NaN), so that
/// case is handled explicitly: the words score `1.0` if they are equal and
/// `0.0` otherwise.
fn sd_coeff(word_a: &str, word_b: &str) -> f32 {
    let bigrams_a = get_bigrams(word_a);
    let bigrams_b = get_bigrams(word_b);

    let total = bigrams_a.len() + bigrams_b.len();
    if total == 0 {
        // Nothing to compare bigram-wise; fall back to plain equality.
        return if word_a == word_b { 1.0 } else { 0.0 };
    }

    let common = compare_bigrams(&bigrams_a, &bigrams_b);

    // `f32::from` is a lossless widening of the u8 count.
    (2.0 * f32::from(common)) / total as f32
}

/// Splits a word into its overlapping bigrams (pairs of adjacent characters).
///
/// Each bigram is returned as a slice borrowed from `word`. Characters are
/// Unicode scalar values (`char`), so multi-byte characters such as `ø` are
/// kept whole instead of being cut at a byte offset.
///
/// A word with fewer than two characters yields an empty vector.
fn get_bigrams(word: &str) -> Vec<&str> {
    // Byte offsets at which each character starts, plus the end of the string,
    // so that every window of three offsets delimits exactly two characters.
    let boundaries: Vec<usize> = word
        .char_indices()
        .map(|(idx, _)| idx)
        .chain(std::iter::once(word.len()))
        .collect();

    // `windows(3)` yields nothing when there are fewer than three offsets,
    // which covers the empty and single-character cases without arithmetic
    // that could underflow.
    boundaries
        .windows(3)
        .map(|w| &word[w[0]..w[2]])
        .collect()
}

/// Counts how many bigrams two words have in common.
///
/// The count is a multiset intersection: a bigram that occurs several times is
/// matched at most as often as it occurs in *both* lists, so the result never
/// exceeds the length of the shorter list.
///
/// The return type is `u8`; the count saturates at `u8::MAX` (255) instead of
/// overflowing when the inputs share more bigrams than that.
fn compare_bigrams(bigrams_a: &[&str], bigrams_b: &[&str]) -> u8 {
    // How many occurrences of each bigram from `bigrams_b` are still unmatched.
    let mut remaining: std::collections::HashMap<&str, usize> =
        std::collections::HashMap::new();
    for &bigram in bigrams_b {
        *remaining.entry(bigram).or_insert(0) += 1;
    }

    let mut common: u8 = 0;

    for bigram in bigrams_a {
        if let Some(left) = remaining.get_mut(bigram) {
            if *left > 0 {
                // Consume one occurrence so it cannot be matched again.
                *left -= 1;
                common = common.saturating_add(1);
            }
        }
    }

    common
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment