Skip to content

Instantly share code, notes, and snippets.

@binzram
Created April 21, 2017 00:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save binzram/df94eafaa3b3e8ca07734ec30e5df58d to your computer and use it in GitHub Desktop.
Save binzram/df94eafaa3b3e8ca07734ec30e5df58d to your computer and use it in GitHub Desktop.
package ch.bbc.wikicompare.algorythms;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
public class DiceCoefficent {
public double diceCoefficient(String s1, String s2)
{
Set<String> nx = new HashSet<String>();
Set<String> ny = new HashSet<String>();
for (int i=0; i < s1.length()-1; i++) {
char x1 = s1.charAt(i);
char x2 = s1.charAt(i+1);
String tmp = "" + x1 + x2;
nx.add(tmp);
}
for (int j=0; j < s2.length()-1; j++) {
char y1 = s2.charAt(j);
char y2 = s2.charAt(j+1);
String tmp = "" + y1 + y2;
ny.add(tmp);
}
Set<String> intersection = new HashSet<String>(nx);
intersection.retainAll(ny);
double totcombigrams = intersection.size();
return (2*totcombigrams) / (nx.size()+ny.size());
}
public double diceCoefficientOptimized(String s, String t)
{
// Verifying the input:
if (s == null || t == null)
return 0;
// Quick check to catch identical objects:
if (s == t)
return 1;
// avoid exception for single character searches
if (s.length() < 2 || t.length() < 2)
return 0;
// Create the bigrams for string s:
final int n = s.length()-1;
final int[] sPairs = new int[n];
for (int i = 0; i <= n; i++)
if (i == 0)
sPairs[i] = s.charAt(i) << 16;
else if (i == n)
sPairs[i-1] |= s.charAt(i);
else
sPairs[i] = (sPairs[i-1] |= s.charAt(i)) << 16;
// Create the bigrams for string t:
final int m = t.length()-1;
final int[] tPairs = new int[m];
for (int i = 0; i <= m; i++)
if (i == 0)
tPairs[i] = t.charAt(i) << 16;
else if (i == m)
tPairs[i-1] |= t.charAt(i);
else
tPairs[i] = (tPairs[i-1] |= t.charAt(i)) << 16;
// Sort the bigram lists:
Arrays.sort(sPairs);
Arrays.sort(tPairs);
// Count the matches:
int matches = 0, i = 0, j = 0;
while (i < n && j < m)
{
if (sPairs[i] == tPairs[j])
{
matches += 2;
i++;
j++;
}
else if (sPairs[i] < tPairs[j])
i++;
else
j++;
}
return (double)matches/(n+m);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment