Skip to content

Instantly share code, notes, and snippets.

@spundun
Created March 31, 2015 05:41
Show Gist options
  • Save spundun/36381fdd830f1da282a7 to your computer and use it in GitHub Desktop.
Save spundun/36381fdd830f1da282a7 to your computer and use it in GitHub Desktop.
Similarity based on Levenshtein Distance
//
// main.cpp
// StringSimilarity
//
// Created by Spundun Bhatt on 3/30/15.
// Copyright (c) 2015 Spundun Bhatt. All rights reserved.
//
#include <iostream>
#include <memory>
#include <vector>
#include <cmath>
using namespace std;
size_t LevDistance(string s1, string s2) {
size_t l1 = s1.length();
size_t l2 = s2.length();
if(l1==0) return l2;
if(l2==0) return l1;
shared_ptr<vector<size_t>> prev(new vector<size_t>(l1)), cur(new vector<size_t>(l1));
//////////////////////////////////
// This code populates the first row.
if (s1[0]==s2[0]) {
(*prev)[0] = 0;
} else {
(*prev)[0] = 1;
}
for (size_t i = 1; i< l1; i++) {
if (s1[i] == s2[0]) {
(*prev)[i] = i;
} else {
(*prev)[i] = min(i+1, (*prev)[i-1]+1);
}
}
//////////////////////////////////
for (size_t i=1; i<l2; i++) {
if (s1[0] == s2[i]) {
(*cur)[0] = i;
}else {
(*cur)[0] = min(i+1, (*prev)[0]+1);
}
for (size_t j=1; j<l1; j++) {
if(s1[j] == s2[i]) {
(*cur)[j] = (*prev)[j-1];
} else {
(*cur)[j] = min((*prev)[j]+1, (*cur)[j-1] + 1);
}
}
cur.swap(prev);
}
return (*prev)[l1-1];
}
double SimExp(string s1, string s2) {
if (s1.length()==0 && s2.length()==0) {
return 1;
}
return exp(-((double)LevDistance(s1, s2))/(double)max(s1.length(),s2.length()));
}
double SimLinear(string s1, string s2) {
if (s1.length()==0 && s2.length()==0) {
return 1;
}
return 1-((double)LevDistance(s1, s2))/(double)(max(s1.length(),s2.length()));
}
int main(int argc, const char * argv[]) {
while(true){
string s1, s2;
//cin>> s1;
//cin>> s2;
getline(cin, s1);
getline(cin, s2);
cout << "Two Strings \"" << s1 << "\", \"" << s2 << "\"" << endl <<
"... Similarity(exp): " << SimExp(s1, s2) << " ... Similarity(linear): " << SimLinear(s1, s2) << endl;
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment