Created
November 7, 2010 18:45
-
-
Save socantre/666314 to your computer and use it in GitHub Desktop.
compute the Levenshtein distance between an n length string and an m length string in O(n) memory (not O(nm)).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iterator> | |
#include <vector> | |
template<typename RandomAccessIterator1,typename RandomAccessIterator2,typename Comparator> | |
int levenshtein_distance(RandomAccessIterator1 begin1,RandomAccessIterator1 end1, | |
RandomAccessIterator2 begin2,RandomAccessIterator2 end2, | |
Comparator cmp) | |
{ | |
std::vector<int> values; | |
for(int i=0;i<(end1-begin1 + 1);++i) { | |
values.push_back(i); | |
} | |
for(RandomAccessIterator2 i=begin2;i<end2;++i) { | |
int prev_calc = i-begin2 + 1; | |
for(RandomAccessIterator1 j=begin1;j<end1;++j) { | |
int new_calc; | |
if(cmp(*i,*j)) { | |
new_calc = values[j-begin1]; | |
} else { | |
new_calc = 1+std::min(std::min(values[j-begin1],values[j-begin1+1]),prev_calc); | |
} | |
values[j-begin1] = prev_calc; | |
prev_calc = new_calc; | |
} | |
values[end1-begin1] = prev_calc; | |
} | |
return values[end1-begin1]; | |
} | |
template<typename RandomAccessIterator1,typename RandomAccessIterator2> | |
int levenshtein_distance(RandomAccessIterator1 begin1,RandomAccessIterator1 end1, | |
RandomAccessIterator2 begin2,RandomAccessIterator2 end2) | |
{ | |
return levenshtein_distance(begin1,end1,begin2,end2, | |
std::equal_to<typename std::iterator_traits<RandomAccessIterator1>::value_type>()); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment