Skip to content

Instantly share code, notes, and snippets.

@foota
Created May 14, 2012 18:41
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save foota/2695610 to your computer and use it in GitHub Desktop.
Save foota/2695610 to your computer and use it in GitHub Desktop.
Edit distance
#include <iostream>
#include <string>
#include <bitset>
#include <ctime>
using namespace std;
// dynamic programming
const int SIZE = 100;
int edit_distance_dp(const string& str1, const string& str2)
{
static int d[SIZE][SIZE];
for (int i = 0; i < str1.size() + 1; i++) d[i][0] = i;
for (int i = 0; i < str2.size() + 1; i++) d[0][i] = i;
for (int i = 1; i < str1.size() + 1; i++)
for (int j = 1; j < str2.size() + 1; j++)
d[i][j] = min(min(d[i-1][j], d[i][j-1]) + 1, d[i-1][j-1] + (str1[i-1] == str2[j-1] ? 0 : 1));
return d[str1.size()][str2.size()];
}
// O(ND) algorithm
int edit_distance_ond(const string& str1, const string& str2)
{
static int V[SIZE];
int x, y;
int offset = str1.size();
V[offset + 1] = 0;
for (int D = 0; D <= str1.size() + str2.size(); D++) {
for (int k = -D; k <= D; k += 2) {
if (k == -D || k != D && V[k-1+offset] < V[k+1+offset]) x = V[k+1+offset];
else x = V[k-1+offset] + 1;
y = x - k;
while (x < str1.size() && y < str2.size() && str1[x] == str2[y]) {
x++;
y++;
}
V[k+offset] = x;
if (x >= str1.size() && y >= str2.size()) return D;
}
}
return -1;
}
// O(NP) algorithm
int snake(int k, int y, const string& str1, const string& str2)
{
int x = y - k;
while (x < str1.size() && y < str2.size() && str1[x] == str2[y]) {
x++;
y++;
}
return y;
}
int edit_distance_onp(const string& str1, const string& str2)
{
// required: s1->size() <= s2->size()
const string* const s1 = str1.size() > str2.size() ? &str2 : &str1;
const string* const s2 = str1.size() > str2.size() ? &str1 : &str2;
static int fp[SIZE];
int x, y, k, p;
int offset = s1->size() + 1;
int delta = s2->size() - s1->size();
for (int i = 0; i < SIZE; i++) fp[i] = -1;
for (p = 0; fp[delta + offset] != s2->size(); p++) {
for(k = -p; k < delta; k++)
fp[k + offset] = snake(k, max(fp[k-1+offset] + 1, fp[k+1+offset]), *s1, *s2);
for(k = delta + p; k > delta; k--)
fp[k + offset] = snake(k, max(fp[k-1+offset] + 1, fp[k+1+offset]), *s1, *s2);
fp[delta + offset] = snake(delta, max(fp[delta-1+offset] + 1, fp[delta+1+offset]), *s1, *s2);
}
return delta + (p - 1) * 2;
}
// bit-parallel
template<typename T>
int edit_distance_bit(const string& str1, const string& str2)
{
char s1[sizeof(T) * 8] = { ' ' };
char s2[sizeof(T) * 8] = { ' ' };
char *p1, *p2;
// required: str1.size() >= str2.size()
if (str1.size() >= str2.size()) { p1 = s1; p2 = s2; }
else { p1 = s2; p2 = s1; }
copy(str1.begin(), str1.end(), p1 + 1);
copy(str2.begin(), str2.end(), p2 + 1);
int m = strlen(s1);
int n = strlen(s2);
const T ONE = 1;
T Peq[256] = { 0 };
T Pv, Eq, Xv, Xh, Ph, Mh;
T Mv = 0;
int Score = m;
for (int i = 0; i < m; i++) {
Peq[s1[i]] |= ONE << i;
Pv |= (ONE << i);
}
for (int j = 0; j < n; j++) {
Eq = Peq[s2[j]];
Xv = Eq | Mv;
Xh = (((Eq & Pv) + Pv) ^ Pv) | Eq;
Ph = Mv | ~(Xh | Pv);
Mh = Pv & Xh;
if (Ph & (ONE << (m - 1))) Score++;
else if (Mh & (ONE << (m - 1))) Score--;
Ph <<= ONE;
Pv = (Mh << ONE) | ~(Xv | Ph);
Mv = Ph & Xv;
}
return Score;
}
// bit-parallel (bitset)
template<size_t N>
const bitset<N> operator+(const bitset<N>& lhs, const bitset<N>& rhs)
{
bitset<N> a(lhs), b(rhs), ret(lhs ^ rhs);
for (b = (a & b) << 1, a = ret; b.any(); b = (a & b) << 1, a = ret) ret ^= b;
return ret;
}
template<size_t N>
int edit_distance_bitset(const string& str1, const string& str2)
{
char s1[N] = { ' ' };
char s2[N] = { ' ' };
char *p1, *p2;
// required: str1.size() >= str2.size()
if (str1.size() >= str2.size()) { p1 = s1; p2 = s2; }
else { p1 = s2; p2 = s1; }
copy(str1.begin(), str1.end(), p1 + 1);
copy(str2.begin(), str2.end(), p2 + 1);
int m = strlen(s1);
int n = strlen(s2);
const bitset<N> ONE(1);
bitset<N> Peq[256];
bitset<N> Pv, Mv, Eq, Xv, Xh, Ph, Mh;
int Score = m;
for (int i = 0; i < m; i++) {
Peq[s1[i]] |= ONE << i;
Pv |= (ONE << i);
}
for (int j = 0; j < n; j++) {
Eq = Peq[s2[j]];
Xv = Eq | Mv;
Xh = (((Eq & Pv) + Pv) ^ Pv) | Eq;
Ph = Mv | ~(Xh | Pv);
Mh = Pv & Xh;
if ((Ph & (ONE << (m - 1))).any()) Score++;
else if ((Mh & (ONE << (m - 1))).any()) Score--;
Ph <<= 1;
Pv = (Mh << 1) | ~(Xv | Ph);
Mv = Ph & Xv;
}
return Score;
}
// for test
void run(int (*func)(const string&, const string&), const string& arg1, const string& arg2, int num, const string& msg)
{
clock_t start, finish;
start = clock();
for (int i = 0; i < num - 1; i++) (*func)(arg1, arg2);
cout << msg << " : " << (*func)(arg1, arg2) << endl;
finish = clock();
cout << "Time: " << (double)(finish - start) / CLOCKS_PER_SEC << "s (" << num << " times)" << endl;
cout << endl;
}
int main()
{
string str1 = "agtcaaaagtcagtcagtcagtcagtcacagtcagaaggcatccaaccga";
string str2 = "ccgttagtcagaaacagtcagtcagtcagtcagtccagtcttaggcccgga";
cout << str1 << endl;
cout << str2 << endl;
run(edit_distance_dp, str1, str2, 100000, "dp ");
run(edit_distance_ond, str1, str2, 100000, "ond ");
run(edit_distance_onp, str1, str2, 100000, "onp ");
run(edit_distance_bit<unsigned long long>, str1, str2, 100000, "bit ");
run(edit_distance_bitset<60>, str1, str2, 100000, "bitset");
return 0;
}
@foota
Copy link
Author

foota commented May 14, 2012

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment