Skip to content

Instantly share code, notes, and snippets.

@bicycle1885
Created March 30, 2016 20:04
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bicycle1885/69fd07b87f860536e65c6d2263a22f37 to your computer and use it in GitHub Desktop.
Save bicycle1885/69fd07b87f860536e65c6d2263a22f37 to your computer and use it in GitHub Desktop.
Myers' Approximate String Search Algorithm
# Approximate String Search
# =========================
#
# Myers, Gene. "A fast bit-vector algorithm for approximate string matching
# based on dynamic programming." Journal of the ACM (JACM) 46.3 (1999): 395-415.
#
"""
approx_search(text, query, k)
Search the end of approximately matching position of `query` in `text` will `≤ k` errors.
# Remarks
* Errors are substitutions, insertions, or deletions.
* Retruns a position `j` such that `min_i distance(query, text[i:j]) ≤ k` if
any, where `distance` is the Levenshtein distance; otherwise retruns `0`.
* The length of a query must be shorter or equal to 64.
"""
function approx_search(t::ASCIIString, p::ASCIIString, k::Int)
σ = 128
m = length(p)
n = length(t)
@assert m ≤ 64
# preprocess
Peq = zeros(UInt64, σ)
for i in 1:m
Peq[Int8(p[i])+1] |= one(UInt64) << (i - 1)
end
Pv = (UInt64(1) << m) - one(UInt64)
Mv = UInt64(0)
Score = m
for j in 1:n
Eq = Peq[Int8(t[j])+1]
Xv = Eq | Mv
Xh = (((Eq & Pv) + Pv) $ Pv) | Eq
Ph = Mv | ~(Xh | Pv)
Mh = Pv & Xh
if Ph & (one(UInt64) << (m - 1)) != 0
Score += 1
elseif Mh & (one(UInt64) << (m - 1)) != 0
Score -= 1
end
Ph <<= 1
Mh <<= 1
Pv = Mh | ~(Xv | Ph)
Mv = Ph & Xv
if Score ≤ k
return j
end
end
return 0
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment