Skip to content

Instantly share code, notes, and snippets.

@plukevdh
Created November 7, 2012 05:15
Show Gist options
  • Save plukevdh/4029680 to your computer and use it in GitHub Desktop.
Save plukevdh/4029680 to your computer and use it in GitHub Desktop.
memoized levenshtein
-module(levenshtein).
-export([distance/2]).
store_result(Key, Value, Cache) ->
{Value, dict:store(Key, Value, Cache)}.
distance(String1, String2) ->
{List,_} = distance(String1, String2, dict:new()),
List.
distance(String1, []=String2, Cache) ->
store_result({String1, String2}, string:len(String1), Cache);
distance([]=String1, String2, Cache) ->
store_result({String1, String2}, string:len(String2), Cache);
distance([X|Rest1], [X|Rest2], Cache) ->
distance(Rest1,Rest2,Cache);
distance([_|Rest1]=String1, [_|Rest2]=String2, Cache) ->
case dict:is_key({String1,String2}, Cache) of
true -> {dict:fetch({String1,String2}, Cache), Cache};
false ->
{L1,C1} = distance(String1, Rest2, Cache),
{L2,C2} = distance(Rest1, String2, C1),
{L3,C3} = distance(Rest1, Rest2, C2),
MinDist = lists:min([L1, L2, L3]) + 1,
store_result({String1,String2}, MinDist, C3)
end.
class Levenshtein
attr_reader :distance
def initialize(first, second)
@from = first
@to = second
@memo = {}
end
def distance
@distance ||= calculate_distance(@from, @to)
end
private
def put_cache(from,to, val)
@memo["#{from}_#{to}"] = val
val
end
def get_cache(from,to)
@memo["#{from}_#{to}"]
end
# mimic erlang's [Head|Tail] construct
def head_rest(val)
return val[0], val[1..-1]
end
def calculate_distance(from, to)
return put_cache(from,to,0) if from == to
return put_cache(from,to,from.length) if to.empty?
return put_cache(from,to,to.length) if from.empty?
from_h, from_t = head_rest(from)
to_h, to_t = head_rest(to)
return calculate_distance(from_t, to_t) if from_h == to_h
cache_val = get_cache(from,to)
if cache_val.nil?
val = [
calculate_distance(from_t, to),
calculate_distance(from, to_t),
calculate_distance(from_t, to_t)
].min
put_cache(from,to,val+1)
else
cache_val
end
end
end
@plukevdh
Copy link
Author

plukevdh commented Nov 7, 2012

Benchmark.bmbm do |bm|
  bm.report("simple") { Levenshtein.new("kitten", "sitting").distance }
  bm.report("more complex") { Levenshtein.new("space balls", "base spalls").distance }
end

turns out:

Rehearsal ------------------------------------------------
simple         0.000000   0.000000   0.000000 (  0.001050)
more complex   0.000000   0.000000   0.000000 (  0.002877)
--------------------------------------- total: 0.000000sec

                   user     system      total        real
simple         0.010000   0.000000   0.010000 (  0.005302)
more complex   0.000000   0.000000   0.000000 (  0.002935)

compare with https://gist.github.com/4029376#gistcomment-593533

@plukevdh
Copy link
Author

plukevdh commented Nov 7, 2012

Single run ({time in microsec, result}).

3> timer:tc(levenshtein, distance, ["kitten", "sitting"]).
{1557,3}
4> timer:tc(levenshtein, distance, ["space balls", "base spalls"]).
{678,5}

@plukevdh
Copy link
Author

plukevdh commented Nov 7, 2012

Ruby results for 100 times averaged against the following words:

Levenshtein.new("kitten", "sitting").distance
Levenshtein.new("space balls", "base spalls").distance
Levenshtein.new("rosettacode", "raisethysword").distance
                      user     system      total        real
avg simple        0.001200   0.000000   0.001200 (  0.001253)
avg complex       0.002800   0.000000   0.002800 (  0.002908)
avg matching      0.003400   0.000000   0.003400 (  0.003521)

@plukevdh
Copy link
Author

plukevdh commented Nov 7, 2012

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment