Created
May 10, 2011 01:41
-
-
Save aleph0ne/963774 to your computer and use it in GitHub Desktop.
Fast Edit Distance (Modified Levenshtein Distance)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/newlisp | |
; A super-fast implementation of the Levenshtein Distance (Edit Distance) algorithm | |
; | |
; Requires: | |
; https://github.com/causes/puzzles/raw/master/word_friends/word.list | |
; | |
; http://en.wikipedia.org/wiki/Levenshtein_distance | |
; | |
; Pass in a word and get back a list | |
; (under 350 milliseconds per 'word on a MacBook Air, at least...) | |
; | |
; | |
; Usage: | |
; (setf found-words (GetFriends "benefit" word-list)) | |
; | |
; -> ("benefit" "benefic" "benefits") | |
; | |
(set 'word-list (chop (parse (read-file "word.list") "\n"))) | |
(set 'alphabet (explode "abcdefghijklmnopqrstuvwxyz")) | |
(define (GetFriends word word-list) | |
(set 'new-words '()) ; start at empty | |
; | |
; Deletes (removing one letter) | |
; | |
(for (i 0 (- (length word) 1)) | |
(setf tmpWord word) | |
(pop tmpWord i) | |
(push tmpWord new-words -1) | |
) ; .03ms per word | |
; | |
; Swaps (swap adjacent letters) | |
; | |
(for (i 0 (- (length word) 1)) | |
(set 'tmpWord word) | |
(push (push (pop tmpWord i) tmpWord (inc i)) new-words) | |
) ; .03 ms | |
; | |
; Modifies (one letter to another) | |
; | |
(for (i 0 (- (length word) 1)) | |
(set 'tmpWord word) | |
(dolist (a alphabet) | |
(setf (tmpWord i) a) | |
(push tmpWord new-words -1) | |
) | |
) ; .8 ms | |
; | |
; Inserts (add a letter) | |
; | |
(for (i 0 (length word)) | |
(dolist (a alphabet) | |
(set 'tmpWord word) | |
(push (push a tmpWord i) new-words -1) | |
) | |
) | |
; | |
; remove the friend we came in with (i.e. benefit is not a friend of benefit) | |
(pop new-words) | |
; return the friends list | |
(intersect new-words word-list) ; ~ 200 ms (slowest part of the code) | |
) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment