Last active
August 2, 2018 03:09
-
-
Save ruliana/cb7b32e8da149aabb09bc3340ab1046d to your computer and use it in GitHub Desktop.
String Edit Distance in Racket using recursion and caching (dynamic programming like, but still recursive)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#lang racket | |
(module+ test | |
(require rackunit)) | |
(module+ test | |
(check-equal? (editions "" "") empty) | |
(check-equal? (editions "a" "a") (list (op-match #\a))) | |
(check-equal? (editions "ab" "ab") (list (op-match #\a) (op-match #\b))) | |
(check-equal? (editions "a" "") (list (op-insert #\a))) | |
(check-equal? (editions "" "b") (list (op-delete #\b))) | |
(check-equal? (editions "abc" "") (list (op-insert #\a) (op-insert #\b) (op-insert #\c))) | |
(check-equal? (editions "" "abc") (list (op-delete #\a) (op-delete #\b) (op-delete #\c))) | |
(check-equal? (editions "abc" "a") (list (op-match #\a) (op-insert #\b) (op-insert #\c))) | |
(check-equal? (editions "c" "abc") (list (op-delete #\a) (op-delete #\b) (op-match #\c))) | |
(check-equal? (editions "ac" "abc") (list (op-match #\a) (op-delete #\b) (op-match #\c))) | |
(check-equal? (editions "abcd" "xabxdx") (list (op-delete #\x) | |
(op-match #\a) | |
(op-match #\b) | |
(op-replace #\x #\c) | |
(op-match #\d) | |
(op-delete #\x))) | |
(check-equal? (editions "a" "b") (list (op-replace #\b #\a))) | |
(check-equal? (editions "pedreiro" "padeiro") (list (op-match #\p) | |
(op-replace #\a #\e) | |
(op-match #\d) | |
(op-insert #\r) | |
(op-match #\e) | |
(op-match #\i) | |
(op-match #\r) | |
(op-match #\o))) | |
; The test below takes a lot of time without the caching strategy | |
(check-equal? (distance (editions "you should not" "thou shall not")) 5)) | |
(struct op-match (letter) #:transparent) | |
(struct op-delete (letter) #:transparent) | |
(struct op-insert (letter) #:transparent) | |
(struct op-replace (source-letter target-letter) #:transparent) | |
; How each operations costs? | |
; In this exemple, every operation costs 1 | |
; but match, which is free. | |
(define (op-cost operation) | |
(match operation | |
[(op-match _) 0] | |
[_ 1])) | |
(define (distance op-list) | |
(for/sum ([e op-list]) (op-cost e))) | |
; A helper function to make cache simple | |
(define (make-cache) | |
(define cache (make-hash)) | |
(curry dict-ref! cache)) | |
(define (editions pattern other) | |
(define cache-hit! (make-cache)) | |
(define (loop p p-pos o o-pos) | |
(define (matching letter left right) | |
(cons (op-match letter) (loop left (add1 p-pos) right (add1 o-pos)))) | |
(define (insert letter left right) | |
(cons (op-insert letter) (loop left (add1 p-pos) right o-pos))) | |
(define (delete letter left right) | |
(cons (op-delete letter) (loop left p-pos right (add1 o-pos)))) | |
(define (replace letter-a letter-b left right) | |
(cons (op-replace letter-a letter-b) (loop left (add1 p-pos) right (add1 o-pos)))) | |
(define (step p o) | |
(match (list p o) | |
[(list '() '()) '()] | |
[(list (list a p-rest ...) '()) (insert a p-rest empty)] | |
[(list '() (list a o-rest ...)) (delete a empty o-rest)] | |
[(list (list a p-rest ...) (list a o-rest ...)) (matching a p-rest o-rest)] | |
[(list (list a p-rest ...) (list b o-rest ...)) (argmin distance | |
(list | |
(replace b a p-rest o-rest) | |
(delete b p o-rest) | |
(insert a p-rest o)))])) | |
(define key (vector p-pos o-pos)) | |
(cache-hit! key (thunk (step p o)))) | |
(loop (string->list pattern) 1 (string->list other) 1)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment