-
-
Save lojic/720d0b816c2e695a23b565d84a64de65 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#lang racket | |
(define (words text) (regexp-match* #rx"[a-z]+" (string-downcase text))) | |
(define (train features) | |
(define model (make-hash)) | |
(for ([f features]) (hash-update! model f add1 1)) model) | |
(define nwords (train (words (file->string "big.txt")))) | |
(define alphabet "abcdefghijklmnopqrstuvwxyz") | |
(define (edits1 word) | |
(let* ([splits (for/list ([i (in-range (+ (string-length word) 1))]) | |
(cons (substring word 0 i) (substring word i)))] | |
[deletes (for/set ([(a b) (in-dict splits)] #:when (> (string-length b) 0)) | |
(string-append a (substring b 1)))] | |
[transposes (for/set ([(a b) (in-dict splits)] #:when (> (string-length b) 1)) | |
(string-append a (substring b 1 2) (substring b 0 1) (substring b 2)))] | |
[replaces (for/set ([(a b) (in-dict splits)] #:when (> (string-length b) 0) | |
[c alphabet]) | |
(string-append a (string c) (substring b 1)))] | |
[inserts (for*/set ([(a b) (in-dict splits)] | |
[c alphabet]) | |
(string-append a (string c) b))]) | |
(set-union deletes transposes replaces inserts))) | |
(define (known-edits2 word) | |
(for*/set ([e1 (edits1 word)] | |
[e2 (edits1 e1)] #:when (hash-has-key? nwords e2)) | |
e2)) | |
(define (known words) | |
(for/set ([w words] #:when (hash-has-key? nwords w)) w)) | |
; If the set argument is non-empty, return it; otherwise, return #f | |
; nes = non-empty set | |
(define (nes set) (if (set-empty? set) #f set)) | |
(define (correct word) | |
(let ([candidates (or (nes (known (list word))) (nes (known (edits1 word))) | |
(nes (known-edits2 word)) (set word))]) | |
(argmax (λ (w) (hash-ref nwords w 1)) (set->list candidates)))) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment