Skip to content

Instantly share code, notes, and snippets.

@lojic
Created November 8, 2021 18:43
Show Gist options
  • Save lojic/720d0b816c2e695a23b565d84a64de65 to your computer and use it in GitHub Desktop.
Save lojic/720d0b816c2e695a23b565d84a64de65 to your computer and use it in GitHub Desktop.
#lang racket
(define (words text) (regexp-match* #rx"[a-z]+" (string-downcase text)))
(define (train features)
(define model (make-hash))
(for ([f features]) (hash-update! model f add1 1)) model)
(define nwords (train (words (file->string "big.txt"))))
(define alphabet "abcdefghijklmnopqrstuvwxyz")
(define (edits1 word)
(let* ([splits (for/list ([i (in-range (+ (string-length word) 1))])
(cons (substring word 0 i) (substring word i)))]
[deletes (for/set ([(a b) (in-dict splits)] #:when (> (string-length b) 0))
(string-append a (substring b 1)))]
[transposes (for/set ([(a b) (in-dict splits)] #:when (> (string-length b) 1))
(string-append a (substring b 1 2) (substring b 0 1) (substring b 2)))]
[replaces (for/set ([(a b) (in-dict splits)] #:when (> (string-length b) 0)
[c alphabet])
(string-append a (string c) (substring b 1)))]
[inserts (for*/set ([(a b) (in-dict splits)]
[c alphabet])
(string-append a (string c) b))])
(set-union deletes transposes replaces inserts)))
(define (known-edits2 word)
(for*/set ([e1 (edits1 word)]
[e2 (edits1 e1)] #:when (hash-has-key? nwords e2))
e2))
(define (known words)
(for/set ([w words] #:when (hash-has-key? nwords w)) w))
; If the set argument is non-empty, return it; otherwise, return #f
; nes = non-empty set
(define (nes set) (if (set-empty? set) #f set))
(define (correct word)
(let ([candidates (or (nes (known (list word))) (nes (known (edits1 word)))
(nes (known-edits2 word)) (set word))])
(argmax (λ (w) (hash-ref nwords w 1)) (set->list candidates))))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment