Created
March 14, 2021 18:24
-
-
Save jdh30/ce7698fe6c13f4cca5481ce05de96d77 to your computer and use it in GitHub Desktop.
Peter Norvig's spelling corrector written in F#
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
open System.Text.RegularExpressions | |
let alphabet = ['a'..'z'] | |
let edits1 (w: string) = | |
seq { for i in 0 .. w.Length do | |
if i < w.Length then | |
yield w.[0..i-1] + w.[i+1..] // Delete | |
if i < w.Length-1 then | |
yield w.[0..i-1] + w.[i+1..i+1] + w.[i..i] + w.[i+2..] // Swap | |
for c in alphabet do | |
yield w.[0..i-1] + string c + w.[i..] // Insert | |
if i < w.Length then | |
yield w.[0..i-1] + string c + w.[i+1..] } // Replace | |
let re = Regex ("[a-zA-Z]+", RegexOptions.Compiled) | |
let words = | |
use client = new System.Net.WebClient() | |
"https://github.com/dolph/dictionary/blob/master/enable1.txt?raw=true" | |
|> client.DownloadString | |
|> re.Matches | |
|> Seq.cast<Match> | |
|> Seq.map (fun x -> x.Value.ToLower()) | |
let wordMap = words |> Seq.countBy id |> Map.ofSeq | |
let correct arg = | |
let f = Seq.collect edits1 >> Seq.filter wordMap.ContainsKey | |
let best = Seq.maxBy (fun w -> wordMap.[w]) | |
Seq.unfold (fun ws -> Some(ws, f ws)) (f [arg]) | |
|> Seq.truncate 3 | |
|> Seq.tryPick (fun ws -> if Seq.isEmpty ws then None else Some(best ws)) | |
|> Option.defaultValue arg | |
correct "garbaging" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment