Skip to content

Instantly share code, notes, and snippets.

@airspeedswift
Last active June 21, 2024 07:57
Show Gist options
  • Save airspeedswift/1cd9cd1607de104ecf7bbfb80865c8bd to your computer and use it in GitHub Desktop.
Save airspeedswift/1cd9cd1607de104ecf7bbfb80865c8bd to your computer and use it in GitHub Desktop.
A simple spelling corrector in Swift
// Swift version of the spelling checker described in http://norvig.com/spell-correct.html
import Foundation
let alphabet = "abcdefghijklmnopqrstuvwxyz"
/// Given a word, produce a set of possible edits with one character
/// transposed, deleted, replaced or a rogue character inserted
func edits(_ word: String) -> Set<String> {
// create an array of the word split into pairs at each possible point
// including an empty string at the start
var splits = word.indices.map { (word[..<$0], word[$0...]) }
// and the empty string at the end
splits.append((word[...],""))
// delete the first character of the right part
let deletes = splits.map { left, right in
"\(left)\(right.dropFirst())"
}
// transpose the first and second character of the right part
let transposed = splits.compactMap { left, right in
if let first = right.first, let second = right.dropFirst().first {
"\(left)\(second)\(first)\(right.dropFirst(2))"
} else {
nil
}
}
// replace the first character of the right with every letter
let replaced = splits.flatMap { left, right in
alphabet.map { "\(left)\($0)\(right.dropFirst())" }
}
// insert every letter between left and right
let inserted = splits.flatMap { left, right in
alphabet.map { "\(left)\($0)\(right)" }
}
return Set(deletes + transposed + replaced + inserted)
}
struct SpellChecker {
var knownWords: [String:Int] = [:]
init(from file: String) throws {
let contents = try String(contentsOfFile: file, encoding: .utf8)
for word in contents.split(whereSeparator: \.isWhitespace) {
// bump known words
knownWords[word.lowercased(), default: 0] += 1
}
print("Trained \(knownWords.count) unique words")
}
func known(words: Set<String>) -> [String:Int] {
knownWords.filter { words.contains($0.key) }
}
func best(for words: Set<String>) -> String? {
known(words: words).max(by: { $0.value < $1.value })?.key
}
func correct(word: String) -> String? {
if knownWords.keys.contains(word) { return word }
let edits1 = edits(word)
if let best1 = best(for: edits1) {
return best1
}
let edits2 = Set(edits1.flatMap(edits))
if let best2 = best(for: edits2) {
return best2
}
return nil
}
}
// the one argument to this is a big corpus of text to train the checker
// such as http://norvig.com/big.txt
let checker = try! SpellChecker(from: CommandLine.arguments[1])
while let word = readLine() {
let response =
switch checker.correct(word: word) {
case word?: "Correct!"
case let corrected?: "Corrected: \(corrected)"
case nil: "Not found."
}
print(response)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment