Skip to content

Instantly share code, notes, and snippets.

@sergeytimoshin
Created April 10, 2017 11:42
Show Gist options
  • Save sergeytimoshin/ae2b7152ac425a8de1a1d2b47b0b27ce to your computer and use it in GitHub Desktop.
Save sergeytimoshin/ae2b7152ac425a8de1a1d2b47b0b27ce to your computer and use it in GitHub Desktop.
/// Translation of [Peter Norvig's spell checker](http://norvig.com/spell-correct.html) into Swift 3.
/// Sample input corpus [here](http://norvig.com/big.txt)
/// Swift 2 version: https://gist.github.com/airspeedswift/6d8c9d95f0d812f58be3
import Foundation // purely for IO, most things done with Swift.String
/// Given a word, produce a set of possible alternatives with
/// letters transposed, deleted, replaced or rogue characters inserted
func edits(word: String) -> Set<String> {
if word.isEmpty { return [] }
let splits = word.characters.indices.map {
(word[word.startIndex..<$0], word[$0..<word.endIndex])
}
let deletes = splits.map { $0.0 + String($0.1.characters.dropFirst()) }
let transposes: [String] = splits.map { left, right in
if let fst = right.characters.first {
let drop1 = String(right.characters.dropFirst())
if let snd = drop1.characters.first {
let drop2 = String(drop1.characters.dropFirst())
return "\(left)\(snd)\(fst)\(drop2)"
}
}
return ""
}.filter { !$0.isEmpty }
let alphabet = "abcdefghijklmnopqrstuvwxyz"
let replaces = splits.flatMap { left, right in
alphabet.characters.map { "\(left)\($0)\(String(right.characters.dropFirst()))" }
}
let inserts = splits.flatMap { left, right in
alphabet.characters.map { "\(left)\($0)\(right)" }
}
return Set(deletes + transposes + replaces + inserts)
}
struct SpellChecker {
var knownWords: [String:Int] = [:]
mutating func train(word: String) {
if let idx = knownWords[word] {
knownWords[word] = idx + 1
}
else {
knownWords[word] = 1
}
}
init?(contentsOfFile file: String) {
do {
let text = try String(contentsOfFile: file, encoding: .utf8).lowercased()
let words = text.unicodeScalars.split(whereSeparator: { !("a"..."z").contains($0) }).map { String($0) }
for word in words { self.train(word: word) }
}
catch {
return nil
}
}
func knownEdits2(word: String) -> Set<String>? {
var known_edits: Set<String> = []
for edit in edits(word: word) {
if let k = known(words: edits(word: edit)) {
known_edits.formUnion(k)
}
}
return known_edits.isEmpty ? nil : known_edits
}
func known<S: Sequence>(words: S) -> Set<String>? where S.Iterator.Element == String {
let s = Set(words.filter { self.knownWords.index(forKey: $0) != nil })
return s.isEmpty ? nil : s
}
func candidates(word: String) -> Set<String> {
guard let result = known(words: [word]) ?? known(words: edits(word: word)) ?? knownEdits2(word: word) else {
return Set<String>()
}
return result
}
func correct(word: String) -> String {
return candidates(word: word).reduce(word) {
(knownWords[$0] ?? 1) < (knownWords[$1] ?? 1) ? $1 : $0
}
}
}
// main()
if CommandLine.argc == 2, let checker = SpellChecker(contentsOfFile: CommandLine.arguments[1]) {
print("Type word to check and hit enter")
while let word = String(data: FileHandle.standardInput.availableData, encoding: .utf8), word.characters.last == "\n"
{
let word = String(word.characters.dropLast())
let checked = checker.correct(word: word)
let candidates = checker.candidates(word: word)
if word == checked {
print("\(word) unchanged")
}
else {
print("Correct:\t\(word) -> \(checked)")
print("Candidates:\t\(word) -> \(candidates)")
}
}
}
else {
print("Usage: (Process.arguments[0]) <corpus_filename>")
}
@lvandal
Copy link

lvandal commented Feb 12, 2019

Thanks for this! However, spell check fails for words like youre (checked is your instead of you're), etc.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment