Skip to content

Instantly share code, notes, and snippets.

@willwade
Last active October 26, 2023 10:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save willwade/091ba471fe3897fb4f0593c58928e751 to your computer and use it in GitHub Desktop.
Save willwade/091ba471fe3897fb4f0593c58928e751 to your computer and use it in GitHub Desktop.
PPM in Swift, courtesy of GPT. Here, the generateCandidates(word:) method generates candidate words by swapping adjacent characters. This is a very simplistic way to generate candidates; in a real-world application, you might use more sophisticated techniques such as Damerau-Levenshtein distance. The autocorrect(word:context:topN:) method takes a mis…
/** Ideally you'd want to keep updating that training text continually too - but you then need a way for people to correct the training text. That was easy enough with Dasher, although undocumented: you literally edit the text file.
Also note this for autocorrection - though I'm not sure how we would implement it.
**/
extension PPM {
    /// Generates candidate spellings of `word` by transposing each pair of
    /// adjacent characters in turn.
    ///
    /// A word of `n` characters yields `n - 1` candidates, in left-to-right
    /// order of the swapped pair. Swapping two identical neighbours reproduces
    /// the input word, which is kept as a candidate.
    ///
    /// - Parameter word: The (possibly misspelled) word to permute.
    /// - Returns: The transposed variants; empty when `word` has fewer than two
    ///   characters.
    func generateCandidates(word: String) -> [String] {
        var chars = Array(word)
        // With fewer than two characters there is nothing to transpose. The
        // guard also prevents forming the invalid range `0..<(-1)` for an
        // empty word, which would trap at runtime.
        guard chars.count > 1 else { return [] }

        var candidates: [String] = []
        candidates.reserveCapacity(chars.count - 1)
        for i in 0..<(chars.count - 1) {
            chars.swapAt(i, i + 1)
            candidates.append(String(chars))
            chars.swapAt(i, i + 1) // Restore the original order for the next pair.
        }
        return candidates
    }

    /// Suggests corrections for `word` by scoring each adjacent-transposition
    /// candidate with the model.
    ///
    /// Each candidate is passed to `predict(context:topN:)`; candidates for
    /// which the model returns no word prediction are dropped. The remaining
    /// candidates are paired with the likelihood of the top word prediction and
    /// sorted from most to least likely.
    ///
    /// - Parameters:
    ///   - word: The misspelled word.
    ///   - context: Currently unused by the scoring; kept so existing call
    ///     sites continue to compile.
    ///   - topN: Maximum number of suggestions to return.
    /// - Returns: Up to `topN` `(candidate, likelihood)` pairs, best first;
    ///   empty when `topN` is not positive.
    func autocorrect(word: String, context: String, topN: Int) -> [(String, Double)] {
        // `prefix(_:)` traps on a negative length, so reject non-positive limits.
        guard topN > 0 else { return [] }

        let scoredCandidates: [(String, Double)] = generateCandidates(word: word).compactMap { candidate in
            // `words` holds `(String, Double)` tuples, so the likelihood is
            // element `.1` (tuples have no `.value` member).
            guard let best = predict(context: candidate, topN: 1).words.first else { return nil }
            return (candidate, best.1)
        }

        // `prefix(_:)` returns an `ArraySlice`; convert back to the declared
        // array return type.
        return Array(scoredCandidates.sorted { $0.1 > $1.1 }.prefix(topN))
    }
}
// Usage: train a model on a short sample and print autocorrect suggestions.
let ppm = PPM()
ppm.train(text: "hello world hello everyone")
// Ask for up to three corrections of the misspelled word "helo", passing "hel" as the context.
let corrections = ppm.autocorrect(word: "helo", context: "hel", topN: 3)
print("Autocorrection suggestions:")
for suggestion in corrections {
    // Each suggestion is a (word, likelihood) pair.
    print("Word: \(suggestion.0), Likelihood: \(suggestion.1)")
}
import Foundation
/// A single node of the character trie used by `PPM`.
///
/// It is a class (reference type) so that the trie can be walked and mutated
/// in place through a moving "current node" reference.
class Node {
    /// Child nodes, keyed by the next character.
    var children = [Character: Node]()
    /// Visit counter for this node; starts at zero.
    var frequency = 0
    /// Occurrence counts keyed by word; starts empty.
    var words = [String: Int]()
}
/// A simple character-trie prediction model.
///
/// Training inserts every whitespace-separated word into a trie rooted at
/// `root`, counting how often each node is visited. Prediction walks the trie
/// along a context string and ranks the next characters, and the words that
/// end exactly at that position, by relative frequency.
class PPM {
    /// Root of the trie; corresponds to the empty context.
    let root = Node()

    /// Trains the model on the UTF-8 text stored at `fileURL`.
    ///
    /// A read failure is reported on standard output and leaves the model
    /// unchanged.
    ///
    /// - Parameter fileURL: Location of the training text.
    func train(fromFile fileURL: URL) {
        do {
            let text = try String(contentsOf: fileURL, encoding: .utf8)
            train(text: text)
        } catch {
            print("Error reading file: \(error)")
        }
    }

    /// Trains the model on an in-memory string.
    ///
    /// Words are separated by any whitespace (spaces, tabs, newlines), so
    /// multi-line text does not glue the last word of one line to the first
    /// word of the next.
    ///
    /// - Parameter text: The training text.
    func train(text: String) {
        for word in text.split(whereSeparator: { $0.isWhitespace }) {
            var currentNode = root
            for char in word {
                currentNode.frequency += 1
                // Reuse the existing child or create one, without force-unwrapping.
                let nextNode: Node
                if let existing = currentNode.children[char] {
                    nextNode = existing
                } else {
                    nextNode = Node()
                    currentNode.children[char] = nextNode
                }
                currentNode = nextNode
            }
            // Count the terminal node as well and record the completed word on it.
            currentNode.frequency += 1
            currentNode.words[String(word), default: 0] += 1
        }
    }

    /// Predicts what follows `context`.
    ///
    /// The context is matched from its first character onwards, the same order
    /// in which `train` inserts words, so a context such as `"hell"` reaches
    /// the node shared by all trained words starting with `"hell"`.
    ///
    /// - Parameters:
    ///   - context: The characters typed so far.
    ///   - topN: Maximum number of entries in each returned list.
    /// - Returns: `letters` holds the most frequent next characters and `words`
    ///   the most frequent trained words that end exactly at `context`. Each is
    ///   paired with its count divided by the context node's frequency. Both
    ///   lists are empty when the context is not in the trie or `topN` is not
    ///   positive.
    func predict(context: String, topN: Int) -> (letters: [(Character, Double)], words: [(String, Double)]) {
        // `prefix(_:)` traps on a negative length.
        guard topN > 0 else { return ([], []) }

        var currentNode = root
        for char in context {
            guard let nextNode = currentNode.children[char] else {
                return ([], [])
            }
            currentNode = nextNode
        }
        let topLetters = mostFrequentChildren(of: currentNode, topN: topN)
        let topWords = mostFrequentWords(of: currentNode, topN: topN)
        return (topLetters, topWords)
    }

    /// Ranks the children of `node` by frequency, highest first.
    ///
    /// Ties are broken by character order so the result does not depend on
    /// dictionary iteration order.
    private func mostFrequentChildren(of node: Node, topN: Int) -> [(Character, Double)] {
        let totalFrequency = Double(node.frequency)
        let sortedChildren = node.children.sorted { lhs, rhs in
            lhs.value.frequency != rhs.value.frequency
                ? lhs.value.frequency > rhs.value.frequency
                : lhs.key < rhs.key
        }
        return sortedChildren.prefix(topN).map { entry in
            (entry.key, Double(entry.value.frequency) / totalFrequency)
        }
    }

    /// Ranks the words recorded on `node` by count, highest first.
    ///
    /// Ties are broken alphabetically so the result does not depend on
    /// dictionary iteration order.
    private func mostFrequentWords(of node: Node, topN: Int) -> [(String, Double)] {
        let totalFrequency = Double(node.frequency)
        let sortedWords = node.words.sorted { lhs, rhs in
            lhs.value != rhs.value ? lhs.value > rhs.value : lhs.key < rhs.key
        }
        return sortedWords.prefix(topN).map { entry in
            (entry.key, Double(entry.value) / totalFrequency)
        }
    }
}
// Usage: train a model from a text file and print next-letter / word predictions.
let ppm = PPM()
// Replace this path with the actual location of your training text.
// `URL(fileURLWithPath:)` is used because `URL(string:)` with a bare path
// produces a URL without the `file` scheme, which cannot be read from disk.
let fileURL = URL(fileURLWithPath: "path/to/your/text/file.txt")
ppm.train(fromFile: fileURL)
// Request up to six predictions of each kind for the context "hell".
let (topLetters, topWords) = ppm.predict(context: "hell", topN: 6)
print("Top letter predictions:")
for (char, likelihood) in topLetters {
    print("Next letter: \(char), Likelihood: \(likelihood)")
}
print("\nTop word predictions:")
for (word, likelihood) in topWords {
    print("Next word: \(word), Likelihood: \(likelihood)")
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment