Skip to content

Instantly share code, notes, and snippets.

@padraic
Created January 26, 2014 11:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save padraic/2bbfa946844f53d705fb to your computer and use it in GitHub Desktop.
Save padraic/2bbfa946844f53d705fb to your computer and use it in GitHub Desktop.
namespace Tools;
/**
 * Norvig-style spelling corrector backed by a word => weight dictionary.
 *
 * A word is corrected by looking, in order, for: the word itself, known words
 * one edit away, then known words two edits away. Among the candidates of the
 * first non-empty tier the one with the highest dictionary weight wins.
 */
class SpellCheck
{
    const ALPHABET = "abcdefghijklmnopqrstuvwxyz";

    /**
     * Map of lower-cased word => weight, filled by loadDictionary().
     */
    protected dictionary;

    /**
     * ALPHABET split into single characters, used when generating edits.
     */
    protected alphabet;

    public function __construct()
    {
        let this->alphabet = str_split(self::ALPHABET);
        let this->dictionary = [];
    }

    /**
     * Returns the best known correction for a word.
     *
     * @param string word Word to check; it is looked up exactly as given
     *                    (dictionary keys are lower-cased on load).
     * @return string The word itself when it is known or when no candidate
     *                exists, otherwise the highest-weighted candidate.
     */
    public function correctWord(string word)
    {
        var candidates, candidate, weight, correction;
        // Weights are parsed with "%f", so the running maximum must be a
        // floating point value; an integer maximum would truncate fractional
        // weights and let a lower-weighted candidate win.
        double maxWeight = 0.0;
        double score = 0.0;
        bool chosen = false;

        if this->checkKnownWords([word]) {
            return word;
        }

        // Prefer words one edit away; only fall back to two edits when
        // nothing closer is known.
        let candidates = this->checkKnownWords(this->generateEditsFor(word));
        if !candidates {
            let candidates = this->checkKnownEdits(word);
        }
        if !candidates {
            return word;
        }

        // Fallback so an empty string can never be returned.
        let correction = word;
        for candidate in candidates {
            let weight = this->dictionary[candidate];
            let score = (double) weight;
            // The first candidate is always accepted, so dictionaries whose
            // entries carry no (or zero) weight still yield a correction.
            // Ties keep the earliest candidate.
            if !chosen || score > maxWeight {
                let maxWeight = score;
                let correction = candidate;
                let chosen = true;
            }
        }

        return correction;
    }

    /**
     * Replaces the dictionary with entries read from a word file.
     *
     * Each line is parsed with the format "%s\t%f" (word, weight). Lines that
     * do not parse (for example blank lines) are skipped. Words are stored
     * lower-cased; a repeated word overwrites the earlier weight.
     *
     * @param string wordFile  Path to the word file.
     * @param int    size      Maximum number of parsed lines to store.
     * @param int    minLength Currently not applied: the length filter is
     *                         disabled in this implementation.
     * @throws \RuntimeException When the file is missing, unreadable or
     *                           cannot be opened.
     */
    public function loadDictionary(string wordFile, int size = 10000, int minLength = 4) -> void
    {
        var handle, match;
        int i = 0;

        let this->dictionary = [];

        if unlikely !file_exists(wordFile) || !is_readable(wordFile) {
            throw new \RuntimeException(
                "Unable to load file: " . wordFile
            );
        }

        let handle = fopen(wordFile, "r");
        if unlikely handle === false {
            throw new \RuntimeException(
                "Unable to load file: " . wordFile
            );
        }

        // fscanf() yields false once the stream is exhausted; a line that
        // cannot be parsed yields a non-array value, which must not end the
        // load early.
        let match = fscanf(handle, "%s\t%f");
        while match !== false && i < size {
            if is_array(match) && is_string(match[0]) {
                let this->dictionary[strtolower(match[0])] = match[1];
                let i++;
            }
            let match = fscanf(handle, "%s\t%f");
        }

        fclose(handle);
    }

    /**
     * Generates every string one edit away from a word.
     *
     * Edits are produced in this order: for each position a deletion followed
     * by the 26 replacements, then all adjacent transpositions, then all
     * insertions. The list may contain duplicates and, through replacing a
     * letter with itself, the word itself.
     *
     * @param string word
     * @return array List of edited strings; an empty word yields only the 26
     *               single-letter insertions.
     */
    protected function generateEditsFor(word)
    {
        int length, i;
        var ch, head, edits;

        let length = strlen(word);
        let edits = [];

        // Deletions and replacements: valid positions are 0 .. length - 1.
        let i = 0;
        while i < length {
            let head = substr(word, 0, i);
            let edits[] = head . substr(word, i + 1);
            for ch in this->alphabet {
                let edits[] = head . ch . substr(word, i + 1);
            }
            let i++;
        }

        // Transpositions swap the characters at i and i + 1, so the last
        // valid position is length - 2.
        let i = 0;
        while i < length - 1 {
            let edits[] = substr(word, 0, i) . substr(word, i + 1, 1) . substr(word, i, 1) . substr(word, i + 2);
            let i++;
        }

        // Insertions: one slot before every character plus one at the end.
        let i = 0;
        while i <= length {
            let head = substr(word, 0, i);
            for ch in this->alphabet {
                let edits[] = head . ch . substr(word, i);
            }
            let i++;
        }

        return edits;
    }

    /**
     * Filters a list of words down to those present in the dictionary.
     *
     * @param array words
     * @return array Known words, in input order, duplicates preserved.
     */
    protected function checkKnownWords(words)
    {
        var known, word;

        let known = [];
        for word in words {
            if isset this->dictionary[word] {
                let known[] = word;
            }
        }

        return known;
    }

    /**
     * Finds dictionary words that are two edits away from a word.
     *
     * @param string word
     * @return array Known words reachable by applying generateEditsFor()
     *               twice; may contain duplicates.
     */
    protected function checkKnownEdits(word)
    {
        var known, edit1, edit2;

        let known = [];
        for edit1 in this->generateEditsFor(word) {
            for edit2 in this->generateEditsFor(edit1) {
                if isset this->dictionary[edit2] {
                    let known[] = edit2;
                }
            }
        }

        return known;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment