-
-
Save padraic/2bbfa946844f53d705fb to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
namespace Tools;

/**
 * Edit-distance spelling corrector backed by a weighted word list.
 *
 * A word is looked up in the loaded dictionary; when it is unknown, every
 * string one edit away (deletion, substitution, transposition, insertion)
 * is tried, then every string two edits away. Among the dictionary words
 * found, the one with the highest stored weight is returned.
 */
class SpellCheck
{
    const ALPHABET = "abcdefghijklmnopqrstuvwxyz";

    /**
     * Map of lower-cased word => weight, as read by loadDictionary().
     */
    protected dictionary;

    /**
     * The characters of ALPHABET as a list, used to build substitutions
     * and insertions.
     */
    protected alphabet;

    /**
     * Prepares the alphabet list and starts with an empty dictionary.
     */
    public function __construct()
    {
        let this->alphabet = str_split(self::ALPHABET);
        let this->dictionary = [];
    }

    /**
     * Returns the best known correction for a word.
     *
     * The word itself is returned when it is already in the dictionary or
     * when no dictionary word lies within two edits of it. Otherwise the
     * candidate with the greatest weight is returned; candidates one edit
     * away take priority over candidates two edits away.
     *
     * @param string word Word to check, compared as-is against dictionary keys
     * @return string
     */
    public function correctWord(string word)
    {
        var candidates, candidate, weight;
        // Weights are parsed with "%f", so the running maximum must be a
        // double: an integer type would truncate fractional weights to 0.
        double maxWeight = 0.0;
        bool pending = true;
        string correction;

        let correction = word;

        if this->checkKnownWords([word]) {
            return word;
        }

        let candidates = this->checkKnownWords(this->generateEditsFor(word));
        if empty candidates {
            let candidates = this->checkKnownEdits(word);
        }
        if empty candidates {
            return word;
        }

        for candidate in candidates {
            let weight = this->dictionary[candidate];
            // The first candidate is always taken so that a correction is
            // still produced when every weight is zero or missing.
            if pending || weight > maxWeight {
                let maxWeight = weight;
                let correction = candidate;
                let pending = false;
            }
        }

        return correction;
    }

    /**
     * Replaces the dictionary with entries read from a word file.
     *
     * Each line is parsed with the format "%s\t%f" (word, tab, weight). The
     * word is lower-cased and used as the key. Reading stops after `size`
     * entries, or at the first line that fscanf() cannot turn into an array.
     *
     * NOTE: minLength is accepted for interface compatibility but is not
     * applied; the length filter was disabled in the original code and is
     * left off here so existing callers keep the same dictionary contents.
     *
     * @param string wordFile Path of the file to read
     * @param int size Maximum number of entries to load
     * @param int minLength Currently unused (see note above)
     * @throws \RuntimeException When the file is missing, unreadable or cannot be opened
     */
    public function loadDictionary(string wordFile, int size = 10000, int minLength = 4) -> void
    {
        var handle, match;
        int loaded = 0;

        let this->dictionary = [];

        if unlikely !file_exists(wordFile) || !is_readable(wordFile) {
            throw new \RuntimeException(
                "Unable to load file: " . wordFile
            );
        }

        let handle = fopen(wordFile, "r");
        // fopen() can still fail after the checks above (e.g. the file was
        // removed in between); do not hand a non-resource to fscanf().
        if unlikely handle === false {
            throw new \RuntimeException(
                "Unable to load file: " . wordFile
            );
        }

        let match = fscanf(handle, "%s\t%f");
        while loaded < size && is_array(match) && count(match) > 0 {
            let this->dictionary[strtolower(match[0])] = match[1];
            let loaded++;
            let match = fscanf(handle, "%s\t%f");
        }

        fclose(handle);
    }

    /**
     * Builds every string one edit away from a word.
     *
     * Produces, in this order: for each position a deletion followed by the
     * substitutions with each alphabet character; each adjacent
     * transposition; and each insertion of an alphabet character, including
     * at the end of the word. The list may contain duplicates and the word
     * itself (substituting a character with itself).
     *
     * @param string word
     * @return array List of candidate strings
     */
    protected function generateEditsFor(word)
    {
        int length, i;
        var ch, edits, head, tail;

        let length = strlen(word);
        let edits = [];

        // Deletions and substitutions touch an existing character, so the
        // position runs over 0 .. length - 1.
        let i = 0;
        while i < length {
            let head = substr(word, 0, i);
            let tail = substr(word, i + 1);
            let edits[] = head . tail;
            for ch in this->alphabet {
                let edits[] = head . ch . tail;
            }
            let i++;
        }

        // Transpositions swap characters i and i + 1, so the last valid
        // position is length - 2.
        let i = 0;
        while i < length - 1 {
            let edits[] = substr(word, 0, i) . substr(word, i + 1, 1) . substr(word, i, 1) . substr(word, i + 2);
            let i++;
        }

        // Insertions are also possible after the last character, so the
        // position runs over 0 .. length inclusive.
        let i = 0;
        while i <= length {
            let head = substr(word, 0, i);
            let tail = substr(word, i);
            for ch in this->alphabet {
                let edits[] = head . ch . tail;
            }
            let i++;
        }

        return edits;
    }

    /**
     * Filters a list of words down to those present in the dictionary.
     *
     * @param array words
     * @return array Known words, in input order (duplicates preserved)
     */
    protected function checkKnownWords(words)
    {
        var known, word;

        let known = [];
        for word in words {
            if isset this->dictionary[word] {
                let known[] = word;
            }
        }

        return known;
    }

    /**
     * Finds the dictionary words that are two edits away from a word.
     *
     * Every one-edit variant is expanded again and the results are checked
     * against the dictionary. Each match is reported once.
     *
     * @param string word
     * @return array List of distinct known words
     */
    protected function checkKnownEdits(word)
    {
        var known, firstEdits, firstEdit, matches, match;

        let known = [];
        let firstEdits = this->generateEditsFor(word);
        for firstEdit in firstEdits {
            let matches = this->checkKnownWords(this->generateEditsFor(firstEdit));
            for match in matches {
                // Keyed by the word itself so repeated matches collapse.
                let known[match] = match;
            }
        }

        return array_values(known);
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment