Skip to content

Instantly share code, notes, and snippets.

@Mezzle
Created May 22, 2012 16:55
Show Gist options
  • Save Mezzle/2770227 to your computer and use it in GitHub Desktop.
Save Mezzle/2770227 to your computer and use it in GitHub Desktop.
SpellChecker Test
import random, re
from string import join
class SpellCheckerTest:
    """Generate deliberately misspelled words for exercising a spell checker.

    A few words are sampled from a word-list file and each one is randomly
    mangled: lower-case vowels may be swapped for other vowels, letters may
    be doubled, and letters may be upper-cased.
    """

    # Lower-case letters eligible for vowel substitution in mangle_letter().
    vowels = ['a', 'e', 'i', 'o', 'u']

    def pick_words(self, num_words=5, path='/usr/share/dict/words'):
        """Return up to ``num_words`` randomly chosen lines from ``path``.

        The file is read in a single pass using a reservoir: the first
        ``num_words`` lines fill the sample, and each later line (0-based
        index ``i``) replaces a random slot with probability
        ``num_words / (i + 1)``.  The whole file is never held in memory.

        Args:
            num_words: Maximum number of lines to return.  Values <= 0
                yield an empty list.
            path: Path of the word-list file, one word per line.

        Returns:
            A list of raw lines exactly as read from the file (line endings
            included).  It is shorter than ``num_words`` when the file has
            fewer lines.

        Raises:
            IOError/OSError: If ``path`` cannot be opened or read.
        """
        if num_words <= 0:
            return []

        sample = []
        # The context manager closes the file even if iteration raises.
        with open(path) as word_file:
            for i, line in enumerate(word_file):
                if i < num_words:
                    sample.append(line)
                # float() keeps this a true division under Python 2 as well.
                elif random.random() < num_words / float(i + 1):
                    sample[random.randrange(num_words)] = line
        return sample

    def clean_word(self, word):
        """Return ``word`` with leading and trailing whitespace removed."""
        return word.strip()

    def generate_words(self):
        """Pick random words, clean them, and return their mangled forms.

        Returns:
            A list with one mangled string per word from ``pick_words()``.
        """
        return [self.mangle_word(self.clean_word(word))
                for word in self.pick_words()]

    def mangle_letter(self, letter):
        """Return a randomly distorted version of ``letter``.

        Three random bits independently enable each distortion:

        * bit 2 -- replace a lower-case vowel with a random vowel (which
          may be the same one);
        * bit 1 -- double the letter;
        * bit 0 -- upper-case the result.

        Args:
            letter: The string (normally one character) to distort.

        Returns:
            The distorted string; unchanged when no bit is set.
        """
        mod_bits = random.getrandbits(3)
        if (mod_bits & 4) and letter in self.vowels:
            letter = random.choice(self.vowels)
        if mod_bits & 2:
            letter += letter
        if mod_bits & 1:
            letter = letter.upper()
        return letter

    def mangle_word(self, word):
        """Return ``word`` with a random subset of its letters mangled.

        One random bit is drawn per character; the character at position
        ``i`` is passed through ``mangle_letter()`` when bit ``i`` is set.

        Args:
            word: The string to mangle.

        Returns:
            The mangled string; an empty ``word`` is returned unchanged.
        """
        if not word:
            # random.getrandbits(0) raises ValueError before Python 3.9.
            return word

        bits = random.getrandbits(len(word))
        new_word = []
        for i, c in enumerate(word):
            # Test bit i itself.  ANDing with the index (bits & i) would
            # never select position 0 and would couple unrelated bits.
            if (bits >> i) & 1:
                c = self.mangle_letter(c)
            new_word.append(c)
        return ''.join(new_word)
if __name__ == "__main__":
    # Script entry point: emit one mangled word per line on stdout.
    checker = SpellCheckerTest()
    for word in checker.generate_words():
        # The parenthesised single-argument form prints identically under
        # the Python 2 print statement and the Python 3 print function.
        print(word)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment