# Skip to content
#
# Instantly share code, notes, and snippets.
#
# @hamilton
# Created September 7, 2009 17:11
# Show Gist options
#   - Save hamilton/182454 to your computer and use it in GitHub Desktop.
import sys
class Rhuthmos(object):
    """Rhyme finder built on a CMU-Pronouncing-Dictionary-style word list.

    Attributes:
        wts: "word to structure".  Maps each (lower-cased) word to its list
            of phones stored in REVERSE order, i.e. last phone first.
        stw: "structure to word".  Maps a run of trailing phones (last phone
            first, concatenated with no separator) to the set of words whose
            pronunciation ends with that run.
    """

    def __init__(self):
        self.wts = {}
        self.stw = {}

    def initiate_dictionary(self, path='cmudict/cmudict.0.7a'):
        """Load a pronunciation dictionary and build both lookup tables.

        Each data line is expected to look like ``WORD  PH1 PH2 ...``.
        Underscores in the word are turned into spaces and the word is
        lower-cased.  Blank lines and ``;;;`` comment lines are skipped.

        Args:
            path: Location of the dictionary file.  Defaults to the path the
                original implementation hard-coded.

        Raises:
            IOError / OSError: If the file cannot be opened.
        """
        # `with` guarantees the handle is closed even if parsing fails.
        with open(path, 'r') as dictionary:
            for line in dictionary:
                fields = line.split()
                # Blank lines have no word; ';;;' marks a comment line.
                if not fields or fields[0].startswith(';;;'):
                    continue

                real_word = fields[0].lower().replace('_', ' ')

                # Everything is keyed from the END of the word, so work with
                # the phones last-first from here on.
                pronunciation = fields[1:]
                pronunciation.reverse()

                # The first pronunciation seen for a word is the one used
                # for lookups in find(); later ones are still indexed below.
                if real_word not in self.wts:
                    self.wts[real_word] = pronunciation

                # Index the word under every trailing run of its phones.
                # NOTE(review): phones are concatenated with no separator,
                # so two different phone sequences could in principle yield
                # the same key (e.g. 'N' + 'G' vs 'NG').  Kept as-is to
                # preserve the existing key format of `stw`.
                for end in range(1, len(pronunciation) + 1):
                    phone_subset = ''.join(pronunciation[:end])
                    self.stw.setdefault(phone_subset, set()).add(real_word)

    def find(self, word):
        """Return the set of words that rhyme with `word`.

        A word counts as a rhyme when it shares a trailing run of phones
        that contains a "1" (the primary-stress digit on a vowel phone).
        The queried word itself is part of the result.

        Args:
            word: A key of `self.wts` (lower-case, spaces for underscores).

        Returns:
            A new set of matching words; empty when the pronunciation has no
            phone containing "1".

        Raises:
            KeyError: If `word` is not in the loaded dictionary.
        """
        pronunciation = self.wts[word]  # stored last phone first
        rhymes = set()
        for end in range(1, len(pronunciation) + 1):
            phone_subset = ''.join(pronunciation[:end])
            # Only runs reaching back to a primary-stressed phone qualify.
            if "1" in phone_subset:
                rhymes.update(self.stw.get(phone_subset, ()))
        return rhymes

    def _is_not_linebreak(self, char):
        """Return True unless `char` is a newline, tab or carriage return.

        Implemented as a substring test, so the empty string (a substring of
        everything) yields False.
        """
        return char not in '\n\t\r'
def main():
    """Sample usage: load the default dictionary and print two rhyme sets.

    Reads the dictionary from the default path used by
    `Rhuthmos.initiate_dictionary`, so that file must exist relative to the
    current working directory.
    """
    r = Rhuthmos()
    r.initiate_dictionary()
    # Single-argument print(...) behaves identically on Python 2 and 3,
    # unlike the bare `print x` statement, which is a syntax error on 3.
    print(r.find('rhyme'))
    print(r.find('crud'))
# Run the sample only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment