#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Print a digit string for every dictionary word found in a word list.

Reads pronunciation entries from ``cmudict-0.4.scm``, translates each
entry's phonemes to digits through ``mapper``, and prints
``word<TAB>digits`` for every entry whose word also appears in
``wordlist.txt`` and whose digit string is non-empty.

The code is written to run unchanged on both Python 2.7 and Python 3.
"""
import re

# Phoneme -> digit(s) table.  Phonemes that are absent from this table
# contribute nothing to the resulting digit string.
# NOTE(review): the table looks like the consonant-to-digit assignment of the
# mnemonic "major system" -- confirm before relying on that name.
mapper = {
    's': '0',
    'z': '0',
    't': '1',
    'd': '1',
    'dh': '1',
    'th': '1',
    'n': '2',
    'm': '3',
    'r': '4',
    'er0': '4',
    'er1': '4',
    'l': '5',
    'jh': '6',
    'sh': '6',
    'ch': '6',
    'zh': '6',
    'k': '7',
    'g': '7',
    'f': '8',
    'v': '8',
    'p': '9',
    'b': '9',
    'ng': '27',
}

# One dictionary entry: ("word" tag (ph1 ph2 ...)).  Compiled once instead of
# on every loop iteration.  The pattern is applied to a line whose line ending
# has already been stripped, so it is anchored with ``$`` rather than a
# literal newline.
ENTRY_RE = re.compile(r'\("(\w+)" (\w+) \((.*)\)\)$')


def read_wordlist(lines):
    """Return the set of whitespace-stripped words from *lines*.

    *lines* is any iterable of strings (for example an open text file).
    """
    return {line.strip() for line in lines}


def phonemes_to_digits(phonemes):
    """Return the digit string for an iterable of phoneme names.

    Each phoneme is looked up in ``mapper``; unknown phonemes are skipped.
    """
    return ''.join(mapper.get(phoneme, '') for phoneme in phonemes)


def parse_entry(line):
    """Parse one dictionary line into ``(word, phonemes)``.

    Returns ``None`` when the line is not a dictionary entry.  The line
    ending (``\\n`` or ``\\r\\n``) is optional, so the last line of a file
    without a trailing newline is still recognised.
    """
    match = ENTRY_RE.match(line.rstrip('\r\n'))
    if match is None:
        return None
    # Group 2 (the tag between the word and the phoneme list) is not used.
    return match.group(1), match.group(3).split(' ')


def convert(entries, words):
    """Yield ``(word, digits)`` for every usable entry in *entries*.

    *entries* is an iterable of dictionary lines and *words* a container of
    accepted words.  Entries that do not parse, that map to an empty digit
    string, or whose word is not in *words* are skipped.
    """
    for line in entries:
        parsed = parse_entry(line)
        if parsed is None:
            continue
        word, phonemes = parsed
        number = phonemes_to_digits(phonemes)
        if number and word in words:
            yield word, number


def main():
    """Run the conversion on the fixed input files and print the result."""
    # ``with`` guarantees both files are closed, even on error.
    with open('wordlist.txt') as wordlist_file:
        words = read_wordlist(wordlist_file)
    with open('cmudict-0.4.scm', 'r') as dict_file:
        for word, number in convert(dict_file, words):
            # Single parenthesised argument: same output on Python 2 and 3.
            print('%s\t%s' % (word, number))


if __name__ == '__main__':
    main()