Last active
January 10, 2019 18:26
-
-
Save AO8/4cdfffcc19cf38c323a447b17b955c2f to your computer and use it in GitHub Desktop.
Use Python 3, NLTK, and the Carnegie Mellon University Pronouncing Dictionary to count the syllables in an English word or phrase.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# NLTK is a suite of libraries for working with human language data: http://www.nltk.org
# NLTK allows us to access the Carnegie Mellon University Pronouncing Dictionary (cmudict)
# cmudict is a corpus that contains almost 125,000 words mapped to their pronunciations.
# This tiny app was inspired by Lee Vaughan's Impractical Python Projects
import sys | |
from string import punctuation | |
from nltk.corpus import cmudict | |
# Load the corpus and build the dictionary used for lookups (word -> list of
# pronunciations).
# NOTE: this rebinds the name `cmudict` from the imported NLTK corpus object to
# the dict returned by .dict(); after this line `cmudict` is that dict.
cmudict = cmudict.dict()
def main():
    """Entry point: show the banner, then hand control to the prompt loop."""
    startup_steps = (print_header, run_event_loop)
    for step in startup_steps:
        step()
def print_header():
    """Write the three-line application banner to standard output."""
    rule = "-----------------------------------------------------"
    title = " Syllable Counter"
    # One print call; the joined text yields the same three output lines.
    print("\n".join((rule, title, rule)))
def run_event_loop():
    """Prompt for words or phrases and report their syllable counts.

    The loop runs until the user submits an empty line or standard input
    reaches end-of-file; it then prints a farewell message and terminates the
    program via ``sys.exit()``.

    A ``KeyError`` from ``count_syllables`` (word not in the pronunciation
    dictionary) and any other exception are reported on ``sys.stderr``; the
    loop then asks for the next entry.
    """
    while True:
        try:
            word = input("Enter a word or phrase <press Enter to exit>: ")
        except EOFError:
            # stdin was closed (Ctrl-D / Ctrl-Z, or piped input ran out).
            # Treat it like an empty line instead of dying with a traceback.
            word = ""

        if word == "":
            print("\nThanks for using Syllable Counter. Have a good day!")
            sys.exit()

        # Keep the try body to the one call that can fail on lookup.
        try:
            num_syllables = count_syllables(word)
        except KeyError:
            # Some consoles/IDEs render sys.stderr output in red.
            print("\nWord not found. Check your spelling or enter a new word.\n",
                  file=sys.stderr)
        except Exception as e:
            print(f"There was a problem processing this request. Details: {e}",
                  file=sys.stderr)
        else:
            # note that use of f-strings requires Python 3.6+
            print(f"\nNumber of syllables in '{word}' is {num_syllables}.")
            print()
def count_syllables(words, pron_dict=None):
    """Use the pronunciation corpus to count syllables in a word or phrase.

    Args:
        words: An English word or phrase. Hyphens are treated as word
            separators, case is ignored, and leading/trailing punctuation is
            stripped from every word. A trailing possessive ``'s`` is dropped
            before lookup.
        pron_dict: Optional mapping of lowercase word -> list of
            pronunciations, each pronunciation being a list of phoneme
            strings. Defaults to the module-level ``cmudict`` dictionary.

    Returns:
        The total number of syllables, counted as the number of phonemes
        that end in a digit in the first listed pronunciation of each word.

    Raises:
        KeyError: If a word is not present in the pronunciation dictionary.
    """
    if pron_dict is None:
        pron_dict = cmudict

    # prep word or phrase for syllable counting
    tokens = words.replace("-", " ").lower().split()

    num_sylls = 0
    for word in tokens:
        word = word.strip(punctuation)
        if word.endswith("'s"):
            # Drop the possessive suffix, then count the remaining word like
            # any other (previously such words were stripped but never counted).
            word = word[:-2]
        if not word:
            # Token was nothing but punctuation (e.g. a stray "," or "&");
            # it has no syllables and must not trigger a lookup of "".
            continue
        # [0] in case there are multiple pronunciations, default to first
        first_pronunciation = pron_dict[word][0]
        num_sylls += sum(
            1 for phoneme in first_pronunciation if phoneme[-1].isdigit()
        )
    return num_sylls
# Start the interactive app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment