Skip to content

Instantly share code, notes, and snippets.

@AO8
Last active January 10, 2019 18:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save AO8/4cdfffcc19cf38c323a447b17b955c2f to your computer and use it in GitHub Desktop.
Save AO8/4cdfffcc19cf38c323a447b17b955c2f to your computer and use it in GitHub Desktop.
Use Python 3, NLTK, and the Carnegie Mellon University Pronouncing Dictionary to count the syllables in an English word or phrase.
# NLTK is a suite of libraries for working with human language data: http://www.nltk.org
# NLTK allows us to access the Carnegie Mellon University Pronouncing Dictionary (cmudict)
# cmudict is a corpus that contains almost 125,000 words mapped to their pronunciations.
# This tiny app was inspired by Lee Vaughan's Impractical Python Projects
import sys
from string import punctuation
from nltk.corpus import cmudict
# Load the corpus and build the pronunciation dictionary once, at import time.
# NOTE: this rebinds the name `cmudict` from the object imported from
# nltk.corpus to the value returned by its .dict() method, so the imported
# object is no longer reachable under this name afterwards.
# count_syllables() indexes the result as cmudict[word][0] (first pronunciation).
cmudict = cmudict.dict()
def main():
    """Entry point: show the banner, then hand control to the prompt loop."""
    print_header()
    run_event_loop()
def print_header():
    """Print the application banner: the title framed by two dashed rules."""
    rule = "-----------------------------------------------------"
    for banner_line in (rule, " Syllable Counter", rule):
        print(banner_line)
def run_event_loop():
    """Prompt for words or phrases and report their syllable counts.

    The loop keeps prompting until the user submits an empty line or standard
    input is exhausted (Ctrl-D / Ctrl-Z, or the end of piped input). In both
    cases a farewell message is printed and the program exits through
    ``sys.exit()``.

    Lookup failures are reported on ``sys.stderr`` and the loop continues, so
    one unknown word does not end the session.

    Raises:
        SystemExit: when the user quits or input runs out.
    """
    while True:
        try:
            word = input("Enter a word or phrase <press Enter to exit>: ")
        except EOFError:
            # No more input is coming (closed terminal or piped stdin ran
            # dry). Treat it like an empty line instead of crashing with a
            # traceback.
            word = ""

        if word == "":
            print("\nThanks for using Syllable Counter. Have a good day!")
            sys.exit()

        # Keep the try body to the one call that can fail; the success
        # output lives in the else branch.
        try:
            num_syllables = count_syllables(word)
        except KeyError:
            # Diagnostics go to stderr (some IDEs render that stream in red).
            print("\nWord not found. Check your spelling or enter a new word.\n",
                  file=sys.stderr)
        except Exception as e:
            # Top-level boundary of the interactive loop: report and keep going.
            print(f"There was a problem processing this request. Details: {e}",
                  file=sys.stderr)
        else:
            # note that use of f-strings requires Python 3.6+
            print(f"\nNumber of syllables in '{word}' is {num_syllables}.")
            print()
def count_syllables(words, pronunciations=None):
    """Use corpus to count syllables in an English word or phrase.

    Args:
        words: A word or phrase. Hyphens are treated as word separators,
            case is ignored, and punctuation surrounding each word is dropped.
        pronunciations: Optional mapping of lowercase word to a list of
            pronunciations, where each pronunciation is a sequence of phoneme
            strings and syllable-bearing phonemes end in a digit (e.g.
            ``"OW1"``). Defaults to the module-level ``cmudict`` dictionary.

    Returns:
        The total number of syllables across all words, as an int. A phrase
        with no words (empty or punctuation only) yields 0.

    Raises:
        KeyError: If a word is not present in the pronunciation dictionary.
    """
    if pronunciations is None:
        pronunciations = cmudict

    # prep word or phrase for syllable counting
    tokens = words.replace("-", " ").lower().split()

    num_sylls = 0
    for token in tokens:
        word = token.strip(punctuation)
        if not word:
            # Token was pure punctuation (e.g. a stray comma); nothing to count.
            continue

        # Prefer the dictionary's own entry for a possessive; only fall back
        # to the base word when the possessive form is not listed. Either
        # way the word is counted rather than silently skipped.
        if word not in pronunciations and word.endswith("'s"):
            word = word[:-2]

        # [0] in case there are multiple pronunciations, default to first
        first_pronunciation = pronunciations[word][0]

        # One syllable per phoneme whose last character is a digit.
        num_sylls += sum(
            1 for phoneme in first_pronunciation if phoneme[-1].isdigit()
        )
    return num_sylls
# Start the interactive app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment