Skip to content

Instantly share code, notes, and snippets.

@jacobbridges
Last active August 29, 2015 14:12
Show Gist options
  • Save jacobbridges/54f8bbb47d1bc0ace0b0 to your computer and use it in GitHub Desktop.
Save jacobbridges/54f8bbb47d1bc0ace0b0 to your computer and use it in GitHub Desktop.
Obama Speech Text Analysis 3
import re
from collections import Counter
SPEECH_TO_PROCESS = "Year End Press Conference -- (12-19-2014).txt"

# Words whose occurrences raise the score.
FORWARD_WORDS = ("going", "will", "next", "tomorrow", "future")
# Words whose occurrences lower the score.
BACKWARD_WORDS = ("have", "did", "done", "past", "yesterday", "last")


def load_speech(path):
    """Return the contents of the file at *path* as ASCII-only text.

    The file is decoded as UTF-8 and every non-ASCII character is
    dropped (not transliterated), so e.g. a typographic apostrophe
    simply disappears from the text.
    """
    # Read bytes and decode explicitly so the result does not depend on
    # the platform default encoding and works on Python 2 and 3 alike.
    with open(path, "rb") as speech_file:
        raw = speech_file.read()
    return raw.decode("utf8").encode("ascii", "ignore").decode("ascii")


def count_words(text):
    """Return a Counter of the lowercased words found in *text*.

    ASCII apostrophes are removed first so a contraction such as
    "don't" is counted as the single word "dont" rather than being
    split into "don" and "t" by the regex.
    """
    without_apostrophes = text.replace("'", "")
    return Counter(re.findall(r'\w+', without_apostrophes.lower()))


def score_speech(text, forward_words=FORWARD_WORDS,
                 backward_words=BACKWARD_WORDS):
    """Return the progressive score of *text*.

    The score is the total number of occurrences of *forward_words*
    minus the total number of occurrences of *backward_words*.
    Matching is case-insensitive and on whole words only.
    """
    counts = count_words(text)
    # Counter returns 0 for missing keys, so absent words need no guard.
    forward_total = sum(counts[word] for word in forward_words)
    backward_total = sum(counts[word] for word in backward_words)
    return forward_total - backward_total


def main():
    """Score the speech named by SPEECH_TO_PROCESS and print the result."""
    progressive_score = score_speech(load_speech(SPEECH_TO_PROCESS))
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("Progressive Score = " + str(progressive_score))


if __name__ == "__main__":
    main()
###
# RESULT Progressive Score = 18
###
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment