Skip to content

Instantly share code, notes, and snippets.

@StephenKrewson
Last active December 16, 2015 09:59
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save StephenKrewson/5417204 to your computer and use it in GitHub Desktop.
Save StephenKrewson/5417204 to your computer and use it in GitHub Desktop.
~twitter poems~ (debuted at XS Collaborative exhibition with Hans Schoenburg, New Haven, April 2013)
import re, sys, nltk, string, matplotlib, HTMLParser
from nltk.corpus import cmudict
from random import choice, randint
from nltk.tokenize import *
from nltk import FreqDist
from twython import Twython
# Twitter API client; the four credential strings must be filled in before
# the script can talk to the API.
# NOTE(review): the original comment read "OAuth2 authentication", but four
# positional credentials look like Twython's OAuth 1 form (app key/secret +
# user token/secret) -- confirm against the Twython docs.
t = Twython('',
'',
'',
'')
# Runs a search at import time: sys.argv[1] is the query and sys.argv[2] the
# requested result count (passed through as a string). Each status text is
# encoded to UTF-8 bytes. Raises IndexError when fewer than two command-line
# arguments are given.
tweets = [s['text'].encode('utf-8') for s in t.search(q=sys.argv[1], count=sys.argv[2])['statuses']]
# (1.2) Store NLTK libraries (runtime atrocious if these are in the functions)
# books: Gutenberg corpus file ids, sampled by buildBackground.
books = nltk.corpus.gutenberg.fileids()
# lexicon: CMU pronouncing dictionary, indexed by stripWord as
# lexicon[word][0] to read the first pronunciation's phonemes.
lexicon = nltk.corpus.cmudict.dict()
### SECTION 2: FUNCTIONS ###
############################
def stripWord(word):
    """Return ``(word, stress)`` for a usable token, or None to skip it.

    word: one whitespace-delimited token, as UTF-8 bytes or unicode text.
        HTML entities are unescaped before anything else.

    Returns None when the filter regex rejects the unescaped token:
    @mentions and #hashtags (optionally preceded by double quotes) and
    tokens starting with "http" or "RT".  Otherwise returns a tuple of the
    unescaped word encoded as UTF-8 bytes and its stress count: the number
    of 1/2 stress digits in the first CMU pronunciation of the lowercased,
    punctuation-free word, or 1 when the word is not in the dictionary.
    """
    # Work on unicode internally.  Calling .encode() on bytes that are
    # already UTF-8 makes Python 2 attempt an implicit ASCII decode first,
    # which raised UnicodeDecodeError for every non-ASCII word.
    if isinstance(word, bytes):
        word = word.decode('utf-8', 'replace')
    word = HTMLParser.HTMLParser().unescape(word)

    # Regex filters out mentions, hashtags, URLs and retweet markers
    if re.match(r'\"*[@#]|http|RT', word):
        return None

    # A character filter works on unicode; the two-argument
    # str.translate(table, deletechars) form raises TypeError on it.
    stripped = ''.join(ch for ch in word.lower()
                       if ch not in string.punctuation)

    if stripped in lexicon:
        # CMU dict has stress values of 1 or 2 attached to stressed phonemes
        phonemes = ''.join(lexicon[stripped][0])
        stress = sum(1 for ch in phonemes if ch in ('1', '2'))
    else:
        # Unknown words approximated as 1 total stress
        stress = 1
    return word.encode('utf-8'), stress


def buildBackground(num):
    """Return a random chunk of Project Gutenberg text.

    num: desired chunk length in characters (anything ``int()`` accepts).
        Clamped to the range 0..len(book) so that an oversized request
        returns the whole book instead of making randint raise ValueError.
    """
    # Grab a whole book with random 'choice' method
    background = nltk.corpus.gutenberg.raw(choice(books)).replace('\n', ' ')
    length = max(0, min(int(num), len(background)))
    # Random start point with enough room for a selection of 'length' chars
    start = randint(0, len(background) - length)
    return background[start:start + length]


def buildPoem(tweets):
    """Return a list of poem lines, broken by accumulated stress count.

    tweets: iterable of tweet strings; they are joined with spaces and
        split on whitespace into tokens.

    Tokens for which stripWord returns None are skipped.  A word joins the
    current line while the line's running stress total stays below 7;
    otherwise the line is closed and the word opens the next one.  Every
    line is the words joined by single spaces plus a trailing space (the
    format the original string concatenation produced).  The final,
    possibly short, line is included; an empty input yields ``[]``.
    """
    stress_limit = 7
    poem, words, stress = [], [], 0
    for token in ' '.join(tweets).split():
        result = stripWord(token)
        if result is None:  # filtered token (mention, hashtag, URL, RT)
            continue
        text, word_stress = result
        # Close the line before adding a word that would reach the limit;
        # the 'words' guard avoids emitting an empty line.
        if words and stress + word_stress >= stress_limit:
            poem.append(' '.join(words) + ' ')
            words, stress = [], 0
        words.append(text)
        # Carry the word's real stress, not a fixed 1, into the new total
        stress += word_stress
    if words:
        # Flush the trailing line, which would otherwise be dropped
        poem.append(' '.join(words) + ' ')
    return poem
def storeResults(poem):
    """Append the poem to the master data file 'XS_data.txt'.

    poem: list of line strings (the output of buildPoem); the lines are
        joined with single spaces and written in binary append mode.
    """
    with open('XS_data.txt', 'ab') as sink:
        joined = ' '.join(poem)
        sink.write(joined)
def dataAnalyze(num=10):
    """Plot the most frequent tokens found in 'XS_data.txt'.

    num: how many of the top tokens to chart.  Defaults to 10, the value
        the code always used (the earlier docstring said 50).

    The file is read, decoded as UTF-8, newlines are replaced by spaces,
    and the text is tokenized with wordpunct_tokenize before counting.
    Returns whatever ``FreqDist.plot`` returns.
    """
    with open('XS_data.txt', 'r') as f:
        data = f.read().decode('utf-8').replace('\n', ' ')
    tokens = wordpunct_tokenize(data)
    fdist = FreqDist(tokens)
    return fdist.plot(num)
def superFunction(string, num):
    """Print a poem with a chunk of Gutenberg text beside each line.

    string: tweet text to build the poem from -- a single string or a list
        of strings (a single string is wrapped in a list so buildPoem does
        not join its individual characters).
    num: length of the background chunk requested from buildBackground
        for every poem line.

    Each output row is ``<poem line> |||| <background chunk>`` followed by
    a blank line.

    NOTE(review): the previous body called buildPoem(string, num) and
    indexed the result as a (lines, data) pair, which does not match the
    one-argument buildPoem in this file, so every call raised TypeError.
    Pairing lines with buildBackground(num) is reconstructed from the
    ' |||| ' join and the 'num' parameter -- confirm the intended layout.
    """
    if isinstance(string, (bytes, type(u''))):
        string = [string]
    for line in buildPoem(string):
        # Normalize both halves to UTF-8 bytes so that joining never
        # triggers an implicit ASCII decode of non-ASCII poem text.
        parts = [part if isinstance(part, bytes) else part.encode('utf-8')
                 for part in (line, buildBackground(num))]
        print(' |||| '.join(parts))
        print('')
    # Persisting (storeResults) and charting (dataAnalyze) were disabled
    # in the original body and remain off here.
###Main Code###
# Build the poem from the fetched tweets and print it one line per row.
for line in buildPoem(tweets):
    print(line)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment