Created
June 23, 2014 01:14
-
-
Save laurenorsini/f38c0113e41e9b4504a7 to your computer and use it in GitHub Desktop.
Copy of OwenEbooks
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
import re | |
import sys | |
import twitter | |
import markov | |
from htmlentitydefs import name2codepoint as n2c | |
from local_settings import * | |
def connect():
    """Create the Twitter API client used by the rest of the script.

    Returns:
        A ``twitter.Api`` instance built from the four ``MY_*`` credential
        values (presumably supplied by the ``local_settings`` star import).
    """
    credentials = {
        "consumer_key": MY_CONSUMER_KEY,
        "consumer_secret": MY_CONSUMER_SECRET,
        "access_token_key": MY_ACCESS_TOKEN_KEY,
        "access_token_secret": MY_ACCESS_TOKEN_SECRET,
    }
    return twitter.Api(**credentials)
def entity(text):
    """Decode one HTML entity token into the character it stands for.

    Handles decimal (``&#233;``), hexadecimal (``&#xe9;``) and named
    (``&eacute;``) entities.

    Args:
        text: The entity token, including its leading ``&`` and trailing ``;``.

    Returns:
        The decoded character, or ``text`` unchanged when the number is
        malformed or out of range, or the name is not present in ``n2c``.
    """
    # Resolve the code-point-to-character function at call time so the same
    # body works on Python 2 (unichr) and Python 3 (chr).
    try:
        to_char = unichr
    except NameError:
        to_char = chr

    try:
        if text[:3] == "&#x":
            return to_char(int(text[3:-1], 16))
        if text[:2] == "&#":
            return to_char(int(text[2:-1]))
        # Named entity: the lookup itself is what raises KeyError for an
        # unknown name, so it must sit inside the try block.
        return to_char(n2c[text[1:-1]])
    except (KeyError, ValueError, OverflowError):
        # Best effort: leave anything we cannot decode exactly as it was.
        return text
def filter_tweet(tweet):
    """Strip retweet tails, links, tags, mentions and markup noise from a tweet.

    The cleaned text is written back to ``tweet.text`` and also returned.

    Args:
        tweet: An object with a writable ``text`` string attribute.

    Returns:
        The cleaned text, which may be an empty string.
    """
    text = tweet.text
    # Drop "RT "/"MT " and whatever follows it on the same line.
    text = re.sub(r'\b(RT|MT) .+', '', text)
    # Drop URLs, hashtags, @mentions and h/t credits.
    text = re.sub(r'(\#|@|(h\/t)|(http))\S+', '', text)
    # Remove newlines (the pieces are joined without a separator).
    text = re.sub(r'\n', '', text)
    # Remove double quotes and parentheses.
    text = re.sub(r'\"|\(|\)', '', text)

    # Decode named HTML entities such as "&amp;".
    for item in re.findall(r'&\w+;', text):
        try:
            decoded = entity(item)
        except KeyError:
            # Leave an entity the decoder cannot resolve as it is.
            continue
        # Literal replacement: neither the entity nor its decoded form should
        # be interpreted as a regex pattern or replacement template.
        text = text.replace(item, decoded)

    # Replace accented e (U+00E9) with a plain "e".
    text = re.sub(r'\xe9', 'e', text)

    tweet.text = text
    return text
def grab_tweets(api, max_id=None):
    """Fetch one page of a user's timeline and return the cleaned tweet texts.

    The account queried is the module-level name ``user``, which the script
    body assigns before calling this function.

    Args:
        api: Client object exposing ``GetUserTimeline``.
        max_id: Passed straight through to ``GetUserTimeline``; ``None`` on
            the first call. (Presumably an upper bound on tweet ids - the
            next value is computed as the last returned id minus one.)

    Returns:
        A ``(source_tweets, max_id)`` tuple: the non-empty cleaned texts from
        this page, and the ``max_id`` to pass on the next call. When the page
        is empty the incoming ``max_id`` is returned unchanged.
    """
    user_tweets = api.GetUserTimeline(
        screen_name=user,
        count=200,
        max_id=max_id,
        include_rts=True,
        trim_user=True,
        exclude_replies=True,
    )
    # An empty page (e.g. paging past the oldest tweet) has no last element to
    # read an id from; report "nothing new" instead of raising IndexError.
    if not user_tweets:
        return [], max_id

    max_id = user_tweets[-1].id - 1

    source_tweets = []
    for tweet in user_tweets:
        tweet.text = filter_tweet(tweet)
        if tweet.text:
            source_tweets.append(tweet.text)
    return source_tweets, max_id
if __name__ == "__main__":
    # Script flow: maybe skip this run, gather source text, train the Markov
    # chain, generate one candidate tweet, then post it (or print it in DEBUG).
    order = ORDER

    # Outside DEBUG mode only a guess of 0 (one value out of ODDS) proceeds.
    if not DEBUG:
        guess = random.choice(range(ODDS))
    else:
        guess = 0

    if guess == 0:
        # Created on demand: a static-test run never connects while gathering
        # text, but still needs a client if it ends up posting.
        api = None

        if STATIC_TEST:
            source_path = TEST_SOURCE
            print(">>> Generating from {0}".format(source_path))
            source_tweets = []
            with open(source_path) as source_file:
                for line in source_file:
                    # Accumulate fragments from every line (not just the last
                    # one) and drop line endings and empty fragments.
                    fragments = line.rstrip("\r\n").split(",")
                    source_tweets.extend(f for f in fragments if f)
        else:
            source_tweets = []
            for handle in SOURCE_ACCOUNTS:
                user = handle  # grab_tweets() reads this module-level name
                api = connect()
                max_id = None
                # Up to 16 timeline pages per account.
                for _ in range(16):
                    source_tweets_iter, max_id = grab_tweets(api, max_id)
                    source_tweets += source_tweets_iter
                print("{0} tweets found in {1}".format(len(source_tweets), handle))
                if not source_tweets:
                    print("Error fetching tweets from Twitter. Aborting.")
                    sys.exit()

        mine = markov.MarkovChainer(order)
        for tweet in source_tweets:
            # Make sure every source text ends with sentence punctuation.
            if not re.search(r'([\.\!\?\"\']$)', tweet):
                tweet += "."
            mine.add_text(tweet)

        # Try up to ten times and keep the first sentence the chain produces.
        # NOTE(review): generate_sentence() presumably returns None on failure
        # (the rest of this script tests for None) - confirm against markov.py.
        ebook_tweet = None
        for _ in range(10):
            ebook_tweet = mine.generate_sentence()
            if ebook_tweet is not None:
                break

        if ebook_tweet is not None:
            # Randomly drop the last word, as Horse_ebooks appears to do.
            if random.randint(0, 4) == 0 and re.search(r'(in|to|from|for|with|by|our|of|your|around|under|beyond)\s\w+$', ebook_tweet) is not None:
                print("Losing last word randomly")
                ebook_tweet = re.sub(r'\s\w+.$', '', ebook_tweet)
                print(ebook_tweet)

            # If a tweet is very short, randomly add a second sentence to it
            # or shout it in capitals.
            if len(ebook_tweet) < 40:
                rando = random.randint(0, 10)
                if rando == 0 or rando == 7:
                    print("Short tweet. Adding another sentence randomly")
                    newer_tweet = mine.generate_sentence()
                    # Append the sentence that was actually checked, rather
                    # than generating a second, unchecked one.
                    if newer_tweet is not None:
                        ebook_tweet += " " + newer_tweet
                elif rando == 1:
                    # Say something crazy/prophetic in all caps.
                    print("ALL THE THINGS")
                    ebook_tweet = ebook_tweet.upper()

        if ebook_tweet is None:
            print("Tweet is empty, sorry.")
        elif len(ebook_tweet) >= 110:
            print("TOO LONG: " + ebook_tweet)
        else:
            # Throw out tweets that match anything from the source account.
            # The final character (typically the closing punctuation) is
            # ignored in the comparison.
            candidate = ebook_tweet[:-1]
            if any(candidate in tweet for tweet in source_tweets):
                print("TOO SIMILAR: " + ebook_tweet)
                sys.exit()

            if not DEBUG:
                if api is None:
                    api = connect()
                status = api.PostUpdate(ebook_tweet)
                print(status.text.encode('utf-8'))
            else:
                print(ebook_tweet)
    else:
        # Message if the random number fails.
        print(str(guess) + " No, sorry, not this time.")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment