Last active
August 29, 2015 14:19
-
-
Save dannguyen/c6635797f602a6615465 to your computer and use it in GitHub Desktop.
Code for @BeholdEveryWord Twitter Bot: https://twitter.com/beholdeveryword
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import logging | |
import os.path | |
import tweepy | |
import requests | |
# Configure the root logger so records at INFO level and above are emitted.
# Note that this runs as a side effect of importing the module.
logging.basicConfig(level=logging.INFO)
# Module-level logger for the API helper functions below
logger = logging.getLogger('beholdeveryword.apis')
def tweepy_client(credsfile):
    """
    Expects {credsfile} to be a filename for a JSON file in this format:
        {
          "consumer_key": "x",
          "consumer_secret": "y",
          "access_token": "z",
          "access_token_secret": "aa"
        }
    A leading "~" in {credsfile} is expanded to the user's home directory.

    Returns an authenticated tweepy.API object

    Raises whatever open()/json.load() raise if the file is missing or
    is not valid JSON, and KeyError if one of the four keys is absent
    """
    credsfile = os.path.expanduser(credsfile)
    # use a context manager so the credentials file is closed promptly
    # instead of being left open until garbage collection
    with open(credsfile) as f:
        creds = json.load(f)
    # Get authentication token
    auth = tweepy.OAuthHandler(consumer_key=creds['consumer_key'],
                               consumer_secret=creds['consumer_secret'])
    auth.set_access_token(creds['access_token'],
                          creds['access_token_secret'])
    # create an API handler
    return tweepy.API(auth)
def get_latest_timeline_tweet_text(credsfile):
    """
    Returns the latest text string from the authenticated user's
    timeline, or None if no tweet yet exists

    {credsfile} is the credentials filename passed on to tweepy_client().
    Replies and retweets are excluded from the request.
    """
    t = tweepy_client(credsfile).user_timeline(
        count=1, trim_user=True,
        exclude_replies=True, include_rts=False
    )
    # An empty timeline yields an empty result; indexing t[0] directly
    # would raise IndexError instead of returning the documented None
    if not t:
        return None
    return t[0].text
def send_tweet(txt, credsfile):
    """
    Posts {txt} as a new status update, authenticating with the
    credentials stored in {credsfile} (see tweepy_client())

    Returns the `_json` attribute of the object that tweepy's
    update_status() call hands back
    """
    client = tweepy_client(credsfile)
    return client.update_status(status=txt)._json
def get_wikipedia_url_for_word(word):
    """
    contacts Wikipedia's API to see if an article with {word}.capitalize() exists

    Returns wikipedia URL with {word} as title if it does exist
    or None if not

    note: the lookup is still simplistic -- it only tries the capitalized
    form of {word}, and the returned URL uses that title verbatim, so it is
    best suited to a single word made of alphabet letters
    """
    wend_point = "http://en.wikipedia.org/w/api.php"
    title = word.capitalize()
    # Let requests build the query string so the title is URL-encoded
    # rather than spliced raw onto the end of the URL
    query = {
        'format': 'json',
        'action': 'query',
        'prop': 'info',
        'titles': title,
    }
    resp = requests.get(wend_point, params=query).json()
    # this code treats a page entry keyed "-1" as "no such article"
    if resp['query']['pages'].get('-1'):
        return None
    return "http://en.wikipedia.org/wiki/%s" % title
def get_biblehub_url_for_word(word):
    """
    Given {word} like "Abel"
    creates the biblehub.com topic URL:
       e.g. http://biblehub.com/topical/a/abel.htm
    and checks to see if it exists (i.e. has a HTTP status of 200)

    Returns the biblehub.com URL or None
    (an empty {word} has no topic URL, so it also yields None)
    """
    # Guard first: slug[0] below would raise IndexError on an empty word
    if not word:
        return None
    slug = word.lower()
    # topic pages are grouped under the first letter of the slug
    url = 'http://biblehub.com/topical/%s/%s.htm' % (slug[0], slug)
    # HEAD is enough -- only the status code is inspected
    resp = requests.head(url)
    return url if resp.status_code == 200 else None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from apis import send_tweet, get_latest_timeline_tweet_text | |
from apis import get_wikipedia_url_for_word, get_biblehub_url_for_word | |
from words import create_words_file, find_next_wordline | |
import json | |
import logging | |
import os.path | |
import re | |
# JSON credentials file handed to the apis helpers as `credsfile`
# (tweepy_client() expands the leading "~")
TWITTER_CREDS = "~/.behold.twittercreds.json"
# URL of the text that create_words_file() downloads and counts words from
SOURCE_URL = 'http://www.gutenberg.org/cache/epub/10/pg10.txt'
# Local file of "WORD,count" lines written by create_words_file()
WORDS_FILENAME = "/tmp/biblewords.txt"
# Filled in by create_next_tweet_text() with
# (word, count, "appearance" or "appearances")
TWEET_TEMPLATE = "Behold %s and its %s Biblical %s!\n👼🙏😇"
# Configure the root logger so records at INFO level and above are emitted
logging.basicConfig(level=logging.INFO)
# Module-level logger for this script
logger = logging.getLogger('beholdeveryword.every_word')
def create_next_tweet_text(tweet_txt, words_filename):
    """
    Expects TWEET_TEMPLATE to be a String

    {tweet_txt} is a String, or None when there is no previous tweet.
      A regex is used to extract a particular {word} (the capital letters
      following "Behold "), and this is passed into
      words.find_next_wordline() to get the next [word, word_count]
      (e.g. {seq}) to tweet. If no {word} can be extracted, None is passed
      instead, which makes find_next_wordline() return the first line
      of {words_filename}.

    Returns a text string or None, depending on whether
      find_next_wordline() returned {seq} or None
    """
    # tweet_txt is None when the timeline is empty; re.search() would
    # raise TypeError on it, so treat it the same as "no match"
    mtch = re.search(r'(?<=Behold )[A-Z]+', tweet_txt) if tweet_txt else None
    word = mtch.group() if mtch else None
    seq = find_next_wordline(current_word=word, words_filename=words_filename)
    if seq is None:
        return None

    word = seq[0].upper()
    count = seq[1]
    tx = "appearances" if int(count) > 1 else 'appearance'
    new_tweet = TWEET_TEMPLATE % (word, count, tx)
    ## attempt to add BibleHub link
    b_url = get_biblehub_url_for_word(word)
    if b_url:
        new_tweet += "\nBibleHub: " + b_url
    ## attempt to add Wikipedia link
    w_url = get_wikipedia_url_for_word(word)
    if w_url:
        new_tweet += "\nWikipedia: " + w_url
    return new_tweet
def dotweet(testing=False):
    """
    Downloads and creates Biblical word count file (WORDS_FILENAME) if necessary.
    Gets latest tweet from the account identified by TWITTER_CREDS
    Uses Tweepy (via send_tweet) to send a "Behold..." tweet

    If {testing} is truthy, nothing is sent and the text that would
    have been tweeted is returned instead.

    Returns whatever send_tweet() returns, the tweet text when testing,
    or None if there is nothing to tweet
    """
    create_words_file(
        source_url=SOURCE_URL, words_filename=WORDS_FILENAME,
        start_pt='1:1', end_pt='END OF THE PROJECT GUTENBERG'
    )
    # Note: If the latest tweet isn't of the expected "Behold..." format,
    # then the Twitter sequence will __start over__
    tweet_text = get_latest_timeline_tweet_text(credsfile=TWITTER_CREDS)
    logger.info('Latest tweet is: "%s"', tweet_text)
    # Now formulate the next tweet to send out
    next_tweet = create_next_tweet_text(tweet_text, WORDS_FILENAME)
    if next_tweet is None:
        logger.warning("Nothing to tweet")
        return None

    logger.info('Tweeting: "%s"', next_tweet)
    # Any truthy value means "dry run"; the old `== True` comparison
    # would have sent a real tweet for values such as "yes"
    if testing:
        return next_tweet
    # send the tweet
    return send_tweet(next_tweet, credsfile=TWITTER_CREDS)
if __name__ == "__main__":
    # Script entry point: send the next tweet and print the outcome.
    resp = dotweet()
    # A dict result (presumably the JSON payload send_tweet() returns) is
    # pretty-printed; anything else, e.g. None, is printed as-is
    if isinstance(resp, dict):
        print(json.dumps(resp, indent=2))
    else:
        print(resp)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os.path | |
import requests | |
import re | |
import logging | |
# Configure the root logger so records at INFO level and above are emitted.
# Note that this runs as a side effect of importing the module.
logging.basicConfig(level=logging.INFO)
# Module-level logger for the word-file helpers below
logger = logging.getLogger('beholdeveryword.words')
def create_words_file(source_url, words_filename, start_pt="", end_pt="", do_refresh=False):
    """
    Downloads the text at {source_url} and writes {words_filename}:
    one "WORD,count" line per distinct word, sorted by word.

    The text is upper-cased and only runs of the letters A-Z count as words.
    Only the part of the text from the first occurrence of {start_pt} up to
    the first following occurrence of {end_pt} is counted. The markers are
    matched case-insensitively; an empty {start_pt} means "from the
    beginning" and an empty {end_pt} means "to the end".

    Nothing is downloaded when {words_filename} already exists, is larger
    than 1000 bytes, and {do_refresh} is falsy.

    Returns {words_filename} when the file was (re)created, or None when
    the existing file was kept

    Raises ValueError if a non-empty marker is not found in the text
    """
    from collections import Counter

    # if it already exists, and no need to refresh it, then
    # just do nothing
    if (not do_refresh
            and os.path.exists(words_filename)
            and os.path.getsize(words_filename) > 1000):
        return
    # else, re-download, and re-make
    logger.info("Downloading text from %s", source_url)
    text = requests.get(source_url).text.upper()
    # The text was upper-cased, so the markers must be too
    a = text.index(start_pt.upper())
    # Look for the end marker only after the start marker. An empty marker
    # must mean "end of text": text.index("") is 0, which used to produce
    # an empty slice and therefore an empty words file
    b = text.index(end_pt.upper(), a) if end_pt else len(text)
    wordcounts = Counter(re.findall("[A-Z]+", text[a:b]))
    with open(words_filename, "w") as f:
        f.writelines('%s,%s\n' % (word, x)
                     for word, x in sorted(wordcounts.items()))
    logger.info("%s is %s bytes", words_filename, os.path.getsize(words_filename))
    # return the path to the file
    return words_filename
def find_next_wordline(words_filename, current_word = None):
    """
    Finds line after the first match of current_word
    ({words_filename} holds one "WORD,count" line per word; a line matches
    when it starts with {current_word} immediately followed by a comma)

    If current_word is None, the first line of the file is used.

    Returns a sequence/list, [word, word_count]
    or,
    if word is not found, or end of file,
    returns None
    """
    with open(words_filename) as f:
        if current_word is not None:
            # Plain prefix comparison instead of an unescaped regex, so
            # characters like "." in current_word are matched literally
            prefix = current_word + ','
            for line in f:
                if line.startswith(prefix):
                    break
            else:
                # current_word does not appear in the file
                return None
        # the line following the match (or the first line); '' at end of file
        nextline = next(f, '')
    # compare by value/truthiness; `is ''` tested object identity
    if not nextline:
        return None
    return nextline.strip().split(',')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment