Skip to content

Instantly share code, notes, and snippets.

@JonathanReeve
Created June 5, 2019 14:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save JonathanReeve/838763012865ab785f8b304d4e5e0346 to your computer and use it in GitHub Desktop.
Save JonathanReeve/838763012865ab785f8b304d4e5e0346 to your computer and use it in GitHub Desktop.
The Twitter Bot "Alternate DHSIs," which finds DHSI initialisms from Project Gutenberg texts.
import re
import requests
import json
import time
import tweepy
from secrets import *
# This requires a local file, secrets.py, that defines the Twitter API
# credentials CONSUMER_KEY, CONSUMER_SECRET, ACCESS_KEY, and ACCESS_SECRET.
# "Alternate DHSIs" bot: walks corpus-db.org records by numeric id, searches
# each full text for four consecutive words whose initials spell D-H-S-I, and
# tweets every hit, keeping at least an hour between tweets.

API_ROOT = 'http://corpus-db.org/api/id/'
FIRST_ID = 49
LAST_ID = 50000  # exclusive upper bound of the id range
MIN_SECONDS_BETWEEN_TWEETS = 3600
WAIT_POLL_SECONDS = 500  # how often to print a "Waiting..." heartbeat
REQUEST_TIMEOUT_SECONDS = 60
UNTITLED = 'an untitled text'

# Four consecutive words starting with D, H, S, I (either case), preceded by
# whitespace and followed by one more word. Raw string so that \s and \w reach
# the regex engine instead of being parsed as (invalid) string escapes.
DHSI_PATTERN = re.compile(r'\s([Dd]\w+\s[Hh]\w+\s[Ss]\w+\s[Ii]\w+)\s\w+')


def _fetch_json(url):
    """Return the decoded JSON body of *url*, or None when it is unavailable.

    None is returned when the request fails or times out, when the server
    answers with an error status, or when the body is not valid JSON, so that
    one bad record cannot abort the whole run.
    """
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    except requests.RequestException as err:
        print('Request to %s failed: %s' % (url, err))
        return None
    if not response.ok:
        return None
    try:
        return json.loads(response.text)
    except ValueError as err:
        print('Invalid JSON from %s: %s' % (url, err))
        return None


def _find_initialisms(text):
    """Return every D-H-S-I phrase found in *text*, in order of appearance."""
    # Collapse every run of whitespace (newlines, CRLF, tabs, double spaces)
    # into a single space: the pattern allows exactly one whitespace
    # character between words, and matches end up verbatim in tweets.
    flattened = ' '.join(text.split())
    return DHSI_PATTERN.findall(flattened)


def _wait_for_tweet_slot(lastTweetTime):
    """Block until MIN_SECONDS_BETWEEN_TWEETS have passed since *lastTweetTime*."""
    while True:
        remaining = MIN_SECONDS_BETWEEN_TWEETS - (time.time() - lastTweetTime)
        if remaining <= 0:
            return
        print('Waiting...')
        # Never sleep past the deadline; the elapsed time is re-measured
        # after every sleep rather than tested against a stale value.
        time.sleep(min(WAIT_POLL_SECONDS, remaining))


auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_KEY, ACCESS_SECRET)
api = tweepy.API(auth)

lastTime = 0  # epoch seconds of the last tweet; 0 lets the first one go out at once

for i in range(FIRST_ID, LAST_ID):
    print('Getting text with id: %s' % i)
    # The record URL carries the id in float form (e.g. ".../api/id/49.0").
    recordUrl = API_ROOT + str(float(i))

    metaParsed = _fetch_json(recordUrl)
    if metaParsed is None:
        print("Couldn't get metadata for id: %s" % i)
        continue

    title = metaParsed['title'] if 'title' in metaParsed else None
    if title:
        print('Title: ', title)
    else:
        print('No title found.')
        # Fall back to a placeholder so a hit in an untitled record still
        # produces a tweet instead of raising KeyError.
        title = UNTITLED

    textParsed = _fetch_json(recordUrl + '/fulltext')
    if textParsed is None:
        print("Couldn't get full text!")
        continue
    if not textParsed:
        print('No text here.')
        continue

    text = textParsed[0]['text'] if 'text' in textParsed[0] else ''
    matches = _find_initialisms(text or '')
    print('--------------- FOUND ----------------')
    for match in matches:
        _wait_for_tweet_slot(lastTime)
        tweet = (
            'DHSI also stands for: "%s" as found in %s'
            '. See http://gutenberg.org/ebooks/%s for the full text. #DHSI19'
            % (match, title, i)
        )
        print(tweet)
        api.update_status(tweet)
        lastTime = time.time()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment