Skip to content

Instantly share code, notes, and snippets.

@thejeshgn
Last active January 3, 2018 17:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save thejeshgn/cdc40a03cc0678c6e343fd01a3fa3d26 to your computer and use it in GitHub Desktop.
Save thejeshgn/cdc40a03cc0678c6e343fd01a3fa3d26 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# encoding: utf-8
import csv
import os
import re
import time

import arrow
import couchdb
import tweepy #https://github.com/tweepy/tweepy
# Twitter API credentials and the CouchDB location.
#
# Every value below can be supplied through the environment variable named in
# the corresponding os.environ.get() call, so that secrets do not have to be
# written into (and committed with) this script. When a variable is not set,
# the literal fallback is used, which keeps the script's previous behaviour.

# The consumer keys can be found on your application's Details
# page located at https://dev.twitter.com/apps (under "OAuth settings")
consumer_key = os.environ.get("TWITTER_CONSUMER_KEY", "")
consumer_secret = os.environ.get("TWITTER_CONSUMER_SECRET", "")

# The access tokens can be found on your application's Details
# page located at https://dev.twitter.com/apps (located
# under "Your access token")
access_key = os.environ.get("TWITTER_ACCESS_KEY", "")
access_secret = os.environ.get("TWITTER_ACCESS_SECRET", "")

# CouchDB server URL; the user name and password are embedded in the URL.
couch_url = os.environ.get(
    "COUCHDB_URL", "https://username:password@mycouchdb.url.com"
)

# NOTE: the two lines below run at import time and look up the database on
# the server, so importing this module requires the server to be reachable.
remote_server = couchdb.Server(couch_url)
bulletinbabu_db = remote_server['bulletinbabu']
# First line of the tweets that carry the e-mail counters.
_EMAIL_STATS_PREFIX = "Emails from #SpeakForMe to:"

# (label as written in the tweet, key used in the stored CouchDB document)
_EMAIL_COUNT_FIELDS = (
    ("MPs", "mps"),
    ("Banks", "banks"),
    ("Mobile service providers", "mobile"),
    ("Government services", "govt"),
    ("Others", "others"),
    ("Total", "total"),
)

# One pattern per field, compiled once: the label, a colon, optional blanks on
# the same line, then an optionally signed integer that may contain thousands
# separators. Anchoring on the digits (instead of "everything up to the last
# space on the line") means a counter is still found when the line has no
# trailing space, and trailing text can no longer make int() fail.
_EMAIL_COUNT_PATTERNS = tuple(
    (key, re.compile(re.escape(label) + r":[ \t]*([+-]?[0-9][0-9,]*)",
                     re.IGNORECASE))
    for label, key in _EMAIL_COUNT_FIELDS
)


def _parse_email_counts(text):
    """Return a ``{document key: int}`` dict for each counter found in *text*.

    Counters that are absent from *text* are simply left out of the result.
    """
    counts = {}
    for key, pattern in _EMAIL_COUNT_PATTERNS:
        match = pattern.search(text)
        if match:
            counts[key] = int(match.group(1).replace(",", ""))
    return counts


def _console_text(text):
    """Return *text* in a form that is safe to hand to ``print``.

    On Python 2 the text is encoded to UTF-8 bytes first (this is what the
    script has always printed); printing a unicode object there can raise
    UnicodeEncodeError when stdout uses an ASCII encoding. On Python 3 the
    text is returned unchanged.
    """
    if str is bytes:  # true only on Python 2
        return text.encode("utf-8")
    return text


def get_all_tweets(screen_name, fetch_all=False):
    """Download a user's timeline and store its e-mail statistics in CouchDB.

    Tweets whose text starts with "Emails from #SpeakForMe to:" are parsed
    into one document each (tweet id, campaign tag, and the per-category
    counters) and saved to ``bulletinbabu_db``. Tweets of any other kind,
    including the "Top recipients of #SpeakForMe emails:" ones, are printed
    but not stored.

    Args:
        screen_name: Twitter handle of the account to read.
        fetch_all: When false (the default, which matches the script's
            previous behaviour) only the most recent page of up to 200
            tweets is fetched. When true, older pages are requested too
            until the API returns an empty page. The original author's note
            says Twitter only exposes a user's most recent 3240 tweets
            through this method.

    Returns:
        None. Results are written to CouchDB and progress is printed.
    """
    # authorize twitter, initialize tweepy
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    api = tweepy.API(auth)

    # initial request for the most recent tweets (200 is the maximum allowed
    # count); tweet_mode="extended" is what provides tweet.full_text
    new_tweets = api.user_timeline(
        screen_name=screen_name, count=200, tweet_mode="extended")
    alltweets = list(new_tweets)

    # Keep grabbing older pages until one comes back empty. Checking
    # new_tweets before touching alltweets[-1] also makes an account with no
    # tweets a clean no-op instead of an IndexError.
    while fetch_all and new_tweets:
        # max_id is inclusive, so ask for everything strictly older than the
        # oldest tweet seen so far to prevent duplicates
        oldest = alltweets[-1].id - 1
        new_tweets = api.user_timeline(
            screen_name=screen_name, count=200, max_id=oldest,
            tweet_mode="extended")
        alltweets.extend(new_tweets)
        print("...%s tweets downloaded so far" % len(alltweets))

    for tweet in alltweets:
        print("--------------------------------------------------------------------------------------------")
        full_text = tweet.full_text
        print(_console_text(full_text))

        if not full_text.startswith(_EMAIL_STATS_PREFIX):
            # Other tweet kinds (e.g. "Top recipients of #SpeakForMe
            # emails:") are not stored.
            continue

        bulletinbabu = {
            'tw': tweet.id,
            'campaign': "#SpeakForMe",
            # The document id is the tweet's creation time rendered in the
            # local timezone of the machine running the script.
            # NOTE(review): the same tweet therefore gets a different _id on
            # a machine in another timezone - confirm this is intended.
            '_id': arrow.get(tweet.created_at).to('local').format(
                'YYYY-MM-DDTHH:mm:ssZZ'),
            'stat': "email_sent",
        }
        bulletinbabu.update(_parse_email_counts(full_text))
        print(str(bulletinbabu))

        try:
            bulletinbabu_db.save(bulletinbabu)
        except couchdb.http.ResourceConflict:
            # A document with this _id is already stored. Stop here:
            # presumably the timeline is newest-first, so everything older
            # was saved by an earlier run - verify before relying on it.
            print("Already exists")
            break
        # brief pause between successful writes
        time.sleep(0.1)
if __name__ == "__main__":
    # Script entry point: process the timeline of the account named here.
    get_all_tweets(screen_name="bulletinbabu")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment