Last active
January 3, 2018 17:30
-
-
Save thejeshgn/cdc40a03cc0678c6e343fd01a3fa3d26 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# encoding: utf-8 | |
import couchdb | |
import tweepy #https://github.com/tweepy/tweepy | |
import csv | |
import re | |
import arrow | |
import time | |
# Twitter application credentials (consumer key and secret). They are listed
# under "OAuth settings" on the application's Details page at
# https://dev.twitter.com/apps
consumer_key = ""
consumer_secret = ""

# Twitter access token and secret. They are listed under "Your access token"
# on the application's Details page at https://dev.twitter.com/apps
access_key = ""
access_secret = ""

# CouchDB server URL (credentials embedded in the URL) and a handle on the
# "bulletinbabu" database. Both objects are created when the module is loaded.
couch_url = "https://username:password@mycouchdb.url.com"
remote_server = couchdb.Server(couch_url)
bulletinbabu_db = remote_server["bulletinbabu"]
# (document key, compiled pattern) for every counter found in an
# "Emails from #SpeakForMe to:" tweet. Each pattern captures only the digits
# and thousands separators that follow the label, so trailing text on the same
# line can never reach int().
_COUNT_PATTERNS = tuple(
    (key, re.compile(r"%s:\s*([0-9][0-9,]*)" % re.escape(label), re.IGNORECASE))
    for label, key in (
        ("MPs", "mps"),
        ("Banks", "banks"),
        ("Mobile service providers", "mobile"),
        ("Government services", "govt"),
        ("Others", "others"),
        ("Total", "total"),
    )
)


def _native_text(value):
    """Return *value* as the interpreter's native ``str`` type.

    On Python 2 (where ``str`` is a byte string) a unicode value is encoded as
    UTF-8; on Python 3 the value is returned unchanged.
    """
    if str is bytes and not isinstance(value, str):
        return value.encode("utf-8")
    return value


def _parse_counts(text):
    """Return a dict of the integer counters present in *text*.

    Counters whose label does not appear in *text* are left out of the result.
    """
    counts = {}
    for key, pattern in _COUNT_PATTERNS:
        match = pattern.search(text)
        if match:
            counts[key] = int(match.group(1).replace(",", ""))
    return counts


def _fetch_timeline(api, screen_name, fetch_all):
    """Return the tweets of *screen_name*: one page, or every page if *fetch_all*."""
    # 200 is the maximum allowed count per request.
    tweets = list(api.user_timeline(screen_name=screen_name, count=200,
                                    tweet_mode="extended"))
    page = tweets
    # An empty page means there is nothing (more) to fetch; this also covers
    # an account without any tweets.
    while fetch_all and page:
        # Ask only for tweets older than the oldest one seen so far, so that
        # no tweet is returned twice.
        oldest = tweets[-1].id - 1
        page = api.user_timeline(screen_name=screen_name, count=200,
                                 max_id=oldest, tweet_mode="extended")
        tweets.extend(page)
        print("...%s tweets downloaded so far" % len(tweets))
    return tweets


def get_all_tweets(screen_name, fetch_all=False):
    """Download tweets of *screen_name* and store the e-mail statistics in CouchDB.

    Every tweet whose text starts with "Emails from #SpeakForMe to:" is parsed
    into a document (tweet id, campaign, stat type and the per-category
    counters) and saved to ``bulletinbabu_db``. All other tweets, including
    the "Top recipients of #SpeakForMe emails:" ones, are printed but not
    stored.

    Args:
        screen_name: Twitter account whose timeline is read.
        fetch_all: When false (the default) only the most recent page of up to
            200 tweets is requested. When true, older pages are requested
            until the API returns an empty page. Twitter only allows access
            to a user's most recent 3240 tweets with this method.

    Returns:
        None.
    """
    # Authorize twitter, initialize tweepy.
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    api = tweepy.API(auth)

    for tweet in _fetch_timeline(api, screen_name, fetch_all):
        print("--------------------------------------------------------------------------------------------")
        text = _native_text(tweet.full_text)
        print(text)

        if not text.startswith("Emails from #SpeakForMe to:"):
            continue

        document = {
            'tw': tweet.id,
            'campaign': "#SpeakForMe",
            # NOTE(review): the document id is the tweet's creation time
            # rendered in this machine's local timezone, so it presumably
            # differs between machines in different timezones - confirm.
            '_id': arrow.get(tweet.created_at).to('local').format('YYYY-MM-DDTHH:mm:ssZZ'),
            'stat': "email_sent",
        }
        document.update(_parse_counts(text))
        print(str(document))

        try:
            bulletinbabu_db.save(document)
        except couchdb.http.ResourceConflict:
            # A document with this _id is already stored. Stop here;
            # presumably every remaining (older) tweet was stored by an
            # earlier run - TODO confirm the timeline is newest-first.
            print("Already exists")
            break
        # Short pause between consecutive writes.
        time.sleep(0.1)
if __name__ == "__main__":
    # Script entry point: process the timeline of the "bulletinbabu" account.
    get_all_tweets(screen_name="bulletinbabu")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment