# thejeshgn/bulletinbabu_tweets.py

## bulletinbabu_tweets.py
#!/usr/bin/env python
# encoding: utf-8
import couchdb
import tweepy #https://github.com/tweepy/tweepy
import csv
import re
import arrow
import time

# Twitter OAuth credentials read by get_all_tweets(); they are empty
# placeholders and must be filled in before the script is run.
#
# The consumer keys can be found on your application's Details
# page located at https://dev.twitter.com/apps (under "OAuth settings")
consumer_key=""
consumer_secret=""

# The access tokens can be found on your application's Details
# page located at https://dev.twitter.com/apps (located
# under "Your access token")
access_key=""
access_secret=""


# CouchDB server URL with the credentials embedded in it; the username,
# password and host below are placeholders.
couch_url = "https://username:password@mycouchdb.url.com"

# Server handle and the 'bulletinbabu' database that get_all_tweets() saves
# its documents to. Both are created at module level, i.e. when the module is
# imported or run.
remote_server = couchdb.Server(couch_url)
bulletinbabu_db = remote_server['bulletinbabu']

# (label used in the tweet, key used in the stored document) for every
# counter published in an "Emails from #SpeakForMe to:" tweet.
_EMAIL_COUNT_FIELDS = (
    ("MPs", "mps"),
    ("Banks", "banks"),
    ("Mobile service providers", "mobile"),
    ("Government services", "govt"),
    ("Others", "others"),
    ("Total", "total"),
)


def _parse_email_counts(text):
    """Extract the per-category email counters from a tweet's text.

    Each counter is looked up as ``<label>: <number>``, where the number may
    contain comma separators (for example ``MPs: 1,234``). Labels are matched
    case-insensitively, and a label that does not occur in *text* is simply
    left out of the result.

    Args:
        text: The full text of the tweet.

    Returns:
        A dict mapping document keys (``mps``, ``banks``, ``mobile``,
        ``govt``, ``others``, ``total``) to integer counts.
    """
    counts = {}
    for label, key in _EMAIL_COUNT_FIELDS:
        # Capture only digits and commas (starting with a digit) so that
        # int() always receives a clean number, regardless of what follows
        # the counter on the same line.
        match = re.search(
            re.escape(label) + r":\s*(\d[\d,]*)", text, re.IGNORECASE
        )
        if match:
            counts[key] = int(match.group(1).replace(",", ""))
    return counts


def get_all_tweets(screen_name, fetch_all=False):
    """Store the #SpeakForMe email counters tweeted by *screen_name*.

    Downloads the account's timeline with tweepy, and for every tweet whose
    text starts with "Emails from #SpeakForMe to:" saves one document to the
    module-level ``bulletinbabu_db`` CouchDB database. The document ``_id``
    is the tweet's creation time converted to local time, so saving a tweet
    that an earlier run already stored raises ``ResourceConflict``; since the
    tweets are processed from most recent to oldest, processing stops at that
    point.

    Twitter only allows access to a user's most recent 3240 tweets with this
    method.

    Args:
        screen_name: Twitter screen name of the account to read.
        fetch_all: When true, keep requesting older pages of up to 200
            tweets until no more are returned. Defaults to False, in which
            case only the most recent page is fetched.
    """
    # Authorize against Twitter and initialize tweepy.
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    api = tweepy.API(auth)

    # Initial request for the most recent tweets (200 is the maximum
    # allowed count).
    new_tweets = api.user_timeline(
        screen_name=screen_name, count=200, tweet_mode="extended"
    )
    alltweets = list(new_tweets)

    # Optionally keep grabbing older tweets until there are none left.
    # Checking new_tweets first also covers an empty timeline.
    while fetch_all and new_tweets:
        # All subsequent requests use max_id (the id of the oldest tweet
        # seen so far, less one) to prevent duplicates.
        oldest = new_tweets[-1].id - 1
        new_tweets = api.user_timeline(
            screen_name=screen_name,
            count=200,
            max_id=oldest,
            tweet_mode="extended",
        )
        alltweets.extend(new_tweets)
        print("...%s tweets downloaded so far" % len(alltweets))

    for tweet in alltweets:
        print("--------------------------------------------------------------------------------------------")
        text = tweet.full_text
        if not isinstance(text, str):
            # Python 2: full_text is a unicode object; work on its UTF-8
            # encoding so printing and matching behave as byte strings.
            text = text.encode("utf-8")
        print(text)

        if not text.startswith("Emails from #SpeakForMe to:"):
            # Other tweets (e.g. "Top recipients of #SpeakForMe emails:")
            # are not stored.
            continue

        bulletinbabu = {
            'tw': tweet.id,
            'campaign': "#SpeakForMe",
            '_id': arrow.get(tweet.created_at).to('local').format('YYYY-MM-DDTHH:mm:ssZZ'),
            'stat': "email_sent",
        }
        bulletinbabu.update(_parse_email_counts(text))
        print(str(bulletinbabu))

        try:
            bulletinbabu_db.save(bulletinbabu)
        except couchdb.http.ResourceConflict:
            # This tweet was stored by an earlier run; stop here.
            print("Already exists")
            break
        # Short pause between consecutive saves.
        time.sleep(0.1)


if __name__ == '__main__':
    # Script entry point: pass in the screen name of the account whose
    # tweets should be downloaded and stored.
    get_all_tweets("bulletinbabu")