Skip to content

Instantly share code, notes, and snippets.

@wmpay
Created October 18, 2015 18:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wmpay/a83ace4256cccdb2115f to your computer and use it in GitHub Desktop.
Save wmpay/a83ace4256cccdb2115f to your computer and use it in GitHub Desktop.
import json
import twitter
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy.orm import exc as sqlExc
from sqlalchemy.sql import exists
from user import User, UserBase
from follow import Follow, FollowBase
from friend import Friend, FriendBase
from time import sleep
from datetime import datetime as dt
from optparse import OptionParser
import logging as log
def main():
    """Download follower ids for a Twitter handle and for each of its followers.

    Command-line options:
        -v / --verbose  log at DEBUG level.
        -i / --info     log at INFO level (takes precedence over --verbose).
        -H / --handle   screen name to process; prompted for when omitted.

    The stored follower list of the handle is deleted and re-downloaded when
    its size differs from the follower count reported by Twitter by more
    than ``tolerance`` (15%). Afterwards, followers are downloaded for every
    stored follower that has no follower rows of its own yet.
    """
    p = OptionParser(description='Downloads data for a twitter user.')
    p.add_option('-v', '--verbose', action='store_true', default=False, help='Toggle info and debug messages.')
    p.add_option('-i', '--info', action='store_true', default=False, help='Toggle info messages.')
    p.add_option('-H', '--handle', dest='handle', default=None, metavar='handle', help='Input handle to get data for.')
    (options, args) = p.parse_args()

    logFormat = "%(levelname)s: %(message)s"
    if options.info:
        log.basicConfig(format=logFormat, level=log.INFO)
    elif options.verbose:
        log.basicConfig(format=logFormat, level=log.DEBUG)
    else:
        log.basicConfig(format=logFormat)

    # NOTE(review): the database credentials are hard-coded in this URL;
    # consider loading them from configuration or the environment instead.
    engine = create_engine('mysql://root:b1gd4t4@localhost/tapptv_twitter?charset=utf8')
    UserBase.metadata.create_all(engine, checkfirst=True)
    FollowBase.metadata.create_all(engine, checkfirst=True)
    Session = sessionmaker(bind=engine)

    authIndex = 0
    # The context manager closes the credentials file even if parsing fails.
    with open("OAuthCodes.json") as authJson:
        authCodes = json.load(authJson)

    session = Session()
    try:
        api = accessApi(authIndex, authCodes)
        handle = options.handle
        if not handle:
            handle = raw_input("Download followers for which handle? ")
        userObj = api.GetUser(screen_name=handle)
        log.info("Getting followers of %s" % handle)
        userId = userObj.GetId()

        tolerance = .15
        expected = userObj.GetFollowersCount()
        stored = session.query(Follow).filter_by(userId=userId).count()
        if expected == 0:
            # Avoid dividing by zero: with no followers on Twitter, any
            # stored row is stale.
            stale = stored != 0
        else:
            # float() forces true division; plain int division would
            # truncate the ratio to 0 under Python 2.
            stale = abs(stored - expected) / float(expected) > tolerance

        if stale:
            log.info("Updating follows for user %s..." % handle)
            deleteFollowers(session, userId)
            authIndex = downloadFollowers(session, userId, authIndex, authCodes)
        else:
            log.info("Already have followers for user %s" % handle)

        # Collect the ids up front so the loop does not depend on ORM
        # objects while downloadFollowers commits on the same session.
        followerIds = [
            row.followerId
            for row in session.query(Follow).filter(Follow.userId == userId)
        ]
        for followerId in followerIds:
            # Check whether *this follower* already has follower rows.
            alreadyStored = session.query(
                exists().where(Follow.userId == followerId)).scalar()
            if alreadyStored:
                log.info("Already have followers for user %d" % followerId)
            else:
                authIndex = downloadFollowers(session, followerId, authIndex, authCodes)
        log.info("Download complete.")
    finally:
        # Release the DB connection on both success and failure.
        session.close()
def accessApi(authIndex, authCodes):
    """Build a ``twitter.Api`` client from one entry of *authCodes*.

    Args:
        authIndex: index into *authCodes* of the credentials to use.
        authCodes: sequence of dicts; each entry is read for the keys
            'id', 'owner', 'consumerKey', 'consumerSecret',
            'accessTokenKey' and 'accessTokenSecret'.

    Returns:
        The constructed ``twitter.Api`` instance.

    Raises:
        twitter.error.TwitterError: logged and re-raised when constructing
            the client fails, so callers never receive an unusable client.
    """
    creds = authCodes[authIndex]
    log.info("Accessing Api using OAuth code with id %s" % creds['id'])
    try:
        return twitter.Api(creds['consumerKey'], creds['consumerSecret'],
                           creds['accessTokenKey'], creds['accessTokenSecret'])
    except twitter.error.TwitterError:
        # %s (not %d) for the id, matching the info message above, so a
        # non-integer id cannot raise a TypeError inside this handler.
        log.warning("Error authenticating Api using OAuth code with id %s, owned by %s." % (creds['id'], creds['owner']))
        raise
def _followerIdsLimit(api):
    """Return the rate-limit entry for the /followers/ids endpoint of *api*."""
    return api.GetRateLimitStatus()['resources']['followers']['/followers/ids']


def downloadFollowers(session, userId, authIndex, authCodes):
    """Fetch the follower ids of *userId* page by page and store them.

    OAuth codes are used in order starting at *authIndex*. When the current
    code has no /followers/ids calls left, the next code is selected
    (wrapping around to the first). If the rotation wraps back to the first
    code and that one is exhausted as well, the function sleeps until that
    code's rate-limit window resets.

    Args:
        session: SQLAlchemy session handed to ``insertFollowers``.
        userId: numeric id of the user whose followers are fetched.
        authIndex: index into *authCodes* of the code to start with.
        authCodes: sequence of OAuth credential dicts (see ``accessApi``).

    Returns:
        The index of the OAuth code in use when the download stopped, so
        the caller can resume with it.
    """
    cursor = -1
    notAuthorized = False
    api = None
    while cursor != 0:
        if api is None:
            api = accessApi(authIndex, authCodes)
        limit = _followerIdsLimit(api)
        if limit['remaining'] == 0:
            log.info("Rate Limit met for this auth code.")
            # Modulo rotation visits every code, including the last one.
            authIndex = (authIndex + 1) % len(authCodes)
            api = accessApi(authIndex, authCodes)
            # Re-read the limit so the new code's allowance is used below.
            limit = _followerIdsLimit(api)
            if authIndex == 0 and limit['remaining'] == 0:
                untilReset = dt.fromtimestamp(limit['reset']) - dt.now()
                # total_seconds() keeps the sign; clamp so a reset time in
                # the past does not become a near-24h sleep. The extra
                # second avoids waking just before the window reopens.
                waitSeconds = max(0, int(untilReset.total_seconds())) + 1
                log.info("Sleeping for %d seconds..." % waitSeconds)
                sleep(waitSeconds)
                limit = _followerIdsLimit(api)

        followers = []
        # Spend at most the calls this code has left; an exhausted code
        # yields zero iterations and the outer loop rotates again.
        for _ in range(limit['remaining']):
            try:
                # NOTE(review): assumes GetFollowerIDs returns
                # (ids, next_cursor), as the original code did; confirm
                # against the installed python-twitter version.
                data = api.GetFollowerIDs(user_id=userId, total_count=5000, cursor=cursor)
            except twitter.error.TwitterError:
                # Any API error is treated as "not authorized" and aborts
                # the download for this user.
                notAuthorized = True
                break
            followers.extend(data[0])
            cursor = data[1]
            if cursor == 0:
                break

        if notAuthorized:
            break
        if followers:
            log.info("Committing followers for user %d." % userId)
            insertFollowers(session, userId, followers)

    if notAuthorized:
        log.info("Not authorized to get followers for user %d." % userId)
    else:
        log.info("Got followers for user %d." % userId)
    return authIndex
def insertFollowers(session, userId, followers):
    """Add one ``Follow`` row per id in *followers* and commit them.

    Args:
        session: SQLAlchemy session the rows are added to and committed on.
        userId: id of the user being followed.
        followers: iterable of follower ids.
    """
    newRows = [Follow(userId=userId, followerId=fid) for fid in followers]
    session.add_all(newRows)
    session.commit()
def deleteFollowers(session, userId):
    """Delete every ``Follow`` row whose ``userId`` equals *userId*.

    The deletion is issued on *session* but not committed here.
    """
    log.info("Deleting followers for user %d" % userId)
    rowsForUser = session.query(Follow).filter(Follow.userId == userId)
    rowsForUser.delete()
# Run the downloader only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment