Created
October 18, 2015 18:51
-
-
Save wmpay/a83ace4256cccdb2115f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import twitter | |
from sqlalchemy import create_engine | |
from sqlalchemy.orm import sessionmaker | |
from sqlalchemy.orm import exc as sqlExc | |
from sqlalchemy.sql import exists | |
from user import User, UserBase | |
from follow import Follow, FollowBase | |
from friend import Friend, FriendBase | |
from time import sleep | |
from datetime import datetime as dt | |
from optparse import OptionParser | |
import logging as log | |
def main():
    """Download the followers of a Twitter handle, then of each of its followers.

    Command-line options:
      -v / --verbose   log at DEBUG level
      -i / --info      log at INFO level (wins over --verbose when both are given)
      -H / --handle    screen name to process; prompted for when omitted

    The stored follower rows for the handle are refreshed only when their
    number differs from the follower count reported by Twitter by more than
    15%.  Afterwards every stored follower that has no follower rows of its
    own is downloaded as well.
    """
    p = OptionParser(description='Downloads data for a twitter user.')
    p.add_option('-v','--verbose', action='store_true', default=False, help='Toggle info and debug messages.')
    p.add_option('-i','--info', action='store_true', default=False, help='Toggle info messages.')
    p.add_option('-H', '--handle', dest='handle',default=None, metavar='handle', help='Input handle to get data for.')
    (options, args) = p.parse_args()

    if options.info:
        log.basicConfig(format="%(levelname)s: %(message)s", level=log.INFO)
    elif options.verbose:
        log.basicConfig(format="%(levelname)s: %(message)s", level=log.DEBUG)
    else:
        log.basicConfig(format="%(levelname)s: %(message)s")

    # NOTE(review): database credentials are hard-coded in the connection URL;
    # consider loading them from configuration or the environment.
    engine = create_engine('mysql://root:b1gd4t4@localhost/tapptv_twitter?charset=utf8')
    UserBase.metadata.create_all(engine, checkfirst=True)
    FollowBase.metadata.create_all(engine, checkfirst=True)
    Session = sessionmaker(bind=engine)
    session = Session()
    try:
        authIndex = 0
        # Context manager closes the credentials file even if parsing fails.
        with open("OAuthCodes.json") as authJson:
            authCodes = json.load(authJson)
        api = accessApi(authIndex, authCodes)

        handle = options.handle or raw_input("Download followers for which handle? ")
        userObj = api.GetUser(screen_name=handle)
        log.info("Getting followers of %s" % handle)
        userId = userObj.GetId()

        tolerance = .15
        storedCount = session.query(Follow).filter_by(userId=userId).count()
        reportedCount = userObj.GetFollowersCount()
        if not reportedCount:
            # Avoid dividing by zero: with no reported followers the stored
            # rows are stale exactly when any exist.
            outOfDate = storedCount != 0
        else:
            # float() forces true division; integer division under Python 2
            # would truncate the ratio to 0 and defeat the tolerance check.
            outOfDate = abs(storedCount - reportedCount) / float(reportedCount) > tolerance

        if outOfDate:
            log.info("Updating follows for user %s..." % handle)
            deleteFollowers(session,userId)
            authIndex = downloadFollowers(session,userId,authIndex,authCodes)
        else:
            log.info("Already have followers for user %s" % handle)

        # Materialise the ids first: downloadFollowers commits on this same
        # session, which would expire ORM instances while they are iterated.
        followerIds = [followerId for (followerId,) in
                       session.query(Follow.followerId).filter(Follow.userId==userId)]
        for followerId in followerIds:
            # Check the follower itself (not the root user) for existing rows.
            alreadyStored = session.query(exists().where(Follow.userId==followerId)).scalar()
            if alreadyStored:
                log.info("Already have followers for user %d" % followerId)
            else:
                authIndex = downloadFollowers(session,followerId,authIndex,authCodes)
        log.info("Download complete.")
    finally:
        session.close()
def accessApi(authIndex,authCodes):
    """Build a twitter.Api client from the credential set at authCodes[authIndex].

    Each entry of authCodes is read for the keys 'id', 'consumerKey',
    'consumerSecret', 'accessTokenKey' and 'accessTokenSecret' ('owner' is
    additionally read when reporting a failure).

    Returns the constructed twitter.Api object.
    Raises twitter.error.TwitterError (after logging a warning) if the client
    cannot be created; previously this path fell through to an unbound local.
    """
    creds = authCodes[authIndex]
    log.info("Accessing Api using OAuth code with id %s" % creds['id'])
    try:
        return twitter.Api(creds['consumerKey'], creds['consumerSecret'],
                           creds['accessTokenKey'], creds['accessTokenSecret'])
    except twitter.error.TwitterError:
        # %s (not %d) matches the info message above and works for any id type.
        log.warning("Error authenticating Api using OAuth code with id %s, owned by %s." % (creds['id'], creds['owner']))
        raise
def downloadFollowers(session,userId,authIndex,authCodes):
    """Page through the follower ids of userId and store them via insertFollowers.

    session    -- database session handed on to insertFollowers
    userId     -- numeric Twitter user id whose followers are fetched
    authIndex  -- index into authCodes of the credential set to start with
    authCodes  -- list of credential dicts accepted by accessApi

    Paging starts at cursor -1 and stops when the API hands back cursor 0 or
    a twitter.error.TwitterError is raised (treated as "not authorized"; the
    ids fetched in that round are discarded).  When the '/followers/ids' rate
    limit of the current credentials is exhausted the next credential set is
    selected; on the last set the function sleeps until the reported reset.

    Returns the credential index in use when the function finishes, so the
    caller can pass it to the next call.
    """
    cursor = -1
    followers = []
    notAuthorized = False
    api = None  # created lazily on first use

    while cursor != 0:
        # NOTE(review): once authIndex reaches the last entry it is reset to 0
        # before any page is fetched, so with more than one credential set the
        # last one appears never to be used for fetching -- confirm intent.
        if len(authCodes)-1 == authIndex:
            authIndex = 0
            api = accessApi(authIndex,authCodes)
            limits = api.GetRateLimitStatus()['resources']['followers']['/followers/ids']
            if limits['remaining'] == 0:
                tReset = dt.fromtimestamp(limits['reset'])
                # timedelta.seconds is ~86399 for a negative delta, which would
                # sleep almost a day when the reset time has already passed.
                # Use the signed total and clamp at zero instead.
                sleepSeconds = max(0, int((tReset - dt.now()).total_seconds()) + 1)
                log.info("Sleeping for %d seconds..." % sleepSeconds)
                sleep(sleepSeconds)
                api = accessApi(authIndex,authCodes)
        else:
            if api is None:
                api = accessApi(authIndex,authCodes)
            limits = api.GetRateLimitStatus()['resources']['followers']['/followers/ids']
            if limits['remaining'] == 0:
                log.info("Rate Limit met for this auth code.")
                authIndex+=1
                api = accessApi(authIndex,authCodes)

        # Issue at most as many requests as the rate-limit snapshot allows.
        # After a credential switch or a sleep the snapshot still reads 0, so
        # this loop is skipped and the next outer pass fetches a fresh status.
        for _ in range(limits['remaining']):
            try:
                data = api.GetFollowerIDs(user_id=userId,total_count=5000,cursor=cursor)
            except twitter.error.TwitterError:
                notAuthorized = True
                break
            followers.extend(data[0])
            cursor = data[1]
            if cursor == 0:
                break

        if notAuthorized:
            break
        if followers:
            log.info("Committing followers for user %d." % userId)
            insertFollowers(session,userId,followers)
            followers = []

    if not notAuthorized:
        log.info("Got followers for user %d." % userId)
    else:
        log.info("Not authorized to get followers for user %d." % userId)
    return authIndex
def insertFollowers(session,userId,followers):
    """Store one Follow row per id in followers and commit them together.

    session    -- database session used for the insert and commit
    userId     -- id of the user being followed
    followers  -- iterable of follower ids

    If adding or committing fails the session is rolled back, so it stays
    usable for the caller, and the original exception is re-raised.
    """
    try:
        session.add_all([Follow(userId=userId,followerId=followerId)
                         for followerId in followers])
        session.commit()
    except Exception:
        # Without a rollback the session stays in a failed transaction and
        # every later operation on it would raise.
        session.rollback()
        raise
def deleteFollowers(session,userId):
    """Issue a delete for every Follow row whose userId equals the given id.

    Nothing is committed here; the delete is part of the session's current
    transaction and is left for the caller to commit.
    """
    log.info("Deleting followers for user %d" % userId)
    storedFollows = session.query(Follow).filter_by(userId=userId)
    storedFollows.delete()
# Run the downloader only when this file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment