Skip to content

Instantly share code, notes, and snippets.

@varpurantala
Forked from mjcreativeventures/get_followers.py
Last active July 9, 2018 19:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save varpurantala/7df9e95755080655f6b39f85f44a009f to your computer and use it in GitHub Desktop.
Save varpurantala/7df9e95755080655f6b39f85f44a009f to your computer and use it in GitHub Desktop.
Collect Twitter followers
import tweepy
import time
import os
import sys
import json
import argparse
# Directory where each crawled user's friend list is cached as a
# tab-separated "<screen_name>.csv" file.
FOLLOWING_DIR = 'following'
# Upper bound on the number of friends fetched for a single user.
MAX_FRIENDS = 200
# Upper bound on how many of a user's friends are crawled recursively.
FRIENDS_OF_FRIENDS_LIMIT = 200

# Make sure the friend-list cache directory exists before anything is
# written into it (os.makedirs; the os module has no "makedir" function).
if not os.path.exists(FOLLOWING_DIR):
    os.makedirs(FOLLOWING_DIR)


def enc(x):
    """Encode *x* as ASCII, dropping any character that cannot be encoded."""
    return x.encode('ascii', errors='ignore')


# The consumer keys can be found on your application's Details
# page located at https://dev.twitter.com/apps (under "OAuth settings")
CONSUMER_KEY = 'XXXXXXXXXXXXXXXXXXXXXXXXX'
CONSUMER_SECRET = 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'
# The access tokens can be found on your application's Details
# page located at https://dev.twitter.com/apps (located
# under "Your access token")
ACCESS_TOKEN = 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'
ACCESS_TOKEN_SECRET = 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'
# == OAuth Authentication ==
#
# This mode of authentication is the new preferred way
# of authenticating with Twitter.
auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
# Shared API client used by the rest of the script.
api = tweepy.API(auth)
def _first_error_detail(error):
    """Return the first item of the error's first argument, or None if absent."""
    try:
        return error.args[0][0]
    except (IndexError, KeyError, TypeError):
        return None


def _fetch_user_details(centre, userfname):
    """Fetch the profile of user *centre*, cache it at *userfname*, return it.

    Returns the profile as a dict, or None when the user has to be skipped
    (not authorized, suspended, or any other non-rate-limit API error).
    When the API reports 'Rate limit exceeded' the call sleeps for a little
    over 15 minutes and retries.
    """
    while True:
        try:
            user = api.get_user(centre)
            details = {'name': user.name,
                       'screen_name': user.screen_name,
                       'id': user.id,
                       'friends_count': user.friends_count,
                       'followers_count': user.followers_count,
                       'followers_ids': user.followers_ids()}
        except tweepy.TweepError as error:
            print(type(error))
            reason = str(error)
            if reason == 'Not authorized.':
                print("Can't access user data - not authorized.")
                return None
            if reason == 'User has been suspended.':
                print('User suspended.')
                return None
            detail = _first_error_detail(error)
            print(detail)
            # Only index into the payload when it really is a dict; other
            # payload shapes are treated like any unknown error (skip user).
            if isinstance(detail, dict) and detail.get('message') == 'Rate limit exceeded':
                print('Rate limited. Sleeping for 15 minutes.')
                time.sleep(15 * 60 + 15)
                continue
            return None

        # Create the profile cache directory on first use so the write
        # below does not fail on a fresh checkout.
        cache_dir = os.path.dirname(userfname)
        if cache_dir and not os.path.isdir(cache_dir):
            os.makedirs(cache_dir)
        with open(userfname, 'w') as outf:
            outf.write(json.dumps(details, indent=1))
        return details


def _collect_friend_ids(user, screen_name, fname):
    """Return the friend ids of *user*, reading or building the cache *fname*.

    If *fname* exists, the ids are read from its first tab-separated column.
    Otherwise up to MAX_FRIENDS friends are paged from the API and written to
    *fname* as "id<TAB>screen_name<TAB>name" lines.
    """
    if os.path.exists(fname):
        with open(fname) as inf:
            return [int(line.split('\t')[0]) for line in inf if line.strip()]

    print('No cached data for screen name "%s"' % screen_name)
    friendids = []
    with open(fname, 'w') as outf:
        print('Retrieving friends for user "%s" (%s)' % (enc(user['name']), screen_name))
        # page over friends
        cursor = tweepy.Cursor(api.friends, id=user['id']).items()
        while True:
            try:
                friend = next(cursor)
            except tweepy.TweepError:
                # Any API error here is treated as a rate limit: wait, retry.
                print('Rate limited. Sleeping for 15 minutes.')
                time.sleep(15 * 60 + 15)
                continue
            except StopIteration:
                break
            friendids.append(friend.id)
            outf.write('%s\t%s\t%s\n' % (friend.id, enc(friend.screen_name), enc(friend.name)))
            if len(friendids) >= MAX_FRIENDS:
                print('Reached max no. of friends for "%s".' % screen_name)
                break
    return friendids


def get_follower_ids(centre, max_depth=1, current_depth=0, taboo_list=None):
    """Crawl the friend network around Twitter user id *centre*.

    User profiles are cached as JSON under 'twitter-users/' and friend lists
    as CSV under FOLLOWING_DIR. Friends are only retrieved for users whose
    screen name starts with 'TED'; those friends are then crawled recursively
    while current_depth + 1 < max_depth.

    Args:
        centre: Twitter user id to start from.
        max_depth: depth at which the recursion stops.
        current_depth: depth of this call (0 for the initial call).
        taboo_list: ids already visited; extended in place. A fresh list is
            created when omitted, so separate crawls do not share state.

    Returns:
        The list of visited user ids.

    Raises:
        SystemExit: with status 1 on any unexpected error, after removing the
            possibly incomplete friend-list file of the current user.
    """
    if taboo_list is None:
        taboo_list = []

    if current_depth == max_depth:
        print('out of depth')
        return taboo_list

    if centre in taboo_list:
        # we've been here before
        print('Already been here.')
        return taboo_list
    taboo_list.append(centre)

    # Bound before the try block so the error handler can always inspect it.
    fname = None
    try:
        userfname = os.path.join('twitter-users/', str(centre) + '.json')
        if os.path.exists(userfname):
            with open(userfname) as inf:
                user = json.load(inf)
        else:
            print('Retrieving user details for twitter id %s' % str(centre))
            user = _fetch_user_details(centre, userfname)
            if user is None:
                return taboo_list

        screen_name = enc(user['screen_name'])
        fname = os.path.join(FOLLOWING_DIR, screen_name + '.csv')

        # only retrieve friends of TED... screen names
        if screen_name.startswith('TED'):
            friendids = _collect_friend_ids(user, screen_name, fname)
            print('Found %d friends for %s' % (len(friendids), screen_name))

            # get friends of friends
            if current_depth + 1 < max_depth:
                for fid in friendids[:FRIENDS_OF_FRIENDS_LIMIT]:
                    taboo_list = get_follower_ids(fid, max_depth=max_depth,
                                                  current_depth=current_depth + 1,
                                                  taboo_list=taboo_list)
                if len(friendids) > FRIENDS_OF_FRIENDS_LIMIT:
                    print('Not all friends retrieved for %s.' % screen_name)
    except Exception as error:
        print('Error retrieving followers for user id:  %s' % centre)
        print(error)
        # Drop a possibly half-written friend list so the next run refetches it.
        if fname is not None and os.path.exists(fname):
            os.remove(fname)
            print('Removed file "%s".' % fname)
        sys.exit(1)
    return taboo_list
if __name__ == '__main__':
    # Command-line entry point: resolve a screen name to a user id and crawl
    # that user's network down to the requested depth.
    parser = argparse.ArgumentParser()
    parser.add_argument("-s", "--screen-name", required=True, help="Screen name of twitter user")
    parser.add_argument("-d", "--depth", required=True, type=int, help="How far to follow user network")
    options = parser.parse_args()

    twitter_screenname = options.screen_name
    depth = int(options.depth)

    # Only depths 1 through 3 are accepted.
    if not 1 <= depth <= 3:
        print('Depth value %d is not valid. Valid range is 1-3.' % depth)
        sys.exit('Invalid depth argument.')

    print('Max Depth: %d' % depth)
    matches = api.lookup_users(screen_names=[twitter_screenname])

    if len(matches) != 1:
        print('Sorry, could not find twitter user with screen name: %s' % twitter_screenname)
    else:
        # Crawl from the single matching user and print the visited ids.
        print(get_follower_ids(matches[0].id, max_depth=depth))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment