Last active
November 3, 2017 04:27
-
-
Save skoppula/20c98ef8292ec72d1e2f to your computer and use it in GitHub Desktop.
Uncovering a Twitter handle using Twitter API
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import json | |
import base64 | |
import tweepy | |
import time | |
def is_valid_handle(handle):
    """Check whether a screen name fits the known shape of Olivia's handle.

    The handle must consist solely of lowercase ASCII letters and be at
    least ten characters long.

    Args:
        handle: The Twitter screen name to test (without a leading '@').

    Returns:
        True if the handle matches the pattern, False otherwise.
    """
    # Imported here because this script's import list does not include `re`;
    # the original raised NameError on the first call.
    import re

    return re.match(r"^[a-z]{10,}$", handle) is not None
def is_valid_date(date, year=2011, month=1):
    """Check whether an account creation date falls in the expected month.

    The defaults (January 2011) are the original hard-coded values.
    The year was uncertain -- it might be 2012, since she said sophomore
    year but also said 2011 -- so both values can be overridden.

    Args:
        date: Any object exposing ``month`` and ``year`` attributes, such
            as a ``datetime.date`` or ``datetime.datetime``.
        year: The creation year to accept.
        month: The creation month (1-12) to accept.

    Returns:
        True if the date falls in the given month of the given year.
    """
    return date.month == month and date.year == year
def is_valid_user(user):
    """Decide whether a user object could plausibly be Olivia's account.

    A candidate must pass the creation-date check and the handle check,
    must not be followed by the authenticated account, must still use the
    default profile, and must have more than 10 favourites.

    Args:
        user: An object exposing ``created_at``, ``screen_name``,
            ``following``, ``default_profile`` and ``favourites_count``.

    Returns:
        The result of the short-circuiting ``and`` chain below: truthy
        when every condition holds, falsy otherwise.
    """
    # Open question from the original author: does she have a profile picture?
    return (
        is_valid_date(user.created_at)
        and is_valid_handle(user.screen_name)
        and not user.following
        and user.default_profile
        and user.favourites_count > 10
    )
# Path of the JSON file holding the four OAuth credentials read below.
secrets_file = 'secrets.json'

# Use a separate name for the file object so that `secrets_file` keeps
# referring to the path instead of being rebound to a closed file.
with open(secrets_file) as secrets_fh:
    keys = json.load(secrets_fh)

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print('Connecting to Twitter and authenticating')
auth = tweepy.OAuthHandler(keys['consumer_key'], keys['consumer_secret'])
auth.set_access_token(keys['access_token'], keys['access_token_secret'])
api = tweepy.API(auth)

# Accounts whose follower-ID pools are collected by the loop further down.
handles = ['Royals', 'FiveThirtyEight', 'Buster_ESPN', 'MLB', 'FoxTV', 'NPR', 'NYTimes']  # possibly silver's personal handle as well
def wait_a_minute(seconds=60):
    """Pause between API calls, printing one dot per elapsed second.

    Args:
        seconds: How many one-second ticks to sleep. Defaults to 60, the
            original fixed wait. The banner text always says "a minute",
            whatever value is passed.
    """
    print('\tSleeping for a minute to meet rate limit restrictions')
    sys.stdout.write('\t')
    for _ in range(seconds):
        sys.stdout.write('.')
        # Flush so each dot shows up immediately instead of when the line ends.
        sys.stdout.flush()
        time.sleep(1)
    sys.stdout.write('\n')
# Page through the follower IDs of each account and append them to
# '<handle>.ids', pausing after every request to respect the rate limit.
# NOTE(review): handles[1:] skips the first entry of `handles`; presumably
# its IDs had already been collected -- confirm before re-running.
for handle in handles[1:]:
    print('Processing %s' % handle)
    file_name = handle + '.ids'
    # `with` guarantees the file is closed even if the loop is interrupted.
    with open(file_name, 'w') as fyle:
        # NOTE(review): this opening brace is never matched by a closing
        # one; whatever reads the .ids files must cope with that.
        fyle.write('{')
        cursor = -1  # -1 asks for the first page
        page_count = 1
        print('\tQuerying for the first page of follower IDs...')
        # A cursor of 0 ends the loop. On failure the cursor is left
        # untouched, so the same page is requested again after the pause.
        while cursor != 0:
            try:
                # The request lives inside the try so that a failed call is
                # retried instead of aborting the run. The original also
                # fetched the first page twice; it is now fetched only once.
                response = api.followers_ids(id=handle, cursor=cursor)
                ids = response[0]
                fyle.write(', '.join(str(i) for i in ids) + ',\n')
                fyle.flush()
                print('\tRecieved page %s with %s IDs!' % (page_count, len(ids)))
                cursor = response[1][1]
                page_count += 1
            except Exception as exc:
                # `Exception` rather than a bare except, so that Ctrl-C
                # (KeyboardInterrupt) still stops the script.
                print('Excepted! %s %s %s' % (page_count, cursor, handle))
                print('\t%r' % (exc,))
            wait_a_minute()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import base64 | |
import requests | |
def is_valid(handle):
    """Check whether a screen name fits the known shape of Olivia's handle.

    The handle must consist solely of lowercase ASCII letters and be at
    least ten characters long.

    Args:
        handle: The Twitter screen name to test (without a leading '@').

    Returns:
        True if the handle matches the pattern, False otherwise.
    """
    # Imported here because this script's import list does not include `re`;
    # the original raised NameError on the first call.
    import re

    return re.match(r"^[a-z]{10,}$", handle) is not None
# sys.exit() is used below, but `sys` is not among this script's imports.
import sys

# Set to True to request a fresh application-only bearer token and cache it;
# leave False to reuse the token stored in `bearer_token_file`.
get_bearer_token = False
secrets_file = 'secrets.json'
bearer_token_file = 'bearer_token.json'

if get_bearer_token:
    # Distinct names for the file objects keep the *_file variables bound
    # to their paths instead of to closed file handles.
    with open(secrets_file) as secrets_fh:
        keys = json.load(secrets_fh)
    credentials = keys['consumer_key'] + ':' + keys['consumer_secret']
    # NOTE(review): the next two prints write the consumer secret to stdout
    # in clear text -- consider removing them.
    print(credentials)
    # b64encode operates on bytes; encode first and decode the result so the
    # header can be built by string concatenation on Python 2 and 3 alike.
    base64_bearer_token_credentials = base64.b64encode(
        credentials.encode('utf-8')).decode('ascii')
    print(base64_bearer_token_credentials)
    url = 'https://api.twitter.com/oauth2/token'
    headers = {'authorization': 'Basic ' + base64_bearer_token_credentials,
               'content-type': 'application/x-www-form-urlencoded;charset=UTF-8'}
    payload = 'grant_type=client_credentials'
    response = requests.post(url, headers=headers, data=payload)
    json_bearer_token = json.loads(response.text)
    if 'access_token' in json_bearer_token:
        bearer_token = json_bearer_token['access_token']
        with open(bearer_token_file, 'w') as bearer_token_file_obj:
            # json.dump handles quoting/escaping; the else-branch below reads
            # the same {"bearer_token": ...} object back with json.load.
            json.dump({'bearer_token': bearer_token}, bearer_token_file_obj)
        print('Using bearer token %s' % bearer_token)
    else:
        print('Cannot get bearer token')
        sys.exit(1)
else:
    with open(bearer_token_file) as bearer_token_fh:
        bearer_token = json.load(bearer_token_fh)['bearer_token']
    print('Using bearer token: %s' % bearer_token)

handle_1 = '@NYTimes'
handle_2 = '@FiveThirtyEight'  # possibly silver's personal handle as well
handle_3 = '@MLB'
handle_4 = '@Buster_ESPN'
handle_5 = '@FoxTV'
handle_6 = '@NPR'
handle_7 = '@Royals'
handle_8 = '@NPR'
# NOTE(review): get_followers is defined only after this statement, so this
# call raises NameError as written; move it below the function definition.
get_followers(handle_8)
def get_followers(user_id): | |
cursor = -1 | |
authorization_header = 'Bearer ' + bearer_token | |
headers = {"authortization":authorization_header} | |
url = "https://api.twitter.com/1.1/followers/ids.json?cursor=" + str(cursor) + "&user_id=" | |
response = requests.get(url, |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I was challenged this summer by a close someone to take a bet. The terms? To find her Twitter handle using less than 10 reasonable questions, or otherwise take her to dinner. I myself stood to gain half a goat, a donation through Oxfam she promised to make should I win the bet.
With my pride and tangible items on the line, I thought I’d use my only trick: Twitter’s API.
How I used my questions
To get a pool of handles to search through, I first asked her if she followed the New York Times, FiveThirtyEight, and a few other big-name accounts that a college-age student might follow. Then, to narrow down the pool, I used my remaining questions to ask about specifics of the handle: does the handle have more than 10 characters? Does it contain an ‘e’? Does it contain an ‘n’?
The execution
Using Twitter’s API, I first queried and saved the large pools of user IDs that followed the news sources I knew she followed. Twitter, unfortunately, rate-limits certain query types, so I had to take a roundabout route: first query the user IDs, then intersect these ID pools to get a smaller pool of candidate IDs, and finally query each user ID to get the corresponding Twitter handle. I filtered these handles (roughly 3,000) based on the answers to the above three questions, leaving roughly 20 handles, which I manually examined and filtered. Lucky for me, despite the lack of any personally identifiable information on any of the twenty accounts, there was only one account that was clearly hers.
The gists below are templates I made for querying the Twitter API. Most of the set intersection and filtering was done in IPython, the output of which was not saved (sorry).