Skip to content

Instantly share code, notes, and snippets.

@dgouldin
Created June 6, 2015 09:53
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dgouldin/3ef28e9b16f7d143274f to your computer and use it in GitHub Desktop.
Save dgouldin/3ef28e9b16f7d143274f to your computer and use it in GitHub Desktop.
Twitter friend similarity
from __future__ import division
import json
import os
import urlparse
from collections import defaultdict
from datetime import datetime
from dateutil.relativedelta import relativedelta
from requests import HTTPError
from requests_oauthlib import OAuth1Session
# OAuth1-signed session shared by every API request in this script.  Each
# credential is read from the upper-cased environment variable of the same
# name; a missing variable raises KeyError at import time.
twitter = OAuth1Session(**{
    name: os.environ[name.upper()]
    for name in (
        'client_key',
        'client_secret',
        'resource_owner_key',
        'resource_owner_secret',
    )
})
def get(path, *args, **kwargs):
    """Send a GET request to a Twitter REST API v1.1 endpoint.

    ``path`` is resolved against ``https://api.twitter.com/1.1/``; any extra
    positional and keyword arguments are forwarded unchanged to the
    module-level ``twitter`` session's ``get`` method, whose response is
    returned as-is.
    """
    endpoint = urlparse.urljoin('https://api.twitter.com/1.1/', path)
    response = twitter.get(endpoint, *args, **kwargs)
    return response
# Optional on-disk cache directory, taken from the CACHE environment
# variable.  When it is unset or empty, caching is disabled and nothing is
# created.
cache = os.environ.get('CACHE')
if cache:
    if not os.path.exists(cache):
        os.makedirs(cache)
def friends(id=None, screen_name=None):
    """Return the list of user ids followed by the given account.

    The account is identified by ``id`` or ``screen_name``.  When the
    module-level ``cache`` directory is configured, a previously saved
    result is returned from the JSON file named after ``id or screen_name``
    without touching the network, and fresh results are written back to
    that file.

    Every result page is fetched: the request is repeated with the
    ``next_cursor`` value from the previous response until the API reports
    a cursor of 0, so accounts following more than 5000 users are not
    truncated to their first page.

    Raises whatever ``raise_for_status`` raises for an unsuccessful
    response (the caller catches ``requests.HTTPError``); nothing is
    written to the cache in that case.
    """
    if cache:
        cache_filename = os.path.join(cache, '{}'.format(id or screen_name))
        if os.path.exists(cache_filename):
            with open(cache_filename, 'r') as f:
                return json.load(f)

    ids = []
    cursor = -1  # -1 asks the API for the first page
    while cursor:
        params = {
            'count': 5000,
            'id': id,
            'screen_name': screen_name,
            'cursor': cursor,
        }
        r = get('friends/ids.json', params=params)
        r.raise_for_status()
        page = r.json()
        ids.extend(page['ids'])
        # A missing or zero next_cursor means there are no further pages.
        cursor = page.get('next_cursor', 0)

    if cache:
        with open(cache_filename, 'w') as f:
            json.dump(ids, f)
    return ids
# graph['dgouldin'] holds the full set of ids dgouldin follows; every other
# entry holds the subset of those ids that the keyed account also follows.
graph = defaultdict(set)
graph['dgouldin'] = set(friends(screen_name='dgouldin'))

for id in sorted(graph['dgouldin']):
    try:
        ids = friends(id=id)
    except HTTPError as e:
        if e.response.status_code != 429:
            # Any other HTTP error: silently move on to the next account.
            continue
        # Rate limited: report how long until the window resets, then stop.
        reset = datetime.fromtimestamp(
            int(e.response.headers['x-rate-limit-reset']))
        delta = relativedelta(reset, datetime.now())
        print('rate limit hit, wait {}m:{}s'.format(
            delta.minutes, delta.seconds))
        break

    graph[id] |= graph['dgouldin'].intersection(ids)
    # True division: the file enables `from __future__ import division`.
    similarity = len(graph[id]) / len(graph['dgouldin'])
    print('{}: {:.2f}% similar'.format(id, similarity * 100))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment