Skip to content

Instantly share code, notes, and snippets.

@jokull
Created May 17, 2016 08:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jokull/b87ceceb4ffcb5db807bd27a00bb13a4 to your computer and use it in GitHub Desktop.
Save jokull/b87ceceb4ffcb5db807bd27a00bb13a4 to your computer and use it in GitHub Desktop.
Find follower accounts that have profile pics with faces in them
OAUTH_CONSUMER_KEY=
OAUTH_CONSUMER_SECRET=
OAUTH_TOKEN=
OAUTH_TOKEN_SECRET=
IMGIX_TOKEN=
imgix==1.0.0
requests==2.9.1
requests-oauthlib==0.6.0
tablib==0.11.2
python-dotenv==0.3.0
# encoding=utf-8
import sys
import shelve
import os
import json
from os.path import join, dirname
from clint.textui import progress
from dotenv import load_dotenv
load_dotenv(join(dirname(__file__), '.env'))
import requests
from requests_oauthlib import OAuth1Session
import imgix
import tablib
# Shared imgix URL builder for the "takumi-twitter-dp.imgix.net" domain.
# The signing key is read from the IMGIX_TOKEN environment variable, so a
# missing variable raises KeyError as soon as this module is loaded.
builder = imgix.UrlBuilder(
    "takumi-twitter-dp.imgix.net",
    sign_key=os.environ['IMGIX_TOKEN'],
)
def chunks(l, n):
    """Yield successive n-sized chunks from l.

    Args:
        l: A sequence that supports ``len()`` and slicing (list, str, ...).
        n: Number of items per chunk.

    Yields:
        Consecutive slices ``l[i:i + n]``; the final slice may hold fewer
        than ``n`` items. Nothing is yielded for an empty sequence.

    Raises:
        ValueError: On first iteration if ``n`` is 0 (raised by ``range``).
    """
    # ``range`` instead of ``xrange`` keeps the helper working on both
    # Python 2 and Python 3 (``xrange`` does not exist on Python 3).
    for start in range(0, len(l), n):
        yield l[start:start + n]
# OAuth1 session used for every Twitter API request. All four credentials
# come from the environment; a missing variable raises KeyError at load time.
twitter = OAuth1Session(
    os.environ['OAUTH_CONSUMER_KEY'],
    client_secret=os.environ['OAUTH_CONSUMER_SECRET'],
    resource_owner_key=os.environ['OAUTH_TOKEN'],
    resource_owner_secret=os.environ['OAUTH_TOKEN_SECRET'],
)

# Base URL of the Twitter REST API and the users/lookup endpoint built on it.
api_url = 'https://api.twitter.com/1.1'
url = '{0}/users/lookup.json'.format(api_url)
def main(path, size=None):
    """Record followers whose profile picture contains a face.

    Reads a JSON document from stdin and takes the follower ids found under
    ``['data']['followers']``. The ids are looked up through the Twitter
    ``users/lookup`` endpoint in batches of 100. For every returned user the
    profile image is sent through imgix with ``faces=1, fm='json'``; users
    whose imgix response has a truthy ``'Faces'`` entry are appended to the
    dataset, the others are remembered in the ``.scratch`` shelf under
    ``'skippable'`` so later runs do not query them again.

    Args:
        path: File the dataset is loaded from (if it exists) and rewritten
            to, as CSV, after every processed batch.
        size: Optional limit on how many followers to process; passed
            through ``int()`` so a command-line string is accepted.

    Raises:
        requests.HTTPError: If the Twitter lookup fails with a status other
            than 404.
    """
    book = tablib.Dataset(headers=['ID', 'Name', 'Handle', 'Bio', 'Image', 'Location', 'Verified', 'URL'])
    scratch = shelve.open('.scratch', writeback=True)
    try:
        scratch.setdefault('skippable', [])

        # A list comprehension (not ``map``) so the slice below also works
        # on Python 3, where ``map`` returns a non-sliceable iterator.
        followers = [str(f) for f in json.load(sys.stdin)['data']['followers']]
        # stdin has been consumed by the JSON payload; re-attach it to the
        # terminal so interactive input (e.g. the debugger) keeps working.
        sys.stdin = open('/dev/tty')

        if os.path.exists(path):
            with open(path) as fp:
                book.load(fp.read())

        if size is not None:
            followers = followers[:int(size)]

        # Every id that needs no further work: already in the dataset or
        # already known to have no face. One set replaces the list
        # concatenation the membership tests used to rebuild each time.
        handled = set(book['ID'])
        handled.update(scratch['skippable'])

        # Split into chunks of 100. The chunks are materialised as a list,
        # presumably so the progress bar can determine its length.
        for batch in progress.bar(list(chunks(followers, 100))):
            pending = [f for f in batch if f not in handled]
            if not pending:
                continue

            response = twitter.get(
                url + '?user_id=' + ','.join(pending),
                headers={'content-type': 'application/json'},
            )
            if response.status_code == 404:
                # NOTE(review): users/lookup is documented to answer 404
                # when none of the requested ids resolve to a user; treat
                # that as an empty batch rather than an error.
                continue
            # Any other failure (rate limit, auth, ...) returns an error
            # object instead of a user list; fail loudly here instead of
            # with an obscure TypeError further down.
            response.raise_for_status()

            for user in response.json():
                user_id = user['id_str']
                if user_id in handled:
                    continue
                imgix_url = builder.create_url(user['profile_image_url'], opts=dict(faces=1, fm='json'))
                imgix_response = requests.get(imgix_url)
                if imgix_response.status_code != 200:
                    # Not recorded anywhere, so the user is retried on the
                    # next run.
                    continue
                if imgix_response.json().get('Faces'):
                    book.append((
                        user_id,
                        user['name'],
                        user['screen_name'],
                        user['description'],
                        user['profile_image_url'],
                        user['location'],
                        user['verified'],
                        user['url'],
                    ))
                else:
                    scratch['skippable'].append(user_id)
                handled.add(user_id)

            # Checkpoint after every batch so an interrupted run loses at
            # most one batch of work.
            with open(path, 'w') as fp:
                fp.write(book.csv)
            scratch.sync()
    finally:
        # Always release the shelf, even when a request or parse step fails.
        scratch.close()
if __name__ == "__main__":
    # Imported here so ipdb is only needed when the file runs as a script.
    from ipdb import launch_ipdb_on_exception

    # Positional command-line arguments map onto main(path, size).
    cli_args = sys.argv[1:]
    # Any uncaught exception inside the block opens an ipdb session.
    with launch_ipdb_on_exception():
        main(*cli_args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment