Created
September 28, 2018 02:29
-
-
Save dyerrington/134ed64b07a0bc0bfcce4945eaa0bec5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tweepy | |
import wget | |
import os | |
oauth = { | |
"consumer_key": "", | |
"consumer_secret": "" | |
} | |
access = { | |
"access_token": "", | |
"access_token_secret": "" | |
} | |
auth = tweepy.OAuthHandler(**oauth) #Fill these in | |
auth.set_access_token(*access.values()) #Fill these in | |
api = tweepy.API(auth) | |
#Get 200 of Chris' tweet | |
tweets = api.user_timeline(screen_name = 'chrisalbon', | |
count = 200, | |
include_rts = False, | |
excludereplies = True) | |
#200 isn't enough. Keep getting tweets until we can't get anymore | |
last_id = tweets[-1].id | |
while (True): | |
more_tweets = api.user_timeline(screen_name='chrisalbon', | |
count=200, | |
include_rts=False, | |
exclude_replies=True, | |
max_id=last_id-1) | |
# There are no more tweets | |
if (len(more_tweets) == 0): | |
break | |
else: | |
last_id = more_tweets[-1].id-1 | |
tweets += more_tweets | |
#Chris stopped using a hashtag and started linking a URL | |
def has_ML_url(s): | |
urls = s.entities.get('urls') | |
if urls: | |
return(urls[0].get('display_url') == 'machinelearningflashcards.com') | |
else: | |
return False | |
#Filter by those containing machinelearningflashcards.com | |
card_tweets = [tweet for tweet in tweets if has_ML_url(tweet)] | |
media_files = dict() | |
for status in card_tweets: | |
title = status.text.split(' http')[0] | |
media = status.entities.get('media', []) | |
if(len(media) > 0 and media[0]['type']=='photo' ): #if tweet has media and media is photo | |
media_files[title] = media[0]['media_url'] #get me the url | |
os.makedirs('ml-cards', exist_ok=True) #make a directory to store the photos in | |
for title, url in media_files.items(): | |
wget.download(url, out = "ml-cards/{}.png".format(title)) #get the photos! |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment