Skip to content

Instantly share code, notes, and snippets.

@justindavies
Created June 11, 2017 14:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save justindavies/ad396edb6baf452b894d7903dd0bfcd7 to your computer and use it in GitHub Desktop.
Save justindavies/ad396edb6baf452b894d7903dd0bfcd7 to your computer and use it in GitHub Desktop.
import os
import sys
from time import mktime

import tweepy
from newspaper import Article
from tweepy import Stream
from tweepy.streaming import StreamListener
# Twitter API credentials are read from the environment so real secrets never
# live in the source file. OAuthHandler takes the consumer key/secret pair and
# set_access_token takes the access token/secret pair; a missing variable
# raises KeyError immediately at startup instead of surfacing later as an
# opaque authentication failure.
auth = tweepy.OAuthHandler(
    os.environ["TWITTER_CONSUMER_KEY"],
    os.environ["TWITTER_CONSUMER_SECRET"],
)
auth.set_access_token(
    os.environ["TWITTER_ACCESS_TOKEN"],
    os.environ["TWITTER_ACCESS_TOKEN_SECRET"],
)
# wait_on_rate_limit=True asks tweepy to wait out rate-limit windows rather
# than raising, which matters for the long crawl this script performs.
api = tweepy.API(auth, wait_on_rate_limit=True)
def process_status(status):
    """Fetch, analyse and print every article linked from one tweet.

    Each URL entity attached to ``status`` is downloaded and parsed with
    newspaper's ``Article``; when the extracted body text is longer than 100
    characters a dict describing the article is printed to stdout.

    Processing is best-effort: a failure on one URL is reported and the
    remaining URLs are still attempted.

    Args:
        status: A tweet object exposing ``entities['urls']`` (a list of dicts
            with an ``'expanded_url'`` key), ``created_at`` (a datetime) and
            ``author.screen_name``.

    Returns:
        None. Results and errors are written to stdout.
    """
    # The tweet's creation time is the same for every link, so compute it once.
    # NOTE(review): mktime() interprets the time tuple as local time; if
    # created_at is UTC (as tweepy appears to provide) the epoch value is
    # shifted by the machine's UTC offset -- confirm and consider
    # calendar.timegm() instead.
    timestamp = mktime(status.created_at.timetuple())

    for url in status.entities['urls']:
        try:
            link = url['expanded_url']
            print("Attempting to get article: " + link)

            article = Article(link)
            article.download()
            article.parse()
            # nlp() populates article.summary and article.keywords.
            article.nlp()

            text = article.text
            article_json = {
                'publish_date': timestamp,
                'url': link,
                'title': article.title,
                'summary': article.summary,
                'text': text,
                'keywords': ','.join(article.keywords),
                'source': status.author.screen_name,
            }

            # Very short bodies are skipped (threshold: 100 characters).
            if len(text) > 100:
                print(article_json)
        except Exception as exc:
            # Catch Exception rather than using a bare except so Ctrl-C and
            # SystemExit still stop the crawl, and say what went wrong.
            print("Error: {!r}".format(exc))
# Script entry: walk every account returned by api.friends, print its id, then
# feed up to 500 items from that account's timeline to process_status().
followed_accounts = tweepy.Cursor(api.friends).items()
for friend in followed_accounts:
    print(friend.id)

    # Cursor pages through user_timeline; items(500) caps the iteration.
    recent_statuses = tweepy.Cursor(api.user_timeline, id=friend.id).items(500)
    for status in recent_statuses:
        process_status(status)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment