Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
import tweepy
from tweepy import Stream
from tweepy.streaming import StreamListener
import sys
from newspaper import Article
from time import mktime
# Build the OAuth handler and the shared tweepy API client used by the rest
# of this script. The credential strings are redacted placeholders.
# NOTE(review): tweepy documents OAuthHandler(consumer_key, consumer_secret)
# and set_access_token(key, secret); each single "XXXXXXXXXXXX" presumably
# stands in for both values -- confirm and supply real credentials before use.
auth = tweepy.OAuthHandler("XXXXXXXXXXXX")
auth.set_access_token("XXXXXXXXXXXX")
# wait_on_rate_limit=True is passed through to tweepy.API; per tweepy's docs
# this makes the client wait out rate limits rather than fail.
api = tweepy.API(auth,wait_on_rate_limit=True)
def process_status(status):
    """Fetch and print every article linked from a single tweet.

    Each entry in ``status.entities['urls']`` is downloaded and parsed with
    ``newspaper.Article``. When the extracted text is longer than 100
    characters, a dict describing the article is printed with the keys
    ``publish_date``, ``url``, ``title``, ``summary``, ``text``,
    ``keywords`` (comma-joined) and ``source``.

    Note that ``publish_date`` holds the tweet's creation time, not a date
    extracted from the article itself.

    Args:
        status: Object exposing ``entities['urls']`` (an iterable of dicts
            with an ``expanded_url`` key), ``created_at`` (an object with
            ``timetuple()``) and ``author.screen_name``.

    Returns:
        None. Results and per-URL failures are reported via ``print``.
    """
    # The tweet's creation time is the same for every URL, so compute it once.
    # NOTE(review): time.mktime interprets the tuple as local time; if
    # created_at is a naive UTC datetime the value is shifted by the local
    # UTC offset -- confirm whether calendar.timegm is what is wanted here.
    timestamp = mktime(status.created_at.timetuple())

    for url in status.entities['urls']:
        try:
            expanded_url = url['expanded_url']
            print("Attempting to get article: " + expanded_url)

            article = Article(expanded_url)
            article.download()
            article.parse()
            # nlp() is run before reading summary/keywords, as in the
            # original; newspaper's docs say it is what populates them.
            article.nlp()

            text = article.text
            # Very short extractions are skipped silently.
            if len(text) <= 100:
                continue

            article_json = {
                'publish_date': timestamp,
                'url': expanded_url,
                'title': article.title,
                'summary': article.summary,
                'text': text,
                'keywords': ','.join(article.keywords),
                'source': status.author.screen_name,
            }
            print(article_json)
        except Exception as exc:
            # Best-effort crawl: one bad link must not stop the remaining
            # URLs. Catching Exception (not a bare except) lets Ctrl-C and
            # SystemExit propagate, and the cause is reported, not hidden.
            print("Error: %r" % (exc,))
# Walk every account returned by api.friends, print its id, then run up to
# 500 statuses from that account's timeline through process_status.
for followed in tweepy.Cursor(api.friends).items():
    print(followed.id)
    recent_statuses = tweepy.Cursor(api.user_timeline, id=followed.id).items(500)
    for tweet in recent_statuses:
        process_status(tweet)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment