# JarrydWannenburg/newspaper3k_extraction.py

## newspaper3k_extraction.py
# Keep only the article entries from the newsAPI response held in wells_fargo
# (100 is the maximum number of articles returned).
wells_fargo = wells_fargo['articles']

# Collect the 'url' field of every article entry, in the order returned.
wells_fargo_urls = [entry['url'] for entry in wells_fargo]

def get_article(url):
    """Download and parse the article at ``url`` with newspaper3k.

    The Article is created with ``fetch_images=False`` and
    ``memoize_articles=False``. See
    https://newspaper.readthedocs.io/en/latest/user_guide/quickstart.html
    for more detail.

    Args:
        url: Address of the article to fetch.

    Returns:
        The Article object after ``download()`` and ``parse()`` have been
        called on it.
    """
    page = Article(
        url,
        fetch_images=False,
        memoize_articles=False,
    )
    page.download()
    page.parse()
    return page

# Run get_article on every URL returned by the keyword search, producing one
# newspaper3k article object per URL in the same order as wells_fargo_urls.
wells_fargo_articles = [get_article(i) for i in wells_fargo_urls]