Last active
September 3, 2022 17:07
-
-
Save JarrydWannenburg/382d872826d2f37697ac1f4be7ab1a00 to your computer and use it in GitHub Desktop.
Google_News_Extraction_Article
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# The newsAPI pull objects carry the cleanest published date, so collect
# those up front; slicing [:10] keeps only the YYYY-MM-DD portion.
wells_fargo_publishedAt = [entry['publishedAt'][:10] for entry in wells_fargo]
# Create our function that takes a list of Article objects and returns one dataframe | |
def get_details(article_list, published_dates=None):
    """Collect details from parsed Article objects into one DataFrame.

    Parameters
    ----------
    article_list : list
        Parsed Article objects (newspaper3k-style) exposing ``.title``,
        ``.authors``, ``.url``, ``.text``, ``.meta_data``,
        ``.meta_keywords``, ``.keywords``, ``.meta_description`` and
        ``.summary``.
    published_dates : list of str, optional
        Publication dates (one per article, ``YYYY-MM-DD``). Defaults to
        the module-level ``wells_fargo_publishedAt`` list, preserving the
        original script's behavior.

    Returns
    -------
    pandas.DataFrame
        One row per article with Title, Description, Published, Keywords,
        Content, Authors and URL columns.
    """
    if published_dates is None:
        # Backward-compatible fallback to the list built from the newsAPI pull.
        published_dates = wells_fargo_publishedAt

    # Initialize empty lists used to assemble the dataframe columns.
    titles = []
    urls = []
    authors = []
    keywords = []
    descriptions = []
    content = []

    for article in article_list:
        titles.append(article.title)
        authors.append(article.authors)
        urls.append(article.url)
        content.append(article.text)

        # Keywords live in three places; the 'news_keywords' meta tag is the
        # best source, then meta_keywords, then NLP-derived keywords.
        # .get() avoids a KeyError when the meta tag is missing entirely.
        news_kw = article.meta_data.get('news_keywords', '')
        if len(news_kw) > 0:
            keywords.append(news_kw.split(', '))
        # Guard the [0] index: an empty meta_keywords list would otherwise
        # raise IndexError. "No meta keywords" usually comes back as [''],
        # whose first element has length 0.
        elif len(article.meta_keywords) > 0 and len(article.meta_keywords[0]) > 0:
            keywords.append(article.meta_keywords)
        elif len(article.keywords) > 0:
            keywords.append(article.keywords)
        else:
            keywords.append([])

        # Descriptions also have three sources; preference order is
        # meta_description, then the meta_data 'description' entry, then
        # the NLP summary. Again .get() guards against a missing key.
        if len(article.meta_description) > 0:
            descriptions.append(article.meta_description)
        elif len(article.meta_data.get('description', '')) > 0:
            descriptions.append(article.meta_data['description'])
        elif len(article.summary) > 0:
            descriptions.append(article.summary)
        else:
            descriptions.append('')

    # Assemble the per-article columns into a dataframe.
    article_dict = {'Title': titles,
                    'Description': descriptions,
                    'Published': published_dates,
                    'Keywords': keywords,
                    'Content': content,
                    'Authors': authors,
                    'URL': urls}
    return pd.DataFrame(article_dict)
# Run the extraction over the Wells Fargo articles and preview the result.
wells_fargo_df = get_details(wells_fargo_articles)
wells_fargo_df.head()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment