Skip to content

Instantly share code, notes, and snippets.

@conormm
Last active April 23, 2017 18:57
Show Gist options
  • Save conormm/2ba81bc928760c2dda002fe7a35d5a9b to your computer and use it in GitHub Desktop.
Save conormm/2ba81bc928760c2dda002fe7a35d5a9b to your computer and use it in GitHub Desktop.
# Load the articles for both sources, restricted by title_topic="Trump".
articles = get_articles(the_guardian, breitbart, title_topic="Trump")
# Run clean_text over every entry of the "text" column, hand the cleaned
# column to preprocess_articles, and store the result back in place.
articles["text"] = preprocess_articles(articles["text"].map(clean_text))
def get_readability_stats(parsed_articles):
    """Compute readability statistics for every article.

    Args:
        parsed_articles: Iterable of articles; each item is passed to
            ``tcy.Doc`` as-is.

    Returns:
        A ``pandas.DataFrame`` with one row per article, in input order and
        indexed by position (0, 1, 2, ...). The columns are the keys of the
        mapping returned by ``tcy.text_stats.readability_stats``. When
        ``parsed_articles`` is empty, an empty DataFrame is returned.
    """
    stats_rows = [
        tcy.text_stats.readability_stats(tcy.Doc(article))
        for article in parsed_articles
    ]
    # Build the frame once from all rows: this avoids allocating a one-row
    # DataFrame per article and, unlike pd.concat on an empty list, does not
    # raise when there are no articles.
    return pd.DataFrame(stats_rows)
# One row of readability statistics per entry of the "text" column.
readability_stats = get_readability_stats(articles["text"])
# Drop the existing index of `articles` so its rows line up with the
# statistics rows, then append the statistics as extra columns.
articles = pd.concat(
    [articles.reset_index(drop=True), readability_stats],
    axis="columns",
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment