# linuskohl/naive-benchmark-on-news-headlines.py

## naive-benchmark-on-news-headlines.py
# pip install newsapi-python
import time
import pandas as pd
import numpy as np
import seaborn as sns
from newsapi import NewsApiClient

# Naive benchmark: run the analyzer once per live news headline and record
# how long each call takes together with the headline length and match count.
# NOTE(review): API_KEY, AnalyzeInput and ac are not defined in this file;
# presumably they come from an earlier cell/module -- confirm before running.
newsapi = NewsApiClient(api_key=API_KEY)
# request up to 100 English top headlines
top_news = newsapi.get_top_headlines(language='en', page_size=100)

# One dict per analyzed headline. The DataFrame is built once after the loop:
# DataFrame.append was removed in pandas 2.0 and returned a new frame on
# every call, so appending row by row does not work on current pandas.
rows = []

# iterate over article headlines, log duration, length of headline (in words)
# and number of matches returned by the analyzer
for news in top_news["articles"]:  # the response is bound to `top_news`
    title = news['title']
    if title is None:
        # title.split() below would raise on None; nothing to analyze
        continue
    nr_words = len(title.split())
    request_doc = AnalyzeInput(docId="sample_article", language="en")
    request_doc.text = title
    # perf_counter is the clock intended for measuring short durations
    s_time = time.perf_counter()
    res = ac.analyze(request_doc)
    nr_matches = len(res.matches)
    e_time = time.perf_counter() - s_time
    rows.append({'nr_words': nr_words, 'nr_matches': nr_matches, 'time': e_time})

# Passing `columns` keeps the expected schema even when no row was collected.
performance = pd.DataFrame(rows, columns=['nr_words', 'nr_matches', 'time'])