Skip to content

Instantly share code, notes, and snippets.

@linuskohl
Created June 4, 2019 16:19
Show Gist options
  • Save linuskohl/580a9056481dce98f417d5eeb027ff17 to your computer and use it in GitHub Desktop.
Save linuskohl/580a9056481dce98f417d5eeb027ff17 to your computer and use it in GitHub Desktop.
Naive benchmark of AmbiverseNLU on news headlines
# pip install newsapi-python
import time
import pandas as pd
import numpy as np
import seaborn as sns
from newsapi import NewsApiClient
# Naive benchmark: send each current top headline through the entity
# analysis client and record headline length, number of matches and latency.
# NOTE(review): API_KEY, AnalyzeInput and ac are not defined in this snippet;
# they are expected to be set up earlier (presumably the AmbiverseNLU client
# configuration) -- confirm before running.
newsapi = NewsApiClient(api_key=API_KEY)

# Get the 100 latest news items.
top_news = newsapi.get_top_headlines(language='en', page_size=100)

# Collect one record per headline and build the DataFrame once at the end;
# DataFrame.append was removed in pandas 2.0 and copied the frame every call.
records = []

# Iterate over article headlines; log duration, length of headline and
# number of identified entities.
for news in top_news["articles"]:
    title = news['title']
    if not title:
        # No headline text to analyze (missing or empty title); skip it.
        continue
    nr_words = len(title.split())

    request_doc = AnalyzeInput(docId="sample_article", language="en")
    request_doc.text = title

    # Time only the analyze call; perf_counter is monotonic and has higher
    # resolution than time.time() for short intervals.
    s_time = time.perf_counter()
    res = ac.analyze(request_doc)
    e_time = time.perf_counter() - s_time

    nr_matches = len(res.matches)
    records.append({'nr_words': nr_words, 'nr_matches': nr_matches, 'time': e_time})

# Passing the column list keeps the frame's layout stable even when no
# headline was processed.
performance = pd.DataFrame(records, columns=['nr_words', 'nr_matches', 'time'])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment