from textblob import TextBlob, Word
import sys
import random
from newspaper import Article
from fuzzywuzzy import fuzz
from nltk.stem import WordNetLemmatizer
def nphra(text):
    """Extract the noun phrases from *text*.

    Args:
        text: The raw text to analyse.

    Returns:
        A list with the noun phrases TextBlob finds in *text*, in the order
        TextBlob reports them.
    """
    blob = TextBlob(text)
    # NOTE(review): the loop over blob.noun_phrases had no body in the
    # reviewed source; returning the phrases is an assumption based on the
    # "extract" wording -- confirm against callers (they may expect printing).
    return list(blob.noun_phrases)
def summary(text):
    """Extract the lemmatized (root) form of every noun in *text*.

    Only words whose part-of-speech tag is exactly ``'NN'`` are collected;
    other noun tags (for example plural ``'NNS'``) are skipped.

    Args:
        text: The raw text to analyse.

    Returns:
        A list of lemmatized nouns, in order of appearance (duplicates kept).
    """
    blob = TextBlob(text)
    # NOTE(review): the body of the ``if tag == 'NN'`` branch was missing in
    # the reviewed source; collecting ``word.lemmatize()`` and returning the
    # list is inferred from the ``nouns`` accumulator -- confirm with callers.
    nouns = [word.lemmatize() for word, tag in blob.tags if tag == 'NN']
    return nouns
def cmp_news(url1, url2):
    """Download, parse and compare two news articles given by URL.

    Each article is fetched, parsed and run through newspaper's NLP step so
    that its ``keywords`` are populated. The keywords are lemmatized, joined
    into one space-separated string per article, and the two strings are
    compared with fuzzywuzzy's token-set ratio.

    Args:
        url1: URL of the first article.
        url2: URL of the second article.

    Returns:
        The token-set similarity score of the two keyword strings
        (fuzzywuzzy reports it as an integer from 0 to 100). The score is
        also printed.
    """
    article1 = Article(url1)
    article2 = Article(url2)
    for article in (article1, article2):
        # ``keywords`` is only filled in after download -> parse -> nlp.
        # NOTE(review): this loop had no body in the reviewed source; the
        # three calls follow newspaper's documented workflow -- confirm.
        article.download()
        article.parse()
        article.nlp()

    wordnet_lemmatizer = WordNetLemmatizer()
    key1 = ' '.join(
        wordnet_lemmatizer.lemmatize(keyword) for keyword in article1.keywords
    )
    key2 = ' '.join(
        wordnet_lemmatizer.lemmatize(keyword) for keyword in article2.keywords
    )

    # Compute once so the printed and the returned value cannot diverge.
    score = fuzz.token_set_ratio(key1, key2)
    print('fuzz.token_set: {}'.format(score))
    return score
