# empireshades/newsphrases.py

## newsphrases.py
from textblob import TextBlob, Word
import sys
import random
from newspaper import Article
from fuzzywuzzy import fuzz
from nltk.stem import WordNetLemmatizer

def nphra(text):
    """Print every noun phrase TextBlob extracts from ``text``, one per line.

    Args:
        text: The raw text to analyse.
    """
    phrases = TextBlob(text).noun_phrases
    for phrase in phrases:
        print(phrase)

def summary(text):
    """Print and return the lemmatized (root) form of every noun in ``text``.

    Args:
        text: The raw text to analyse.

    Returns:
        A list with one lemma per noun token, in the order the tokens
        appear in ``text``. The same list is printed before returning.
    """
    blob = TextBlob(text)

    # Accept every noun tag, not just singular common nouns: comparing
    # against 'NN' alone skipped plurals (NNS) and proper nouns (NNP, NNPS),
    # which contradicts "every noun" and leaves out exactly the plural
    # forms that lemmatization is meant to normalize.
    nouns = [word.lemmatize() for word, tag in blob.tags if tag.startswith('NN')]

    print(nouns)
    return nouns

def cmp_news(url1, url2):
    """Download, parse and compare two articles given by URL.

    The keywords of each article are lemmatized and joined into one
    space-separated string; the two strings are then compared with
    ``fuzz.token_set_ratio``. Both keyword strings and the score are
    printed.

    Args:
        url1: URL of the first article.
        url2: URL of the second article.

    Returns:
        The score produced by ``fuzz.token_set_ratio`` for the two keyword
        strings. (Previously the score was only printed, although the
        docstring promised a return value.)
    """
    first = Article(url1)
    second = Article(url2)
    for article in (first, second):
        article.download()
        article.parse()
        # Keywords are read below only after nlp() has run on the article.
        article.nlp()

    lemmatizer = WordNetLemmatizer()
    key1 = ' '.join(lemmatizer.lemmatize(keyword) for keyword in first.keywords)
    key2 = ' '.join(lemmatizer.lemmatize(keyword) for keyword in second.keywords)

    # Compute the score once so the printed and returned values always agree.
    score = fuzz.token_set_ratio(key1, key2)

    print(key1,'\n',key2)
    print('fuzz.token_set: {}'.format(score))
    return score
	from textblob import TextBlob, Word
	import sys
	import random
	from newspaper import Article
	from fuzzywuzzy import fuzz
	from nltk.stem import WordNetLemmatizer

	def nphra(text):
	    """Print every noun phrase TextBlob extracts from ``text``, one per line.

	    Args:
	        text: The raw text to analyse.
	    """
	    # Body indentation restored: the statements had been flattened to the
	    # same level as the ``def`` line, leaving the function without a body.
	    blob = TextBlob(text)
	    for phrase in blob.noun_phrases:
	        print(phrase)

	def summary(text):
	    """Print and return the lemmatized (root) form of every noun in ``text``.

	    Args:
	        text: The raw text to analyse.

	    Returns:
	        A list with one lemma per noun token, in the order the tokens
	        appear in ``text``. The same list is printed before returning.
	    """
	    # Body indentation restored: the statements had been flattened to the
	    # same level as the ``def`` line, leaving the function without a body.
	    blob = TextBlob(text)

	    # Accept every noun tag (NN, NNS, NNP, NNPS) rather than only 'NN',
	    # so plural and proper nouns are lemmatized as well.
	    nouns = [word.lemmatize() for word, tag in blob.tags if tag.startswith('NN')]

	    print(nouns)
	    return nouns

	def cmp_news(url1, url2):
	    """Download, parse and compare two articles given by URL.

	    The keywords of each article are lemmatized and joined into one
	    space-separated string; the two strings are then compared with
	    ``fuzz.token_set_ratio``. Both keyword strings and the score are
	    printed.

	    Args:
	        url1: URL of the first article.
	        url2: URL of the second article.

	    Returns:
	        The score produced by ``fuzz.token_set_ratio`` for the two keyword
	        strings.
	    """
	    # Body indentation restored: the statements had been flattened to the
	    # same level as the ``def`` line, leaving the function without a body.
	    first = Article(url1)
	    second = Article(url2)
	    for article in (first, second):
	        article.download()
	        article.parse()
	        # Keywords are read below only after nlp() has run on the article.
	        article.nlp()

	    lemmatizer = WordNetLemmatizer()
	    key1 = ' '.join(lemmatizer.lemmatize(keyword) for keyword in first.keywords)
	    key2 = ' '.join(lemmatizer.lemmatize(keyword) for keyword in second.keywords)

	    # Compute the score once so the printed and returned values always agree.
	    score = fuzz.token_set_ratio(key1, key2)

	    print(key1,'\n',key2)
	    print('fuzz.token_set: {}'.format(score))
	    return score