# B-R-P/summarizer.py

## summarizer.py
import nltk
import numpy as np
from nltk.stem import PorterStemmer
from nltk.text import TextCollection
from nltk.corpus import stopwords as sw
from nltk.tokenize import word_tokenize, sent_tokenize
# Stemming function: the bound ``stem`` method of a single PorterStemmer instance.
stem = PorterStemmer().stem
# Request the NLTK stop-word corpus; this call runs every time the module is loaded.
# NOTE(review): word_tokenize/sent_tokenize presumably also need the Punkt
# tokenizer data, which is not downloaded here -- confirm it is installed.
nltk.download('stopwords')
# English stop words; RankText drops every token found in this collection.
stopWords = sw.words('english')
def normalize(array):
	"""Scale *array* so that its largest element becomes 1.

	Args:
		array: Array-like of numbers (tuple, list or ndarray).

	Returns:
		An ndarray holding every element divided by the maximum of *array*.
		An empty or all-zero input yields zeros of the same shape, because
		there is no non-zero maximum to divide by.
	"""
	values = np.asarray(array)
	# np.max raises on an empty array and dividing by a zero maximum fills the
	# result with NaN, so both cases short-circuit to zeros.
	if values.size == 0 or not values.any():
		return np.zeros(values.shape, dtype=float)
	return np.divide(values, np.max(values))
def RankText(text):
	"""Rank the sentences of *text* by the importance of the words they contain.

	Each sentence is lower-cased, tokenised, stripped of stop words and
	non-alphabetic tokens, and stemmed. Every distinct stem is ranked by its
	inverse document frequency (each sentence counts as one document), scaled
	by the largest such value. A sentence's score is the sum of the ranks of
	the distinct stems it contains divided by its token count, scaled by the
	largest sentence score.

	Args:
		text: The raw text to analyse.

	Returns:
		A 2-tuple ``(sentence_ranks, word_ranks)``. ``sentence_ranks`` is a
		tuple of ``(sentence, score)`` pairs in document order and
		``word_ranks`` maps each stem to its rank. Text without any content
		word yields a score of 0.0 for every sentence and an empty mapping.
	"""
	sentences = sent_tokenize(text)
	stop_words = set(stopWords)  # set lookup instead of scanning the list per token
	token_lists = tuple(
		[
			stem(word)
			for word in word_tokenize(sentence.lower())
			if word.isalpha() and word not in stop_words
		]
		for sentence in sentences
	)
	if not any(token_lists):
		# Nothing to rank (empty text, or only stop words/punctuation); scaling
		# by a maximum is undefined when there are no values at all.
		return tuple((sentence, 0.0) for sentence in sentences), {}

	sentenceCollection = TextCollection(token_lists)
	uniqueWords = tuple(set(sentenceCollection.tokens))
	# Rank words by inverse document frequency across the sentences.
	word_idf = np.array(
		[sentenceCollection.idf(word) for word in uniqueWords], dtype=float
	)
	# If every stem occurs in every sentence all IDFs are 0; dividing by that
	# maximum would produce NaN, so the zeros are kept unchanged.
	word_scores = normalize(word_idf) if word_idf.max() > 0 else word_idf
	RankedWords = dict(zip(uniqueWords, word_scores))

	# Sum the ranks of the distinct stems of each sentence and divide by the
	# token count to avoid a bias towards long sentences.
	raw_scores = np.array([
		sum(RankedWords[word] for word in set(tokens)) / len(tokens) if tokens else 0.0
		for tokens in token_lists
	], dtype=float)
	sentence_scores = normalize(raw_scores) if raw_scores.max() > 0 else raw_scores
	return tuple(zip(sentences, sentence_scores)), RankedWords
def getAboveAvg(Sranks):
	"""Return the entries of *Sranks* whose score is strictly above the mean score.

	Args:
		Sranks: Sequence of items whose element at index 1 is a numeric score,
			such as the ``(sentence, score)`` pairs produced by RankText.

	Returns:
		A tuple with the items scoring above the average, in their original
		order. An empty input yields an empty tuple, so the function can be
		applied repeatedly even after a pass has selected nothing.
	"""
	if len(Sranks) == 0:
		return ()  # no scores to average; avoids dividing by zero
	avg = sum(item[1] for item in Sranks) / len(Sranks)
	return tuple(item for item in Sranks if item[1] > avg)
# Read the sample document; the with-block closes the file handle after reading.
with open('sample.txt') as sample_file:
	Sranks, Wranks = RankText(sample_file.read())
# Keep the sentences above the mean score, then those above the mean of that
# subset. sent_tokenize drops the whitespace between sentences, so they are
# rejoined with a single space instead of being glued together.
print(" ".join(item[0] for item in getAboveAvg(getAboveAvg(Sranks))))
print(Wranks)
	import nltk
	import numpy as np
	from nltk.stem import PorterStemmer
	from nltk.text import TextCollection
	from nltk.corpus import stopwords as sw
	from nltk.tokenize import word_tokenize, sent_tokenize
	# Stemming function: the bound ``stem`` method of a single PorterStemmer instance.
	stem = PorterStemmer().stem
	# Request the NLTK stop-word corpus; this call runs every time the module is loaded.
	# NOTE(review): word_tokenize/sent_tokenize presumably also need the Punkt
	# tokenizer data, which is not downloaded here -- confirm it is installed.
	nltk.download('stopwords')
	# English stop words; RankText drops every token found in this collection.
	stopWords = sw.words('english')
	def normalize(array):
		"""Scale *array* so that its largest element becomes 1.

		Args:
			array: Array-like of numbers (tuple, list or ndarray).

		Returns:
			An ndarray holding every element divided by the maximum of *array*.
			An empty or all-zero input yields zeros of the same shape, because
			there is no non-zero maximum to divide by.
		"""
		values = np.asarray(array)
		# np.max raises on an empty array and dividing by a zero maximum fills
		# the result with NaN, so both cases short-circuit to zeros.
		if values.size == 0 or not values.any():
			return np.zeros(values.shape, dtype=float)
		return np.divide(values, np.max(values))
	def RankText(text):
		"""Rank the sentences of *text* by the importance of the words they contain.

		Each sentence is lower-cased, tokenised, stripped of stop words and
		non-alphabetic tokens, and stemmed. Every distinct stem is ranked by
		its inverse document frequency (each sentence counts as one document),
		scaled by the largest such value. A sentence's score is the sum of the
		ranks of the distinct stems it contains divided by its token count,
		scaled by the largest sentence score.

		Args:
			text: The raw text to analyse.

		Returns:
			A 2-tuple ``(sentence_ranks, word_ranks)``. ``sentence_ranks`` is a
			tuple of ``(sentence, score)`` pairs in document order and
			``word_ranks`` maps each stem to its rank. Text without any content
			word yields a score of 0.0 for every sentence and an empty mapping.
		"""
		sentences = sent_tokenize(text)
		stop_words = set(stopWords)  # set lookup instead of scanning the list per token
		token_lists = tuple(
			[
				stem(word)
				for word in word_tokenize(sentence.lower())
				if word.isalpha() and word not in stop_words
			]
			for sentence in sentences
		)
		if not any(token_lists):
			# Nothing to rank (empty text, or only stop words/punctuation);
			# scaling by a maximum is undefined when there are no values.
			return tuple((sentence, 0.0) for sentence in sentences), {}

		sentenceCollection = TextCollection(token_lists)
		uniqueWords = tuple(set(sentenceCollection.tokens))
		# Rank words by inverse document frequency across the sentences.
		word_idf = np.array(
			[sentenceCollection.idf(word) for word in uniqueWords], dtype=float
		)
		# If every stem occurs in every sentence all IDFs are 0; dividing by
		# that maximum would produce NaN, so the zeros are kept unchanged.
		word_scores = normalize(word_idf) if word_idf.max() > 0 else word_idf
		RankedWords = dict(zip(uniqueWords, word_scores))

		# Sum the ranks of the distinct stems of each sentence and divide by
		# the token count to avoid a bias towards long sentences.
		raw_scores = np.array([
			sum(RankedWords[word] for word in set(tokens)) / len(tokens) if tokens else 0.0
			for tokens in token_lists
		], dtype=float)
		sentence_scores = normalize(raw_scores) if raw_scores.max() > 0 else raw_scores
		return tuple(zip(sentences, sentence_scores)), RankedWords
	def getAboveAvg(Sranks):
		"""Return the entries of *Sranks* whose score is strictly above the mean score.

		Args:
			Sranks: Sequence of items whose element at index 1 is a numeric
				score, such as the ``(sentence, score)`` pairs produced by
				RankText.

		Returns:
			A tuple with the items scoring above the average, in their original
			order. An empty input yields an empty tuple, so the function can be
			applied repeatedly even after a pass has selected nothing.
		"""
		if len(Sranks) == 0:
			return ()  # no scores to average; avoids dividing by zero
		avg = sum(item[1] for item in Sranks) / len(Sranks)
		return tuple(item for item in Sranks if item[1] > avg)
	# Read the sample document; the with-block closes the file handle after reading.
	with open('sample.txt') as sample_file:
		Sranks, Wranks = RankText(sample_file.read())
	# Keep the sentences above the mean score, then those above the mean of that
	# subset. sent_tokenize drops the whitespace between sentences, so they are
	# rejoined with a single space instead of being glued together.
	print(" ".join(item[0] for item in getAboveAvg(getAboveAvg(Sranks))))
	print(Wranks)