Skip to content

Instantly share code, notes, and snippets.

@Sebastian-Nielsen
Created March 16, 2018 19:14
Show Gist options
  • Save Sebastian-Nielsen/3bc45cbba6cb25837f5a6f11dbeeb044 to your computer and use it in GitHub Desktop.
Save Sebastian-Nielsen/3bc45cbba6cb25837f5a6f11dbeeb044 to your computer and use it in GitHub Desktop.
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.stem.snowball import SnowballStemmer
import nltk
# If NLTK raises a LookupError about missing data (e.g. the stopwords corpus or the punkt tokenizer models), uncomment the next line and download the required resources
#nltk.download()
# Extractive summarizer.
#
# 1. Count how often each stemmed, non-stop-word token occurs in the text.
# 2. Score every sentence by summing the counts of the tokens it contains.
# 3. Print the sentences whose score exceeds 1.2x the average sentence score.

# Paste the text to summarize between the triple quotes.
text = """ """

stemmer = SnowballStemmer("english")
stopWords = set(stopwords.words("english"))

# Frequency table: lower-cased, stemmed token -> number of occurrences in the
# whole text. Stop words are dropped before stemming.
words = word_tokenize(text)
freqTable = dict()
for word in words:
    word = word.lower()
    if word in stopWords:
        continue
    word = stemmer.stem(word)
    freqTable[word] = freqTable.get(word, 0) + 1

# Sentence scores: sentence text -> sum of the frequencies of every table
# entry that occurs in the lower-cased sentence. This is a substring test,
# not a token match, so a stem also matches inside longer words.
sentences = sent_tokenize(text)
sentenceValue = dict()
for sentence in sentences:
    # Lower-case once per sentence rather than once per table entry.
    loweredSentence = sentence.lower()
    score = sum(
        freq for term, freq in freqTable.items() if term in loweredSentence
    )
    # Only sentences that matched at least one term get an entry. The score
    # is assigned, not accumulated, so a sentence that appears several times
    # in the text is not scored once per occurrence.
    if score:
        sentenceValue[sentence] = score

sumValues = sum(sentenceValue.values())

# Average value of a sentence from original text. Falls back to 0 when no
# sentence was scored (empty or stop-word-only text) instead of dividing by
# zero.
average = int(sumValues / len(sentenceValue)) if sentenceValue else 0

# Keep, in original order, every sentence scoring more than 1.2x the average;
# each kept sentence is prefixed with a single space.
summary = "".join(
    " " + sentence
    for sentence in sentences
    if sentence in sentenceValue and sentenceValue[sentence] > 1.2 * average
)
print(summary)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment