Instantly share code, notes, and snippets.

Embed
What would you like to do?
Use Text Summarization Algorithms to Help Aid the Writing of Meta Descriptions
import csv
import os
from sumy.parsers.html import HtmlParser
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer as Lsa
from sumy.summarizers.luhn import LuhnSummarizer as Luhn
from sumy.summarizers.text_rank import TextRankSummarizer as TxtRank
from sumy.summarizers.lex_rank import LexRankSummarizer as LexRank
from sumy.summarizers.sum_basic import SumBasicSummarizer as SumBasic
from sumy.summarizers.kl import KLSummarizer as KL
from sumy.summarizers.edmundson import EdmundsonSummarizer as Edmundson
from sumy.nlp.stemmers import Stemmer
from sumy.utils import get_stop_words
# Script: read a text file of URLs (one per line), summarize each page with
# several sumy algorithms, and write one CSV row per (URL, summary sentence,
# algorithm) so the candidates can be compared when drafting meta descriptions.

LANGUAGE = "english"
SENTENCES_COUNT = 1

# Both the input and output paths are resolved relative to this script's
# directory, not the current working directory.
script_dir = os.path.dirname(__file__)

# These objects depend only on LANGUAGE, never on the URL being processed, so
# they are built once instead of once per URL.
stemmer = Stemmer(LANGUAGE)
stop_words = get_stop_words(LANGUAGE)
tokenizer = Tokenizer(LANGUAGE)

# (label written to the CSV, configured summarizer), in output order.
# NOTE: Edmundson is imported by the file but not run here; it would need extra
# word lists (e.g. bonus words) configured before it could be used.
summarizers = []
for label, summarizer_cls in (
    ("LSA", Lsa),
    ("Luhn", Luhn),
    ("LexRank", LexRank),
    ("TextRank", TxtRank),
    ("SumBasic", SumBasic),
    ("KL-Sum", KL),
):
    summarizer = summarizer_cls(stemmer)
    summarizer.stop_words = stop_words
    summarizers.append((label, summarizer))

urlinput = os.path.join(script_dir, input('Enter input text file: '))
# Context managers guarantee both files are closed (and the CSV flushed) even
# if fetching or summarizing a URL raises part-way through.
with open(urlinput, "r") as urls:
    outputcsv = os.path.join(
        script_dir,
        input('Enter a filename (minus file extension): ') + '.csv',
    )
    # newline="" is what the csv module documents for files handed to a writer.
    with open(outputcsv, "w", newline="", encoding="utf-8") as csv_file:
        f = csv.writer(csv_file)
        f.writerow(["URL", "Copy", "Summarization Algorithm"])
        for line in urls:
            # Lines read from a file keep their trailing newline; strip it so
            # it is neither sent as part of the URL nor embedded in the CSV.
            url = line.strip()
            if not url:
                # Skip blank lines rather than trying to fetch an empty URL.
                continue
            # Fetch and parse the page once; every summarizer reuses it.
            parser = HtmlParser.from_url(url, tokenizer)
            for label, summarizer in summarizers:
                for sentence in summarizer(parser.document, SENTENCES_COUNT):
                    print(sentence)
                    f.writerow([url, sentence, label])
                print("Summarizing URL via " + label + ": " + url)

print("Writing to " + outputcsv + " complete.")
@cyberandy

This comment has been minimized.

cyberandy commented Jun 28, 2018

@pshapiro I get the following error after entering the input text file (Sumy is working fine).

Traceback (most recent call last):
  File "metadesc.py", line 19, in <module>
    urlinput = os.path.join(os.path.dirname(__file__), input('Enter input text file: '))
  File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/posixpath.py", line 68, in join
    if b.startswith('/'):
AttributeError: 'builtin_function_or_method' object has no attribute 'startswith'

I tried with and without the extension - for testing I have a csv with two URLs. Many thanks in advance!

@cyberandy

This comment has been minimized.

cyberandy commented Jun 29, 2018

All good - it was a missing module 👍

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment