Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
Use Text Summarization Algorithms to Help Aid the Writing of Meta Descriptions
import csv
import os
from sumy.parsers.html import HtmlParser
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer as Lsa
from sumy.summarizers.luhn import LuhnSummarizer as Luhn
from sumy.summarizers.text_rank import TextRankSummarizer as TxtRank
from sumy.summarizers.lex_rank import LexRankSummarizer as LexRank
from sumy.summarizers.sum_basic import SumBasicSummarizer as SumBasic
from sumy.summarizers.kl import KLSummarizer as KL
from sumy.summarizers.edmundson import EdmundsonSummarizer as Edmundson
from sumy.nlp.stemmers import Stemmer
from sumy.utils import get_stop_words
LANGUAGE = "english"

# Number of sentences each algorithm produces per URL.
# BUG FIX: this constant was used below but never defined (NameError).
SENTENCES_COUNT = 3

urlinput = os.path.join(os.path.dirname(__file__), input('Enter input text file: '))
outputcsv = os.path.join(os.path.dirname(__file__), input('Enter a filename (minus file extension): ') + '.csv')

# Build the stemmer, stop words, and all six summarizers once, up front —
# the original rebuilt every summarizer on each loop iteration for no benefit.
stemmer = Stemmer(LANGUAGE)
stop_words = get_stop_words(LANGUAGE)

# (label written to the CSV / progress line, summarizer instance)
summarizers = [
    ("LSA", Lsa(stemmer)),
    ("Luhn", Luhn(stemmer)),
    ("LexRank", LexRank(stemmer)),
    ("TextRank", TxtRank(stemmer)),
    ("SumBasic", SumBasic(stemmer)),
    ("KL-Sum", KL(stemmer)),
]
for _, summarizer in summarizers:
    summarizer.stop_words = stop_words

# Context managers guarantee both files are closed; the original leaked
# both the URL list handle and the CSV handle.
with open(urlinput, "r") as urls, \
        open(outputcsv, "w+", newline="\n", encoding="utf-8") as out:
    f = csv.writer(out)
    f.writerow(["URL", "Copy", "Summarization Algorithm"])
    for line in urls:
        # BUG FIX: strip the trailing newline before fetching — passing the
        # raw line to requests raises InvalidSchema ("No connection adapters
        # were found for ...\n"), the exact error reported by users.
        url = line.strip()
        if not url:
            continue  # tolerate blank lines in the input file
        parser = HtmlParser.from_url(url, Tokenizer(LANGUAGE))
        for name, summarizer in summarizers:
            print("Summarizing URL via " + name + ": " + url)
            # BUG FIX: the original only printed progress; no summary row was
            # ever written, so the CSV contained nothing but the header.
            for sentence in summarizer(parser.document, SENTENCES_COUNT):
                f.writerow([url, sentence, name])

print("Writing to " + outputcsv + " complete.")

This comment has been minimized.

Copy link

@cyberandy cyberandy commented Jun 28, 2018

@pshapiro I get the following error after entering the input text file (Sumy is working fine).

File "", line 19, in <module> urlinput = os.path.join(os.path.dirname(__file__), input('Enter input text file: ')) File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/", line 68, in join if b.startswith('/'): AttributeError: 'builtin_function_or_method' object has no attribute 'startswith'

I tried with and without the extension - for testing I have a csv with two URLs. Many thanks in advance!


This comment has been minimized.

Copy link

@cyberandy cyberandy commented Jun 29, 2018

All good - it was a missing module 👍


This comment has been minimized.

Copy link

@oeonurer oeonurer commented Dec 23, 2018

All good - it was a missing module


I also get the same error after entering the input text file (Sumy itself is working fine).

Help me?


This comment has been minimized.

Copy link

@venrine venrine commented Jan 11, 2019

Is there anything I am missing? I get this error. Thanks in advance.
Traceback (most recent call last):
File "", line 49, in
File "/Users/venrine/Documents/htmls/env/lib/python3.5/site-packages/sumy/parsers/", line 34, in from_url
data = fetch_url(url)
File "/Users/venrine/Documents/htmls/env/lib/python3.5/site-packages/sumy/", line 23, in fetch_url
with closing(requests.get(url, headers=_HTTP_HEADERS)) as response:
File "/Users/venrine/Documents/htmls/env/lib/python3.5/site-packages/requests/", line 75, in get
return request('get', url, params=params, **kwargs)
File "/Users/venrine/Documents/htmls/env/lib/python3.5/site-packages/requests/", line 60, in request
return session.request(method=method, url=url, **kwargs)
File "/Users/venrine/Documents/htmls/env/lib/python3.5/site-packages/requests/", line 533, in request
resp = self.send(prep, **send_kwargs)
File "/Users/venrine/Documents/htmls/env/lib/python3.5/site-packages/requests/", line 640, in send
adapter = self.get_adapter(url=request.url)
File "/Users/venrine/Documents/htmls/env/lib/python3.5/site-packages/requests/", line 731, in get_adapter
raise InvalidSchema("No connection adapters were found for '%s'" % url)
requests.exceptions.InvalidSchema: No connection adapters were found for

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment