# Abhayparashar31/gensim_text_summarizer.py

## gensim_text_summarizer.py
import gensim
import re
from gensim.summarization.summarizer import summarize
import requests
from bs4 import BeautifulSoup

# Wikipedia article whose paragraph text is summarized below.
url = 'https://en.wikipedia.org/wiki/Python_(programming_language)'

# Bound the request so a stalled connection cannot hang the script, and fail
# loudly on an HTTP error instead of summarizing an error page.
res = requests.get(url, timeout=30)
res.raise_for_status()
soup = BeautifulSoup(res.text, 'html.parser')

# Query the document for <p> elements once (not once per paragraph) and keep
# the stripped text of each one.
extracted_rows_content = [p.getText().strip() for p in soup.select('p')]
raw_data = " ".join(extracted_rows_content)

import re
def clean_data(data: str) -> str:
    """Normalize scraped article text for summarization.

    Bracketed numeric citation markers such as ``[12]`` (and empty ``[]``)
    and all commas are replaced with spaces, the text is lower-cased, and
    every run of whitespace is collapsed to a single space.

    Args:
        data: Raw article text.

    Returns:
        The cleaned, lower-cased text. Leading/trailing whitespace is
        collapsed to a single space rather than stripped.
    """
    # Replace citations and commas *before* collapsing whitespace so the
    # spaces they leave behind are merged instead of surviving as doubles.
    text = re.sub(r"\[[0-9]*\]|,", " ", data)
    text = text.lower()
    return re.sub(r"\s+", " ", text)
# Normalize the scraped article text before handing it to the summarizer.
cleaned_article_content = clean_data(raw_data)

# ratio=0.01 asks the summarizer to keep roughly 1% of the sentences.
# NOTE(review): gensim.summarization was removed in gensim 4.0, so this
# presumably requires gensim < 4 -- confirm the pinned version.
summary = summarize(cleaned_article_content, ratio=0.01)
# Drop any bracketed spans left in the summary. The pattern is a raw string
# because '\[' and '\]' are invalid escape sequences in a plain literal.
summary = re.sub(r'\[[^\]]*\]', '', summary)
print(summary)
	import gensim
	import re
	from gensim.summarization.summarizer import summarize
	import requests
	from bs4 import BeautifulSoup

	# NOTE(review): this tab-indented section repeats the script at the top of
	# the file -- confirm whether the duplicate is intentional.
	# Wikipedia article whose paragraph text is summarized below.
	url = 'https://en.wikipedia.org/wiki/Python_(programming_language)'

	# Bound the request so a stalled connection cannot hang the script, and
	# fail loudly on an HTTP error instead of summarizing an error page.
	res = requests.get(url, timeout=30)
	res.raise_for_status()
	soup = BeautifulSoup(res.text, 'html.parser')

	# Query the document for <p> elements once (not once per paragraph) and
	# keep the stripped text of each one.
	extracted_rows_content = [p.getText().strip() for p in soup.select('p')]
	raw_data = " ".join(extracted_rows_content)

	import re
	def clean_data(data: str) -> str:
		"""Normalize scraped article text for summarization.

		Bracketed numeric citation markers such as ``[12]`` (and empty ``[]``)
		and all commas are replaced with spaces, the text is lower-cased, and
		every run of whitespace is collapsed to a single space.

		Args:
			data: Raw article text.

		Returns:
			The cleaned, lower-cased text. Leading/trailing whitespace is
			collapsed to a single space rather than stripped.
		"""
		# Replace citations and commas *before* collapsing whitespace so the
		# spaces they leave behind are merged instead of surviving as doubles.
		text = re.sub(r"\[[0-9]*\]|,", " ", data)
		text = text.lower()
		return re.sub(r"\s+", " ", text)
	# Normalize the scraped article text before handing it to the summarizer.
	cleaned_article_content = clean_data(raw_data)

	# ratio=0.01 asks the summarizer to keep roughly 1% of the sentences.
	# NOTE(review): gensim.summarization was removed in gensim 4.0, so this
	# presumably requires gensim < 4 -- confirm the pinned version.
	summary = summarize(cleaned_article_content, ratio=0.01)
	# Drop any bracketed spans left in the summary. The pattern is a raw
	# string because '\[' and '\]' are invalid escapes in a plain literal.
	summary = re.sub(r'\[[^\]]*\]', '', summary)
	print(summary)