Skip to content

Instantly share code, notes, and snippets.

@allatambov
Created June 2, 2022 12:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save allatambov/a9e3ee4f52114e097cb5269aec467416 to your computer and use it in GitHub Desktop.
Save allatambov/a9e3ee4f52114e097cb5269aec467416 to your computer and use it in GitHub Desktop.
def get_info(my_link, timeout=30):
    """Download an article page and extract its metadata and body text.

    Args:
        my_link: URL of the page to fetch.
        timeout: Seconds to wait for the server before ``requests`` gives
            up. Without a timeout a stalled connection would block forever.

    Returns:
        A list of eight strings, in this order:
        ``[title, desc, author, date, time, rubs_str, diffc, text]`` where

        * ``title`` is the text of the ``<title>`` tag,
        * ``desc`` is the ``content`` of ``<meta name="description">``,
        * ``author`` is the ``content`` of ``<meta name="mediator_author">``,
        * ``date`` is the ``content`` of ``<meta itemprop="datePublished">``,
        * ``time`` is the text of the first ``<span>`` in the second ``<p>``
          of ``<div class="tables">``,
        * ``rubs_str`` is the comma-separated link texts from the first
          ``<p>`` of that div (presumably the article rubrics),
        * ``diffc`` is the text of ``<span class="difficult-value">`` in the
          third ``<p>`` of that div,
        * ``text`` is the joined text of all class-less ``<p>`` tags, cut
          off at the "Нашли опечатку?" marker.

    Raises:
        TypeError, AttributeError, IndexError: if the page lacks one of the
            expected elements (``find`` returns ``None`` for a missing tag).
        requests.exceptions.RequestException: on network failure or timeout.
    """
    my_page = requests.get(my_link, timeout=timeout)
    # Name the parser explicitly: otherwise bs4 picks whichever parser is
    # installed, warns about it, and results may differ between machines.
    my_soup = BeautifulSoup(my_page.text, "html.parser")

    # Metadata stored in <meta> tags and the page <title>.
    author = my_soup.find("meta", {"name": "mediator_author"})["content"]
    date = my_soup.find("meta", {"itemprop": "datePublished"})["content"]
    title = my_soup.find("title").text
    desc = my_soup.find("meta", {"name": "description"})["content"]

    # The "tables" div holds three <p> blocks that are read by position.
    div = my_soup.find("div", {"class": "tables"})
    tabs = div.find_all("p")
    rubs_str = ", ".join(a.text for a in tabs[0].find_all("a"))
    # Named time_text rather than `time` so the stdlib module name is not
    # shadowed inside the function.
    time_text = tabs[1].find("span").text
    diffc = tabs[2].find("span", {"class": "difficult-value"}).text

    # {"class": None} selects only <p> tags that have no class attribute.
    pars_raw = my_soup.find_all("p", {"class": None})
    text = " ".join(p.text for p in pars_raw)
    # Drop everything from the "found a typo?" footer onwards.
    text = text.split("Нашли опечатку?")[0]
    # Normalise non-breaking spaces and newlines to plain spaces.
    text = text.replace("\xa0", " ").replace("\n", " ")

    return [title, desc, author, date, time_text, rubs_str, diffc, text]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment