Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
def get_info(my_link):
    """Download an article page and extract its metadata and body text.

    The page at ``my_link`` is fetched with ``requests`` and parsed with
    Beautiful Soup. Values are read from ``<meta>`` tags, the ``<title>``
    tag, the first ``<div class="tables">`` and every ``<p>`` that has no
    ``class`` attribute.

    Args:
        my_link: URL of the page to scrape.

    Returns:
        A list of eight strings, in this order:
        ``[title, description, author, date_published, time, difficulty,
        rubrics, text]`` where

        * ``title`` is the text of the ``<title>`` tag;
        * ``description`` is the ``content`` of ``<meta name="description">``;
        * ``author`` is the ``content`` of ``<meta name="mediator_author">``;
        * ``date_published`` is the ``content`` of
          ``<meta itemprop="datePublished">``;
        * ``time`` is the text of the first ``<span>`` in the second ``<p>``
          of the ``tables`` div (presumably a reading time - confirm against
          the target site);
        * ``difficulty`` is the text of the ``span.difficult-value`` in the
          third ``<p>`` of that div;
        * ``rubrics`` is the link texts from the first ``<p>`` of that div,
          joined with ``", "``;
        * ``text`` is all class-less paragraphs joined with single spaces,
          with non-breaking spaces normalised and everything from the
          "Нашли опечатку?" marker onwards removed.

    Raises:
        TypeError: If one of the expected ``<meta>`` tags is missing
            (``find`` returns ``None``, which is then subscripted).
        AttributeError: If the ``<title>`` tag, the ``tables`` div or one of
            the expected ``<span>`` elements is missing.
        IndexError: If the ``tables`` div holds fewer than three ``<p>`` tags.
    """
    # NOTE(review): no timeout is passed, so a stalled server blocks this call
    # indefinitely; consider adding one once callers can handle the exception.
    my_page = requests.get(my_link)

    # Name the parser explicitly. Without it Beautiful Soup picks whichever
    # parser happens to be installed and emits GuessedAtParserWarning, so the
    # resulting tree could differ from one environment to another.
    my_soup = BeautifulSoup(my_page.text, "html.parser")

    author = my_soup.find("meta", {"name": "mediator_author"})["content"]
    date = my_soup.find("meta", {"itemprop": "datePublished"})["content"]
    title = my_soup.find("title").text
    desc = my_soup.find("meta", {"name": "description"})["content"]

    # The "tables" div carries three paragraphs that are read positionally:
    # [0] links, [1] a span with the time value, [2] the difficulty value.
    div = my_soup.find("div", {"class": "tables"})
    tabs = div.find_all("p")
    rubs_str = ", ".join(r.text for r in tabs[0].find_all("a"))
    # Named time_text rather than time to avoid shadowing the stdlib module.
    time_text = tabs[1].find("span").text
    difficulty = tabs[2].find("span", {"class": "difficult-value"}).text

    # {"class": None} matches only <p> tags that have no class attribute.
    pars_raw = my_soup.find_all("p", {"class": None})
    text = " ".join(p.text for p in pars_raw)
    text = text.replace("\xa0", " ")  # non-breaking space -> regular space
    # Keep only what precedes the "Found a typo?" marker in the page text.
    text = text.split("Нашли опечатку?")[0]

    return [title, desc, author, date, time_text, difficulty, rubs_str, text]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment