# allatambov/soup

## soup
import requests
from bs4 import BeautifulSoup

# --- Step 1: collect news links from the N+1 front page -------------------
# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() stops us from parsing an HTTP error page as if it
# were the real front page.
page = requests.get("http://nplus1.ru/", timeout=30)
page.raise_for_status()
# Naming the parser avoids bs4's "no parser was explicitly specified"
# warning and keeps the parse tree identical across machines.
soup = BeautifulSoup(page.text, "html.parser")

# href=True skips <a> tags that have no href attribute; indexing such a tag
# with link["href"] would raise KeyError.
links_raw = soup.find_all("a", href=True)
links_all = [link["href"] for link in links_raw]

# Keep only links whose address contains "/news/".
news = [link for link in links_all if "/news/" in link]

# The site prefix is prepended unconditionally, so this assumes the hrefs
# are site-relative paths -- TODO confirm against the live markup.
news_full = ["https://nplus1.ru" + n for n in news]

# --- Step 2: scrape the first news article --------------------------------
# Raises IndexError if the front page yielded no "/news/" links.
link0 = news_full[0]
page0 = requests.get(link0, timeout=30)
page0.raise_for_status()
soup0 = BeautifulSoup(page0.text, "html.parser")

# Title comes from the <title> tag, the rest from <meta> tags.
title = soup0.find_all("title")[0].text
date = soup0.find("meta", {"itemprop": "datePublished"})["content"]
author = soup0.find("meta", {"name": "mediator_author"})["content"]
desc = soup0.find("meta", {"name": "description"})["content"]

# The indexing below expects at least three <p class="table"> elements:
# the first is read for rubric links, the second for a time <span>, the
# third for the difficulty value (order inferred from this code -- verify
# against the page).
tabs = soup0.find_all("p", {"class": "table"})
rubs_raw = tabs[0].find_all("a")
rubs_str = [r.text for r in rubs_raw]
rubrics = " ".join(rubs_str)

ntime = tabs[1].find("span").text
diffc = tabs[2].find("span", {"class": "difficult-value"}).text

# {"class": None} asks bs4 for <p> tags that carry no class attribute.
pars_raw = soup0.find_all("p", {"class": None})
pars_str = [p.text for p in pars_raw]
text = " ".join(pars_str)
# Replace non-breaking spaces with ordinary ones.
text = text.replace("\xa0", " ")
# Drop everything from the "Нашли опечатку?" marker onwards.
text_final = text.split("Нашли опечатку?")[0]


def get_info(link0, *, timeout=30):
    """Download one N+1 news page and extract its metadata and text.

    Args:
        link0: URL of the article page to fetch.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        A tuple ``(title, author, desc, date, ntime, rubrics, diffc,
        text_final)``. ``title`` is the text of the ``<title>`` tag;
        ``author``, ``desc`` and ``date`` are ``content`` values of
        ``<meta>`` tags; ``ntime``, ``rubrics`` and ``diffc`` are read from
        the ``<p class="table">`` elements; ``text_final`` is the joined
        text of all class-less ``<p>`` tags, cut at "Нашли опечатку?".

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status.
        TypeError, IndexError, AttributeError: If the page lacks one of the
            elements looked up below.
    """
    # A timeout prevents hanging on a stalled connection; raise_for_status()
    # prevents parsing an HTTP error page as if it were an article.
    page0 = requests.get(link0, timeout=timeout)
    page0.raise_for_status()
    # Naming the parser avoids bs4's "no parser was explicitly specified"
    # warning and keeps the parse tree identical across machines.
    soup0 = BeautifulSoup(page0.text, "html.parser")

    title = soup0.find_all("title")[0].text
    date = soup0.find("meta", {"itemprop": "datePublished"})["content"]
    author = soup0.find("meta", {"name": "mediator_author"})["content"]
    desc = soup0.find("meta", {"name": "description"})["content"]

    # The indexing below expects at least three <p class="table"> elements:
    # rubric links, a time <span>, and the difficulty value, in that order
    # (inferred from this code -- verify against the page).
    tabs = soup0.find_all("p", {"class": "table"})
    rubrics = " ".join(a.text for a in tabs[0].find_all("a"))
    ntime = tabs[1].find("span").text
    diffc = tabs[2].find("span", {"class": "difficult-value"}).text

    # {"class": None} asks bs4 for <p> tags that carry no class attribute.
    pars_raw = soup0.find_all("p", {"class": None})
    text = " ".join(p.text for p in pars_raw)
    # Replace non-breaking spaces with ordinary ones.
    text = text.replace("\xa0", " ")
    # Drop everything from the "Нашли опечатку?" marker onwards.
    text_final = text.split("Нашли опечатку?")[0]
    return title, author, desc, date, ntime, rubrics, diffc, text_final
	import requests
	from bs4 import BeautifulSoup

	# --- Step 1: collect news links from the N+1 front page ---------------
	# A timeout keeps the script from hanging forever on a stalled
	# connection, and raise_for_status() stops us from parsing an HTTP error
	# page as if it were the real front page.
	page = requests.get("http://nplus1.ru/", timeout=30)
	page.raise_for_status()
	# Naming the parser avoids bs4's "no parser was explicitly specified"
	# warning and keeps the parse tree identical across machines.
	soup = BeautifulSoup(page.text, "html.parser")

	# href=True skips <a> tags that have no href attribute; indexing such a
	# tag with link["href"] would raise KeyError.
	links_raw = soup.find_all("a", href=True)
	links_all = [link["href"] for link in links_raw]

	# Keep only links whose address contains "/news/".
	news = [link for link in links_all if "/news/" in link]

	# The site prefix is prepended unconditionally, so this assumes the
	# hrefs are site-relative paths -- TODO confirm against the live markup.
	news_full = ["https://nplus1.ru" + n for n in news]

	# --- Step 2: scrape the first news article ----------------------------
	# Raises IndexError if the front page yielded no "/news/" links.
	link0 = news_full[0]
	page0 = requests.get(link0, timeout=30)
	page0.raise_for_status()
	soup0 = BeautifulSoup(page0.text, "html.parser")

	# Title comes from the <title> tag, the rest from <meta> tags.
	title = soup0.find_all("title")[0].text
	date = soup0.find("meta", {"itemprop": "datePublished"})["content"]
	author = soup0.find("meta", {"name": "mediator_author"})["content"]
	desc = soup0.find("meta", {"name": "description"})["content"]

	# The indexing below expects at least three <p class="table"> elements:
	# the first is read for rubric links, the second for a time <span>, the
	# third for the difficulty value (order inferred from this code --
	# verify against the page).
	tabs = soup0.find_all("p", {"class": "table"})
	rubs_raw = tabs[0].find_all("a")
	rubs_str = [r.text for r in rubs_raw]
	rubrics = " ".join(rubs_str)

	ntime = tabs[1].find("span").text
	diffc = tabs[2].find("span", {"class": "difficult-value"}).text

	# {"class": None} asks bs4 for <p> tags that carry no class attribute.
	pars_raw = soup0.find_all("p", {"class": None})
	pars_str = [p.text for p in pars_raw]
	text = " ".join(pars_str)
	# Replace non-breaking spaces with ordinary ones.
	text = text.replace("\xa0", " ")
	# Drop everything from the "Нашли опечатку?" marker onwards.
	text_final = text.split("Нашли опечатку?")[0]


	def get_info(link0, *, timeout=30):
		"""Download one N+1 news page and extract its metadata and text.

		Args:
			link0: URL of the article page to fetch.
			timeout: Seconds to wait for the server before giving up;
				passed straight to ``requests.get``.

		Returns:
			A tuple ``(title, author, desc, date, ntime, rubrics, diffc,
			text_final)``. ``title`` is the text of the ``<title>`` tag;
			``author``, ``desc`` and ``date`` are ``content`` values of
			``<meta>`` tags; ``ntime``, ``rubrics`` and ``diffc`` are read
			from the ``<p class="table">`` elements; ``text_final`` is the
			joined text of all class-less ``<p>`` tags, cut at
			"Нашли опечатку?".

		Raises:
			requests.RequestException: On network failure, timeout, or an
				HTTP error status.
			TypeError, IndexError, AttributeError: If the page lacks one of
				the elements looked up below.
		"""
		# A timeout prevents hanging on a stalled connection;
		# raise_for_status() prevents parsing an HTTP error page as if it
		# were an article.
		page0 = requests.get(link0, timeout=timeout)
		page0.raise_for_status()
		# Naming the parser avoids bs4's "no parser was explicitly
		# specified" warning and keeps the parse tree identical across
		# machines.
		soup0 = BeautifulSoup(page0.text, "html.parser")

		title = soup0.find_all("title")[0].text
		date = soup0.find("meta", {"itemprop": "datePublished"})["content"]
		author = soup0.find("meta", {"name": "mediator_author"})["content"]
		desc = soup0.find("meta", {"name": "description"})["content"]

		# The indexing below expects at least three <p class="table">
		# elements: rubric links, a time <span>, and the difficulty value,
		# in that order (inferred from this code -- verify against the page).
		tabs = soup0.find_all("p", {"class": "table"})
		rubrics = " ".join(a.text for a in tabs[0].find_all("a"))
		ntime = tabs[1].find("span").text
		diffc = tabs[2].find("span", {"class": "difficult-value"}).text

		# {"class": None} asks bs4 for <p> tags that carry no class attribute.
		pars_raw = soup0.find_all("p", {"class": None})
		text = " ".join(p.text for p in pars_raw)
		# Replace non-breaking spaces with ordinary ones.
		text = text.replace("\xa0", " ")
		# Drop everything from the "Нашли опечатку?" marker onwards.
		text_final = text.split("Нашли опечатку?")[0]
		return title, author, desc, date, ntime, rubrics, diffc, text_final