Skip to content

Instantly share code, notes, and snippets.

@allatambov
Last active March 10, 2020 16:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save allatambov/f85f9ba8233b89061bee587c63615027 to your computer and use it in GitHub Desktop.
Save allatambov/f85f9ba8233b89061bee587c63615027 to your computer and use it in GitHub Desktop.
import requests
from bs4 import BeautifulSoup
# Scrape the N+1 front page, collect the links that point at news articles,
# and pull the metadata of one article as a worked example.
url = 'https://nplus1.ru/'
page = requests.get(url)
# Name the parser explicitly: without it BeautifulSoup picks whichever parser
# happens to be installed (and warns), so results can differ between machines.
soup = BeautifulSoup(page.text, 'html.parser')

# Keep only anchors whose href mentions '/news'. Anchors without an href
# attribute make .get() return None, so they are skipped instead of crashing
# the substring test.
urls = []
for link in soup.find_all('a'):
    href = link.get('href')
    if href and '/news' in href:
        urls.append(href)

# NOTE(review): assumes the collected hrefs are site-relative paths; an
# absolute href would end up with the host prepended twice.
full_urls = ['https://nplus1.ru' + u for u in urls]

# Sample article: the second collected link (index 1).
url0 = full_urls[1]
page0 = requests.get(url0)
soup0 = BeautifulSoup(page0.text, 'html.parser')

# Article metadata is read from the "content" attribute of <meta> tags.
author = soup0.find_all('meta', {'name': 'author'})[0]['content']
date = soup0.find_all('meta', {'itemprop': 'datePublished'})[0]['content']
title = soup0.find_all('meta', {'property': 'og:title'})[0]['content']
description = soup0.find_all('meta', {'name': 'description'})[0]['content']
def GetNews(url0):
    """Download one news article and extract its metadata and body text.

    Args:
        url0: URL of the article page to fetch.

    Returns:
        A tuple ``(author, date, title, description, rubrics, news)``.
        The first four items are the ``content`` values of the page's
        ``author``, ``datePublished``, ``og:title`` and ``description``
        meta tags. ``rubrics`` is the list of link texts found in the first
        ``<p class="table">`` element (empty when the page has none).
        ``news`` is the text of all paragraphs without a class attribute,
        joined by single spaces, with non-breaking spaces replaced by plain
        spaces, cut at the "found a typo?" footer prompt and stripped of
        surrounding whitespace.

    Raises:
        IndexError: If one of the four meta tags is missing from the page.
        KeyError: If a matched meta tag has no ``content`` attribute.
    """
    page0 = requests.get(url0)
    # Name the parser explicitly: without it BeautifulSoup picks whichever
    # parser is installed (and warns), so output can vary between machines.
    soup0 = BeautifulSoup(page0.text, 'html.parser')

    def meta_content(attrs):
        # First <meta> tag matching attrs; its "content" attribute holds
        # the value.
        return soup0.find_all('meta', attrs)[0]['content']

    author = meta_content({'name': 'author'})
    date = meta_content({'itemprop': 'datePublished'})
    title = meta_content({'property': 'og:title'})
    description = meta_content({'name': 'description'})

    # Rubric names are the link texts inside the first <p class="table">.
    # A page without that paragraph yields an empty list instead of raising.
    rubric_blocks = soup0.find_all('p', {'class': 'table'})
    if rubric_blocks:
        rubrics = [a.text for a in rubric_blocks[0].find_all('a')]
    else:
        rubrics = []

    # Paragraphs without any class attribute are taken as the article body.
    raw = soup0.find_all('p', {'class': None})
    final = " ".join(r.text for r in raw).replace('\xa0', " ")
    # Cut everything from the footer prompt onwards, then strip, so that the
    # whitespace that preceded the prompt does not trail the returned text.
    news = final.split('Нашли опечатку?')[0].strip()

    return author, date, title, description, rubrics, news
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment