Skip to content

Instantly share code, notes, and snippets.

@chulanovskyi
Created December 11, 2016 00:14
Show Gist options
  • Save chulanovskyi/fbf6b4510e22bf5486de12f9fb08ba1e to your computer and use it in GitHub Desktop.
Save chulanovskyi/fbf6b4510e22bf5486de12f9fb08ba1e to your computer and use it in GitHub Desktop.
Разработать парсер
import requests
from bs4 import BeautifulSoup as BS
# Base URL of the python.org site: the page fetched by the script entry point
# and the prefix used to build each event page URL.
PY_PAGE = 'https://www.python.org'
def get_page(url, timeout=10):
    """Download ``url`` and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        The decoded response body (``Response.text``).

    Raises:
        requests.exceptions.RequestException: On connection failure,
            timeout, or an HTTP error status (4xx/5xx).
    """
    # Without an explicit timeout requests waits indefinitely on a server
    # that accepts the connection but never answers.
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of handing an error page to the parser.
    response.raise_for_status()
    return response.text
def parse_upcoming_events(page_html):
    """Collect details for every event linked from the 'Upcoming Events' block.

    The block is located through the text node ``'Upcoming Events'``: two
    levels above that node, the element with class ``menu`` holds one ``li``
    per event, each wrapping a link to the event page. The collected links
    are handed to ``get_events_info``.

    Args:
        page_html: HTML source of the page to scan.

    Returns:
        The result of ``get_events_info`` for the links found, or an empty
        list when the heading or its menu is not present in ``page_html``.
    """
    soup = BS(page_html, 'html.parser')
    # ``string=`` is the current spelling of the deprecated ``text=`` filter.
    heading = soup.find(string='Upcoming Events')
    if heading is None:
        return []

    container = heading.parent.parent
    menu = container.find(class_='menu') if container is not None else None
    if menu is None:
        return []

    # Skip list items that carry no usable link rather than crashing on them.
    event_links = [
        item.a['href']
        for item in menu.find_all('li')
        if item.a is not None and item.a.get('href')
    ]
    return get_events_info(event_links)
def _element_text(tag):
    """Return the text of ``tag`` with whitespace runs collapsed, or ``''`` if ``tag`` is None."""
    if tag is None:
        return ''
    return ' '.join(tag.get_text().split())


def get_events_info(links):
    """Fetch every event page and extract its title, location and date.

    Args:
        links: Iterable of event page addresses, either absolute URLs or
            paths relative to ``PY_PAGE``.

    Returns:
        A list with one dict per link, in input order, each holding the keys
        ``'title'``, ``'location'`` and ``'date'``. A value is ``''`` when
        the corresponding element is missing from the event page.
    """
    # Imported locally so this function does not rely on the module's
    # top-level import block being changed.
    from urllib.parse import urljoin

    events_info = []
    for link in links:
        # urljoin leaves absolute links intact and resolves relative ones,
        # which plain string concatenation does not.
        event_url = urljoin(PY_PAGE, link)
        soup = BS(get_page(event_url), 'html.parser')
        # get_text() is used for all three fields: ``.string`` is None for a
        # tag with more than one child, which made ``.strip()`` fail.
        events_info.append({
            'title': _element_text(soup.find(class_='single-event-title')),
            'location': _element_text(soup.find(class_='single-event-location')),
            'date': _element_text(soup.find(class_='single-event-date')),
        })
    return events_info
def pprint_events(events):
    """Print each event as a dashed rule followed by its three fields.

    Args:
        events: Iterable of mappings providing the keys ``'title'``,
            ``'location'`` and ``'date'``.
    """
    rule = '-' * 10
    layout = 'Title: {}\nLocation: {}\nDate: {}'
    for entry in events:
        print(rule)
        print(layout.format(entry['title'], entry['location'], entry['date']))
if __name__ == '__main__':
    # Script entry point: download the python.org front page, extract its
    # upcoming events and print them.
    pprint_events(parse_upcoming_events(get_page(PY_PAGE)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment