Skip to content

Instantly share code, notes, and snippets.

@rchacon
Last active August 4, 2016 03:59
Show Gist options
  • Save rchacon/1ab506bd70eee3f76cfd7c5985b3de4b to your computer and use it in GitHub Desktop.
Save rchacon/1ab506bd70eee3f76cfd7c5985b3de4b to your computer and use it in GitHub Desktop.
"""
Scrape shows from http://www.thechapelsf.com/music/
Usage:
$ python chapelsf.py
Sample output:
[ { 'artist': 'Emily King',
'show_date': 'Wed 8/03 Doors: 8:00 pm / Show: 9:00 pm',
'sold_out': True,
'ticket_price': '\n$20 adv / $22 door\t ',
'ticket_url': 'http://www.thechapelsf.com/music//event/1151171-emily-king-san-francisco/'},
{ 'artist': "Sinner's Happy Hour with Ted Savarese and the TedTones",
'show_date': 'Thu 8/04 5:00 pm / ',
'sold_out': False,
'ticket_price': None,
'ticket_url': 'http://www.thechapelsf.com/music//event/1261079-sinners-happy-hour-ted-san-francisco/'},
...]
"""
import sys
from bs4 import BeautifulSoup
import requests
URL = 'http://www.thechapelsf.com/music/'
def get_markup(timeout=10):
    """Download the HTML of the music listing page at ``URL``.

    Args:
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``; without it a stalled connection
            would block the script indefinitely.

    Returns:
        The response body as text.

    Raises:
        SystemExit: If the server answers with a non-OK HTTP status.
        requests.exceptions.RequestException: Connection failures and
            timeouts raised by ``requests`` are not caught here.
    """
    resp = requests.get(URL, timeout=timeout)
    if not resp.ok:
        sys.exit('HTTP status received {}'.format(resp.status_code))
    return resp.text
def parse_shows(markup):
    """Extract show listings from the music page HTML.

    Every element carrying the ``list-view-item`` class is treated as one
    show.

    Args:
        markup: HTML text of the music listing page.

    Returns:
        A list of dicts, one per listing, with the keys ``artist``,
        ``show_date``, ``ticket_price``, ``sold_out`` and ``ticket_url``.
        ``ticket_price`` is the raw (unstripped) text of the listing's
        ``price-range`` element, or None when there is no such element.
        ``sold_out`` is True when a ``sold-out`` element is present.
    """
    page = BeautifulSoup(markup, 'html.parser')
    parsed = []

    for listing in page.find_all(class_='list-view-item'):
        # The headliner link provides both the artist name and the href.
        headliner_link = listing.find(class_='headliners').find('a')
        artist = headliner_link.text

        show_date = listing.find(class_='dates').text

        # First <span> is the door time, second is the show time.
        times_tag = listing.find(class_='times')
        door_time = times_tag.find('span').text
        show_time = times_tag.find_all('span')[1].text

        # Price and sold-out marker both live inside the ticket-price box.
        ticket_box = listing.find(class_='ticket-price')
        price_tag = ticket_box.find(class_='price-range')
        ticket_price = price_tag.text if price_tag else None
        sold_out = bool(ticket_box.find(class_='sold-out'))

        href = headliner_link['href']

        parsed.append({
            'artist': artist,
            'show_date': '{} {} / {}'.format(show_date, door_time, show_time),
            'ticket_price': ticket_price,
            'sold_out': sold_out,
            'ticket_url': '{}{}'.format(URL, href)
        })

    return parsed
def main():
    """Fetch the music page and return the list of parsed shows."""
    return parse_shows(get_markup())
if __name__ == '__main__':
    # Script entry point: pretty-print the scraped shows.
    import pprint

    pprint.PrettyPrinter(indent=4).pprint(main())
"""
Copy and paste the page source of http://www.thechapelsf.com/music/ and save it as markup.html
Usage:
$ py.test tests.py
"""
import pytest
from chapelsf import parse_shows
@pytest.fixture(scope='module')
def markup():
    """Return the contents of ``markup.html``, read once per test module."""
    with open('markup.html') as html_file:
        return html_file.read()
def test_parse_shows(markup):
    """Check the first two shows parsed from the ``markup`` fixture."""
    shows = parse_shows(markup)

    expected_first = {
        'artist': 'Emily King',
        'show_date': 'Wed 8/03 Doors: 8:00 pm / Show: 9:00 pm',
        'sold_out': True,
        'ticket_price': '\n$20 adv / $22 door\t ',
        'ticket_url': 'http://www.thechapelsf.com/music//event/1151171-emily-king-san-francisco/',
    }
    assert expected_first == shows[0]

    expected_second = {
        'artist': "Sinner's Happy Hour with Ted Savarese and the TedTones",
        'show_date': 'Thu 8/04 5:00 pm / ',
        'sold_out': False,
        'ticket_price': None,
        'ticket_url': 'http://www.thechapelsf.com/music//event/1261079-sinners-happy-hour-ted-san-francisco/',
    }
    assert expected_second == shows[1]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment