noelleleigh/get_sunbeam.py

## get_sunbeam.py
#!/usr/bin/env python3
'''Download the comic *On a Sunbeam*'''
import shutil
from urllib.parse import urljoin, urlparse
from os.path import basename
import requests
from bs4 import BeautifulSoup


def get_chapter_links(url):
    '''
    Yield absolute urls for the chapter links found in the html at `url`.

    Every `<a>` element whose `href` attribute starts with "chapter" is
    yielded, resolved against `url`. Anchors that have no `href` attribute
    are skipped.
    '''
    homepage_response = requests.get(url)
    soup = BeautifulSoup(homepage_response.text, 'html.parser')

    # `href=True` restricts the search to anchors that actually carry an
    # href, so `startswith` is never called on None.
    for link_element in soup.find_all('a', href=True):
        href = link_element['href']
        if href.startswith('chapter'):
            yield urljoin(url, href)


def get_chapter_image_links(url, homepage):
    '''
    Yield the urls of images from the html at `url` from the website root
    `homepage`.

    The images are from `<img>` elements with the class of `chapter-page` and
    the urls are in the `data-original` attribute. Each url is resolved
    against `homepage`. Elements whose `data-original` attribute is missing
    or empty are skipped.
    '''
    chapter_response = requests.get(url)
    soup = BeautifulSoup(chapter_response.text, 'html.parser')

    for chapter_img_element in soup.find_all('img', class_='chapter-page'):
        image_path = chapter_img_element.get('data-original')
        # urljoin returns the base unchanged for an empty/None second
        # argument, which would yield `homepage` itself as an "image".
        if not image_path:
            continue
        yield urljoin(homepage, image_path)


def download_file(url, destination, log=False):
    '''
    Download the file at `url` to `destination`.

    The response body is streamed straight into the file opened at
    `destination` (binary mode, overwriting any existing file). When `log`
    is true, a "Downloading ... to ..." line is printed first.

    Raises `Exception` when the response status code is anything other
    than 200; nothing is written to `destination` in that case.

    Source: https://stackoverflow.com/a/13137873
    '''
    response = requests.get(url, stream=True)
    try:
        if log:
            print('Downloading {} to {}'.format(url, destination))

        if response.status_code != 200:
            raise Exception(
                'Request to {url} returned status code: {code} {reason}'.format(
                    url=response.url,
                    code=response.status_code,
                    reason=response.reason
                )
            )

        with open(destination, 'wb') as destination_file:
            # Ask the raw stream to undo any content-encoding (gzip/deflate)
            # so the bytes written are the actual file contents.
            response.raw.decode_content = True
            shutil.copyfileobj(response.raw, destination_file)
    finally:
        # With stream=True the connection is held until the response is
        # closed; release it on both the success and the error path.
        response.close()


def download_sunbeam(homepage):
    '''
    Save every page image of *On a Sunbeam* into the current directory.

    `homepage` is the homepage of the comic. Each chapter linked from it is
    visited in turn, and every page image of that chapter is written to a
    file named after the last component of the image url's path.
    '''
    for chapter_url in get_chapter_links(homepage):
        image_urls = get_chapter_image_links(chapter_url, homepage)
        for image_url in image_urls:
            image_path = urlparse(image_url).path
            download_file(image_url, basename(image_path), log=True)


if __name__ == '__main__':
    # Run as a script: download the whole comic from its website.
    download_sunbeam(homepage='http://www.onasunbeam.com/')
	#!/usr/bin/env python3
	'''Download the comic On a Sunbeam'''
	import shutil
	from urllib.parse import urljoin, urlparse
	from os.path import basename
	import requests
	from bs4 import BeautifulSoup


	def get_chapter_links(url):
	    '''
	    Yield absolute urls for the chapter links found in the html at `url`.

	    Every `<a>` element whose `href` attribute starts with "chapter" is
	    yielded, resolved against `url`. Anchors that have no `href` attribute
	    are skipped.
	    '''
	    homepage_response = requests.get(url)
	    soup = BeautifulSoup(homepage_response.text, 'html.parser')

	    # `href=True` restricts the search to anchors that actually carry an
	    # href, so `startswith` is never called on None.
	    for link_element in soup.find_all('a', href=True):
	        href = link_element['href']
	        if href.startswith('chapter'):
	            yield urljoin(url, href)


	def get_chapter_image_links(url, homepage):
	    '''
	    Yield the urls of images from the html at `url` from the website root
	    `homepage`.

	    The images are from `<img>` elements with the class of `chapter-page` and
	    the urls are in the `data-original` attribute. Each url is resolved
	    against `homepage`. Elements whose `data-original` attribute is missing
	    or empty are skipped.
	    '''
	    chapter_response = requests.get(url)
	    soup = BeautifulSoup(chapter_response.text, 'html.parser')

	    for chapter_img_element in soup.find_all('img', class_='chapter-page'):
	        image_path = chapter_img_element.get('data-original')
	        # urljoin returns the base unchanged for an empty/None second
	        # argument, which would yield `homepage` itself as an "image".
	        if not image_path:
	            continue
	        yield urljoin(homepage, image_path)


	def download_file(url, destination, log=False):
	    '''
	    Download the file at `url` to `destination`.

	    The response body is streamed straight into the file opened at
	    `destination` (binary mode, overwriting any existing file). When `log`
	    is true, a "Downloading ... to ..." line is printed first.

	    Raises `Exception` when the response status code is anything other
	    than 200; nothing is written to `destination` in that case.

	    Source: https://stackoverflow.com/a/13137873
	    '''
	    response = requests.get(url, stream=True)
	    try:
	        if log:
	            print('Downloading {} to {}'.format(url, destination))

	        if response.status_code != 200:
	            raise Exception(
	                'Request to {url} returned status code: {code} {reason}'.format(
	                    url=response.url,
	                    code=response.status_code,
	                    reason=response.reason
	                )
	            )

	        with open(destination, 'wb') as destination_file:
	            # Ask the raw stream to undo any content-encoding (gzip/deflate)
	            # so the bytes written are the actual file contents.
	            response.raw.decode_content = True
	            shutil.copyfileobj(response.raw, destination_file)
	    finally:
	        # With stream=True the connection is held until the response is
	        # closed; release it on both the success and the error path.
	        response.close()


	def download_sunbeam(homepage):
	    '''
	    Save every page image of On a Sunbeam into the current directory.

	    `homepage` is the homepage of the comic. Each chapter linked from it is
	    visited in turn, and every page image of that chapter is written to a
	    file named after the last component of the image url's path.
	    '''
	    for chapter_url in get_chapter_links(homepage):
	        image_urls = get_chapter_image_links(chapter_url, homepage)
	        for image_url in image_urls:
	            image_path = urlparse(image_url).path
	            download_file(image_url, basename(image_path), log=True)


	if __name__ == '__main__':
	    # Run as a script: download the whole comic from its website.
	    download_sunbeam(homepage='http://www.onasunbeam.com/')