Created
May 8, 2020 16:12
-
-
Save isaqueprofeta/7de4478309817d917dff0eb9b7aeaf82 to your computer and use it in GitHub Desktop.
My first test with Scrapy
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import posixpath
from urllib.parse import urljoin, urlsplit

import scrapy
from scrapy.http import Request
class SheetDownloader(scrapy.Spider):
    """Crawl the paginated listing pages and save every linked file to disk.

    The spider requests listing pages 1 through 99, selects the elements
    matching ``tag.class`` on each page, follows their ``href`` and writes
    each downloaded response body to a file in the current working directory.
    """

    name = 'my_downloader'
    allowed_domains = ['mydomain.com']
    start_urls = [f'https://mydomain.com/path/list/{page}' for page in range(1, 100)]

    # Root that relative hrefs are resolved against.
    base_url = 'https://mydomain.com/'

    def parse(self, response):
        """Yield one download request per matching link on a listing page.

        Args:
            response: The listing-page response to extract links from.

        Yields:
            Request: A request for each link target, handled by ``save_pdf``.
        """
        for link in response.css('tag.class'):
            href = link.attrib.get('href')
            if not href:
                # A matched element without an href has nothing to download;
                # skipping it keeps the remaining links on the page flowing.
                continue
            # urljoin (instead of plain concatenation) avoids a doubled slash
            # for root-relative hrefs and leaves absolute hrefs untouched.
            yield Request(
                url=urljoin(self.base_url, href),
                callback=self.save_pdf,
            )

    def save_pdf(self, response):
        """Write the response body to a file named after the URL's last path segment.

        The query string and fragment are excluded from the file name. When
        the URL path ends with ``/`` there is no usable name, so the response
        is skipped with a warning instead of failing on ``open('')``.

        Args:
            response: The downloaded response whose body is written to disk.
        """
        filename = posixpath.basename(urlsplit(response.url).path)
        if not filename:
            self.logger.warning(
                'Cannot derive a file name from %s; skipping', response.url
            )
            return

        self.logger.info('Saving PDF %s', filename)
        with open(filename, 'wb') as file:
            file.write(response.body)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment