Skip to content

Instantly share code, notes, and snippets.

@thomaswpp
Created January 27, 2018 17:41
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save thomaswpp/a1f95a4d046a144305aaba12ba359087 to your computer and use it in GitHub Desktop.
import scrapy
class Blacktag(scrapy.Spider):
    """Spider that collects event data from https://blacktag.com.br/.

    ``parse`` handles the start page: for every element matching the
    ``.shadow`` CSS class it yields a summary item and schedules a request
    to the event's own page. ``parse_info_event`` handles those event pages
    and yields one detail item per page.
    """

    name = 'events'
    start_urls = [
        'https://blacktag.com.br/',
    ]
    custom_settings = {
        # Scrapy setting: delay (in seconds) between consecutive requests.
        'DOWNLOAD_DELAY': 1.5,
        'FEED_EXPORT_ENCODING': 'utf-8',
    }

    def parse(self, response):
        """Yield a summary item per listed event and follow its link.

        Args:
            response: The response for one of ``start_urls``.

        Yields:
            dict: Summary with the keys ``link`` (absolute URL string, or
                ``None`` when the element has no ``href``), ``link-img``,
                ``name`` and ``data``.
            scrapy.Request: A request for the event page, handled by
                ``parse_info_event``. Only emitted when a link was found.
        """
        for event in response.css('.shadow'):
            href = event.css('a::attr("href")').extract_first()
            # Without this guard ``urljoin(None)`` would resolve to the
            # listing page itself and be parsed as if it were an event page.
            link = response.urljoin(href) if href else None

            yield {
                # Store the URL itself: a Request object placed inside an
                # item is never scheduled and cannot be serialized by the
                # feed exporters.
                'link': link,
                'link-img': event.css(
                    '.image-event img::attr("src")'
                ).extract_first(),
                'name': event.css('.info h2::text').extract_first(),
                'data': event.css('.info h3::text').extract_first(),
            }

            if link is not None:
                # Requests must be yielded directly from the callback for
                # Scrapy to download them and invoke the callback.
                yield scrapy.Request(link, callback=self.parse_info_event)

    def parse_info_event(self, response):
        """Extract the details shown on a single event page.

        Args:
            response: The response for an event page requested by ``parse``.

        Yields:
            dict: One item with the keys ``name``, ``date``, ``address`` and
                ``info``. Each value is the first matching text node, or
                ``None`` when the XPath matches nothing.
        """
        name = response.xpath(
            '//div[@class="event-info"]/h1/text()'
        ).extract_first()
        date = response.xpath(
            '//div[@class="info-event-date icon"]/small/text()'
        ).extract_first()
        address = response.xpath(
            '//div[@class="info-event-place icon"]/small/text()'
        ).extract_first()
        info = response.xpath(
            '//section[@class="description"]/small/text()'
        ).extract_first()
        yield dict(name=name, date=date, address=address, info=info)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment