# jluczak/spider_human.py

## spider_human.py
import scrapy
from human.items import HumanItem

class HumanSpider(scrapy.Spider):
  """Crawl the humantalks.com talk listing and emit one item per talk page.

  ``parse`` handles listing pages: it requests every link found inside
  ``.card-deck`` (handled by ``parse_book``) and follows the
  ``a.page-link`` anchors back into ``parse``. ``parse_book`` scrapes a
  single linked page into a ``HumanItem``.
  """

  start_urls = [
    'http://humantalks.com/talks/'
  ]
  name = 'human'

  def parse(self, response):
    """Schedule the talk pages and pagination links of a listing page.

    Args:
      response: the downloaded listing page.

    Yields:
      A ``scrapy.Request`` per ``.card-deck`` link (callback
      ``parse_book``) and one request per ``a.page-link`` href
      (callback ``parse``).
    """
    for talk in response.css('.card-deck a'):
      href = talk.css('a::attr(href)').extract_first()
      if not href:
        # Anchors without an href have nothing to follow.
        continue
      yield scrapy.Request(response.urljoin(href), callback=self.parse_book)

    # Select the href attributes rather than the <a> elements themselves:
    # response.follow() raises ValueError for an <a> element that has no
    # href, which would abort the remaining pagination links.
    for href in response.css('a.page-link::attr(href)'):
      yield response.follow(href, callback=self.parse)

  def parse_book(self, response):
    """Scrape one linked page into a ``HumanItem``.

    Args:
      response: the downloaded page requested from ``parse``.

    Yields:
      A single ``HumanItem``. ``description`` is the list of every text
      node matched by ``.offset-md-2 p``; all other fields hold the first
      match of their selector, or ``None`` when nothing matches.
    """
    title = response.css('h1::text').extract_first()
    name = response.css('.speaker_name a::text').extract_first()
    description = response.css('.offset-md-2 p::text').extract()
    slide = response.css('.btn-primary::attr(href)').extract_first()
    slide2 = response.css('.player iframe::attr(src)').extract_first()
    video = response.css('.yt-uix-sessionlink a::attr(href)').extract_first()

    yield HumanItem(
      title=title,
      description=description,
      name=name,
      slide2=slide2,
      slide=slide,
      video=video,
    )
	import scrapy
	from human.items import HumanItem

	class HumanSpider(scrapy.Spider):
	  """Crawl the humantalks.com talk listing and emit one item per talk page.

	  ``parse`` handles listing pages: it requests every link found inside
	  ``.card-deck`` (handled by ``parse_book``) and follows the
	  ``a.page-link`` anchors back into ``parse``. ``parse_book`` scrapes a
	  single linked page into a ``HumanItem``.
	  """

	  start_urls = [
	    'http://humantalks.com/talks/'
	  ]
	  name = 'human'

	  def parse(self, response):
	    """Schedule the talk pages and pagination links of a listing page.

	    Args:
	      response: the downloaded listing page.

	    Yields:
	      A ``scrapy.Request`` per ``.card-deck`` link (callback
	      ``parse_book``) and one request per ``a.page-link`` href
	      (callback ``parse``).
	    """
	    for talk in response.css('.card-deck a'):
	      href = talk.css('a::attr(href)').extract_first()
	      if not href:
	        # Anchors without an href have nothing to follow.
	        continue
	      yield scrapy.Request(response.urljoin(href), callback=self.parse_book)

	    # Select the href attributes rather than the <a> elements themselves:
	    # response.follow() raises ValueError for an <a> element that has no
	    # href, which would abort the remaining pagination links.
	    for href in response.css('a.page-link::attr(href)'):
	      yield response.follow(href, callback=self.parse)

	  def parse_book(self, response):
	    """Scrape one linked page into a ``HumanItem``.

	    Args:
	      response: the downloaded page requested from ``parse``.

	    Yields:
	      A single ``HumanItem``. ``description`` is the list of every text
	      node matched by ``.offset-md-2 p``; all other fields hold the first
	      match of their selector, or ``None`` when nothing matches.
	    """
	    title = response.css('h1::text').extract_first()
	    name = response.css('.speaker_name a::text').extract_first()
	    description = response.css('.offset-md-2 p::text').extract()
	    slide = response.css('.btn-primary::attr(href)').extract_first()
	    slide2 = response.css('.player iframe::attr(src)').extract_first()
	    video = response.css('.yt-uix-sessionlink a::attr(href)').extract_first()

	    yield HumanItem(
	      title=title,
	      description=description,
	      name=name,
	      slide2=slide2,
	      slide=slide,
	      video=video,
	    )