# jluczak/spider_mob.py

## spider_mob.py
from scrapy.spiders import CrawlSpider, Rule
from mobile.items import MobileItem
from scrapy.linkextractors.lxmlhtml import LxmlLinkExtractor
from scrapy.selector import Selector

class MySpider(CrawlSpider):
    """Crawl the mobiletechcon.de speakers page and scrape each linked page.

    Links located under the element with id ``content-section-1`` on the
    start page are requested and handed to ``parse_item``; with
    ``follow=False`` no further links are extracted from those responses.
    """

    name = "mobile"
    allowed_domains = ["mobiletechcon.de"]
    start_urls = ["https://mobiletechcon.de/speakers-en/"]

    rules = (
        Rule(
            LxmlLinkExtractor(
                restrict_xpaths=".//*[@id='content-section-1']//a",
            ),
            callback='parse_item',
            follow=False,
        ),
    )

    def parse_item(self, response):
        """Yield a single ``MobileItem`` populated from ``response``.

        ``name`` is the first ``h1`` text node under the
        ``gdlr-speaker-content-wrapper`` element, ``bio`` is the first ``p``
        element under ``gdlr-speaker-content`` (the XPath selects the element
        itself, not its text), and ``link`` is the URL of the response.
        """
        page = Selector(response)

        name_xpath = '//*[@class="gdlr-speaker-content-wrapper"]//h1/text()'
        bio_xpath = '//*[@class="gdlr-speaker-content"]//p'

        item = MobileItem()
        item['name'] = page.xpath(name_xpath).extract_first()
        item['bio'] = page.xpath(bio_xpath).extract_first()
        item['link'] = response.url

        yield item

## spider_vmss.py
import scrapy


class QuotesSpider(scrapy.Spider):
    """Scrape the ``tr.talk`` rows of the VMSS16 event page."""

    name = "vmss"
    start_urls = [
        'http://soft-dev.org/events/vmss16/'
    ]

    def parse(self, response):
        """Yield one dict per ``tr.talk`` row found in ``response``.

        Each dict carries:

        * ``name``  -- list of the text nodes of every ``a`` in the row,
        * ``link``  -- list of ``href`` values of ``a`` elements inside ``td``,
        * ``title`` -- the second ``td`` text node of the row, or ``None``
          when the row has fewer than two such nodes.
        """
        for row in response.css('tr.talk'):
            cell_texts = row.css('td::text').extract()
            yield {
                'name': row.css('a::text').extract(),
                'link': row.css('td a::attr(href)').extract(),
                # Indexing [1] unconditionally raised IndexError on a short
                # row, which aborted the generator and dropped every
                # remaining row of the page; fall back to None instead.
                'title': cell_texts[1] if len(cell_texts) > 1 else None,
            }
	from scrapy.spiders import CrawlSpider, Rule
	from mobile.items import MobileItem
	from scrapy.linkextractors.lxmlhtml import LxmlLinkExtractor
	from scrapy.selector import Selector

	class MySpider(CrawlSpider):
		"""Crawl the mobiletechcon.de speakers page and scrape each linked page.

		Links located under the element with id ``content-section-1`` on the
		start page are requested and handed to ``parse_item``; with
		``follow=False`` no further links are extracted from those responses.

		NOTE(review): this repeats the ``MySpider`` class defined earlier in
		this file -- confirm which copy is meant to stay.
		"""

		name = "mobile"
		allowed_domains = ["mobiletechcon.de"]
		start_urls = ["https://mobiletechcon.de/speakers-en/"]

		rules = (
			Rule(
				LxmlLinkExtractor(
					restrict_xpaths=(".//*[@id='content-section-1']//a"),
				),
				follow=False,
				callback='parse_item',
			),
		)

		def parse_item(self, response):
			"""Yield a single ``MobileItem`` populated from ``response``.

			``name`` is the first ``h1`` text node under the
			``gdlr-speaker-content-wrapper`` element, ``bio`` is the first
			``p`` element under ``gdlr-speaker-content`` (the XPath selects
			the element itself, not its text), and ``link`` is the URL of
			the response.
			"""
			sel = Selector(response)

			item = MobileItem()

			item['name'] = sel.xpath('//*[@class="gdlr-speaker-content-wrapper"]//h1/text()').extract_first()
			item['bio'] = sel.xpath('//*[@class="gdlr-speaker-content"]//p').extract_first()
			item['link'] = response.url

			yield item
	import scrapy


	class QuotesSpider(scrapy.Spider):
		"""Scrape the ``tr.talk`` rows of the VMSS16 event page.

		NOTE(review): this repeats the ``QuotesSpider`` class defined earlier
		in this file -- confirm which copy is meant to stay.
		"""

		name = "vmss"
		start_urls = [
			'http://soft-dev.org/events/vmss16/'
		]

		def parse(self, response):
			"""Yield one dict per ``tr.talk`` row found in ``response``.

			Each dict carries:

			* ``name``  -- list of the text nodes of every ``a`` in the row,
			* ``link``  -- list of ``href`` values of ``a`` elements inside
			  ``td``,
			* ``title`` -- the second ``td`` text node of the row, or ``None``
			  when the row has fewer than two such nodes.
			"""
			for row in response.css('tr.talk'):
				cell_texts = row.css('td::text').extract()
				yield {
					'name': row.css('a::text').extract(),
					'link': row.css('td a::attr(href)').extract(),
					# Guard the index: a short row would otherwise raise
					# IndexError and drop every remaining row of the page.
					'title': cell_texts[1] if len(cell_texts) > 1 else None,
				}