lucndm/2saoVnSpider.py

## 2saoVnSpider.py
# -*- coding: utf-8 -*-
import scrapy
from scrapy.contrib.linkextractors import LinkExtractor
from scrapy.contrib.spiders import CrawlSpider, Rule
from manga_scrapy.items import VideoItem

#2sao.vn
class SaoVnSpider(CrawlSpider):
    """Crawl clip pages on 2sao.vn and emit one ``VideoItem`` per page.

    The crawl starts from a single seed clip URL. Every link whose URL
    matches the ``/clip/...`` pattern is followed and handed to
    :meth:`parse_item`.
    """

    name = '2saovn'
    allowed_domains = ['2sao.vn']
    start_urls = [
        'http://2sao.vn/clip/nguoi-dan-xuong-pho-vui-noel-p0c1066n20151224193946087.vnn'
    ]

    # One rule: scrape every clip page and keep following clip links from it.
    rules = [
        Rule(LinkExtractor(allow=r'/clip/[-a-z0-9\._]+'),
             callback='parse_item', follow=True)
    ]

    def parse_item(self, response):
        """Build a ``VideoItem`` from a single clip page.

        Args:
            response: The downloaded clip page.

        Returns:
            A ``VideoItem`` with ``url`` and ``title`` taken from the page's
            Open Graph meta tags, and ``video_link`` derived from the first
            iframe whose ``src`` contains ``youtube``. When the page has no
            such iframe, ``video_link`` is ``'Not Found Link !'``.

        Raises:
            IndexError: If the page has no ``og:url`` or ``og:title`` meta
                tag.
        """
        item = VideoItem()
        item['url'] = response.xpath("//meta[@property='og:url']/@content").extract()[0]
        item['title'] = response.xpath("//meta[@property='og:title']/@content").extract()[0]

        # extract() returns a list, so a page without a YouTube iframe yields
        # an empty list rather than None. Test for emptiness before indexing;
        # indexing first would raise IndexError and the fallback below would
        # never be reached.
        youtube_links = response.xpath("//iframe[contains(@src,'youtube')]/@src").extract()
        if not youtube_links:
            item['video_link'] = 'Not Found Link !'
        else:
            # Rewrite the embed URL into a watch URL.
            # NOTE(review): for a src like '//host/embed/ID?rel=0' this yields
            # 'host//watch?v=ID' (double slash before 'watch'); left as-is so
            # emitted values do not change - confirm whether that is intended.
            item['video_link'] = (
                youtube_links[0]
                .replace('//', '')
                .replace('embed/', '/watch?v=')
                .replace('?rel=0', '')
            )
        return item
	# -- coding: utf-8 --
	import scrapy
	from scrapy.contrib.linkextractors import LinkExtractor
	from scrapy.contrib.spiders import CrawlSpider, Rule
	from manga_scrapy.items import VideoItem

	#2sao.vn
	class SaoVnSpider(CrawlSpider):
	    """Crawl clip pages on 2sao.vn and emit one ``VideoItem`` per page.

	    The crawl starts from a single seed clip URL. Every link whose URL
	    matches the ``/clip/...`` pattern is followed and handed to
	    :meth:`parse_item`.
	    """

	    name = '2saovn'
	    allowed_domains = ['2sao.vn']
	    start_urls = [
	        'http://2sao.vn/clip/nguoi-dan-xuong-pho-vui-noel-p0c1066n20151224193946087.vnn'
	    ]

	    # One rule: scrape every clip page and keep following clip links from it.
	    rules = [
	        Rule(LinkExtractor(allow=r'/clip/[-a-z0-9\._]+'),
	             callback='parse_item', follow=True)
	    ]

	    def parse_item(self, response):
	        """Build a ``VideoItem`` from a single clip page.

	        Args:
	            response: The downloaded clip page.

	        Returns:
	            A ``VideoItem`` with ``url`` and ``title`` taken from the page's
	            Open Graph meta tags, and ``video_link`` derived from the first
	            iframe whose ``src`` contains ``youtube``. When the page has no
	            such iframe, ``video_link`` is ``'Not Found Link !'``.

	        Raises:
	            IndexError: If the page has no ``og:url`` or ``og:title`` meta
	                tag.
	        """
	        item = VideoItem()
	        item['url'] = response.xpath("//meta[@property='og:url']/@content").extract()[0]
	        item['title'] = response.xpath("//meta[@property='og:title']/@content").extract()[0]

	        # extract() returns a list, so a page without a YouTube iframe yields
	        # an empty list rather than None. Test for emptiness before indexing;
	        # indexing first would raise IndexError and the fallback below would
	        # never be reached.
	        youtube_links = response.xpath("//iframe[contains(@src,'youtube')]/@src").extract()
	        if not youtube_links:
	            item['video_link'] = 'Not Found Link !'
	        else:
	            # Rewrite the embed URL into a watch URL.
	            # NOTE(review): for a src like '//host/embed/ID?rel=0' this yields
	            # 'host//watch?v=ID' (double slash before 'watch'); left as-is so
	            # emitted values do not change - confirm whether that is intended.
	            item['video_link'] = (
	                youtube_links[0]
	                .replace('//', '')
	                .replace('embed/', '/watch?v=')
	                .replace('?rel=0', '')
	            )
	        return item