lmyyao/celery_scrapy_periodic.py

## celery_scrapy_periodic.py
from celery.task import PeriodicTask

from datetime import timedelta

class Lmy(PeriodicTask):
    """Celery periodic task that crawls a Baidu image-search page with Scrapy.

    Each run starts a Scrapy crawl of one fixed search URL and prints every
    image address on the ``g.hiphotos.baidu.com`` host found in the response.
    """

    # Interval between two scheduled runs of the task.
    run_every = timedelta(seconds=60)
    # celery queue router
    options = {"exchange": "default", "routing_key": "default"}
    # Placeholder task name; the spider declared in run() uses the same string.
    name = "xxxxx"

    def run(self):
        """Crawl the search page once and print the matched image URLs.

        Returns nothing; the only output is what the spider prints.

        NOTE(review): per the Scrapy documentation, CrawlerProcess.start()
        runs a Twisted reactor, and a reactor cannot be restarted inside the
        same process. A worker process that executes this task more than
        once may therefore fail on later runs -- confirm against the worker
        configuration.
        """
        # Imported inside the method, so scrapy is only loaded when the task
        # body actually executes.
        import scrapy
        from scrapy.crawler import CrawlerProcess

        # Baidu image search; the URL-encoded "word" parameter is the query.
        URL = 'http://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1' \
              '&st=-1&fr=&sf=1&fmq=1468240179138_R&pv=&ic=0&nc=1&z=&se=1&showtab=0&fb=0&width=' \
              '&height=&face=0&istype=2&itg=0&uptype=urlsearch&ie=utf-8&word=%E6%9F%B3%E5%B2%A9'

        class LiuYanSpider(scrapy.Spider):
            """Spider that fetches the search page and prints image URLs."""

            name = "xxxxx"
            start_urls = [URL]

            def parse(self, response):
                """Print every .jpg URL on the image host found in *response*."""
                # Raw string: the pattern holds a regex escape (backslash-dot)
                # that is not a valid Python string escape. The pattern value
                # is unchanged; only the literal's spelling is.
                image_paths = response.selector.re(
                    r"http://g.hiphotos.baidu.com(.*?)\.jpg"
                )
                # The capture group holds only the part between host and
                # extension, so the full address is rebuilt around it.
                for path in image_paths:
                    print("http://g.hiphotos.baidu.com{}.jpg".format(path))

        process = CrawlerProcess({
            'USER_AGENT': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)',
            "LOG_ENABLED": False
        })

        process.crawl(LiuYanSpider)
        # Blocks until the crawl has finished.
        process.start()
	from celery.task import PeriodicTask

	from datetime import timedelta

	class Lmy(PeriodicTask):
	    """Celery periodic task that crawls a Baidu image-search page with Scrapy.

	    Each run starts a Scrapy crawl of one fixed search URL and prints every
	    image address on the ``g.hiphotos.baidu.com`` host found in the response.
	    """

	    # Interval between two scheduled runs of the task.
	    run_every = timedelta(seconds=60)
	    # celery queue router
	    options = {"exchange": "default", "routing_key": "default"}
	    # Placeholder task name; the spider declared in run() uses the same string.
	    name = "xxxxx"

	    def run(self):
	        """Crawl the search page once and print the matched image URLs.

	        Returns nothing; the only output is what the spider prints.

	        NOTE(review): per the Scrapy documentation, CrawlerProcess.start()
	        runs a Twisted reactor, and a reactor cannot be restarted inside the
	        same process. A worker process that executes this task more than
	        once may therefore fail on later runs -- confirm against the worker
	        configuration.
	        """
	        # Imported inside the method, so scrapy is only loaded when the task
	        # body actually executes.
	        import scrapy
	        from scrapy.crawler import CrawlerProcess

	        # Baidu image search; the URL-encoded "word" parameter is the query.
	        URL = 'http://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1' \
	              '&st=-1&fr=&sf=1&fmq=1468240179138_R&pv=&ic=0&nc=1&z=&se=1&showtab=0&fb=0&width=' \
	              '&height=&face=0&istype=2&itg=0&uptype=urlsearch&ie=utf-8&word=%E6%9F%B3%E5%B2%A9'

	        class LiuYanSpider(scrapy.Spider):
	            """Spider that fetches the search page and prints image URLs."""

	            name = "xxxxx"
	            start_urls = [URL]

	            def parse(self, response):
	                """Print every .jpg URL on the image host found in *response*."""
	                # Raw string: the pattern holds a regex escape (backslash-dot)
	                # that is not a valid Python string escape. The pattern value
	                # is unchanged; only the literal's spelling is.
	                image_paths = response.selector.re(
	                    r"http://g.hiphotos.baidu.com(.*?)\.jpg"
	                )
	                # The capture group holds only the part between host and
	                # extension, so the full address is rebuilt around it.
	                for path in image_paths:
	                    print("http://g.hiphotos.baidu.com{}.jpg".format(path))

	        process = CrawlerProcess({
	            'USER_AGENT': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)',
	            "LOG_ENABLED": False
	        })

	        process.crawl(LiuYanSpider)
	        # Blocks until the crawl has finished.
	        process.start()