@dchaplinsky
Created December 20, 2012 20:05
#!/usr/bin/env python
"""Run Scrapy spiders on demand: poll a Redis-backed HotQueue for
{"spider": ..., "url": ...} jobs and crawl each one inside a single
long-running Twisted reactor."""
import sys
import os.path

# Make the enclosing project importable when the script is run directly.
PROJECT_ROOT = os.path.abspath(os.path.dirname(__file__))
sys.path.insert(0, os.path.join(PROJECT_ROOT, "../"))

from twisted.internet import reactor, task
from scrapy.crawler import Crawler
from scrapy.http import Request
from scrapy.item import BaseItem
from scrapy.utils.spider import iterate_spider_output
from scrapy.settings import Settings
from scrapy import log
from hotqueue import HotQueue

SETTINGS = Settings({
    'SPIDER_MODULES': ['products.spiders'],
})


def run_callback(response, cb):
    """Invoke a spider callback on a response and collect the items it yields."""
    items = []
    for x in iterate_spider_output(cb(response)):
        if isinstance(x, BaseItem):
            items.append(x)
    log.msg(str(items))
    return items


def prepare_request(request, opts=None):
    """Wrap the request's original callback so the items it produces are
    logged rather than passed on, returning the modified request."""
    def callback(response):
        cb = response.meta['_callback']
        items = run_callback(response, cb)
        log.msg(str(items))
        return []

    request.meta['_callback'] = request.callback
    request.callback = callback
    return request


def setup_crawler(spider_name, url):
    """Create a fresh Crawler, instantiate the named spider, and start
    crawling the given URL through the wrapped request."""
    crawler = Crawler(SETTINGS)
    crawler.configure()
    spider = crawler.spiders.create(spider_name)
    r = prepare_request(Request(url, spider.parse_item))
    crawler.crawl(spider, [r])
    crawler.start()


def main():
    input_queue = HotQueue("stock_check")

    def scrape():
        # Named `job` rather than `task` to avoid shadowing the
        # twisted.internet.task module imported above.
        job = input_queue.get()
        log.msg("Got %s" % job)
        if job:
            setup_crawler(job["spider"], job["url"])

    # Poll the queue once per second inside the reactor loop.
    l = task.LoopingCall(scrape)
    l.start(1.0)

    log.start()
    reactor.run()


if __name__ == "__main__":
    main()
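
For reference, the producer side is just a few lines: any process with access to the same Redis instance can enqueue a job for this script to pick up. A minimal sketch, assuming Redis on localhost and a spider named example_spider registered under products.spiders (both the spider name and the URL below are hypothetical placeholders, not values from this gist):

# enqueue_job.py -- minimal producer sketch (hypothetical names and values)
from hotqueue import HotQueue

# Same queue name the consumer polls; HotQueue connects to Redis on
# localhost by default and pickles the dict before pushing it.
queue = HotQueue("stock_check")
queue.put({"spider": "example_spider", "url": "http://example.com/item/1"})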