@rokujyouhitoma
Created June 12, 2018 06:39
Example: scheduling Scrapy crawls with APScheduler's TwistedScheduler
from datetime import datetime
from apscheduler.schedulers.twisted import TwistedScheduler
from scrapy.crawler import CrawlerProcess
from scrapy.crawler import CrawlerRunner
from scrapy.utils.project import get_project_settings
from scrapy.utils.log import configure_logging
from twisted.internet import reactor, defer

# Spider classes referenced below (FdsnsSpider, IeloveSpider, FudousanSpider)
# are assumed to be defined elsewhere in this Scrapy project.
spiders = []

def add_spider_jobs(scheduler):
    # Register one cron job per spider, staggered across even minutes.
    process = CrawlerProcess(get_project_settings())
    for index, spider in enumerate(spiders):
        scheduler.add_job(
            process.crawl,
            "cron",
            args=[spider],
            minute=(index % 30) * 2,
            id=spider.name)
    return scheduler, process

def example_process():
    # Blocking variant: CrawlerProcess starts and stops the reactor itself.
    process = CrawlerProcess(get_project_settings())
    process.crawl(IeloveSpider)
    process.start()

def example_runner():
    configure_logging()
    runner = CrawlerRunner(get_project_settings())
    runner.crawl(FdsnsSpider)
    #runner.crawl(IeloveSpider)
    #runner.crawl(FudousanSpider)
    d = runner.join()
    d.addBoth(lambda _: reactor.stop())
    reactor.run()

@defer.inlineCallbacks
def crawl():
    # Job executed by the scheduler; the Deferred fires when the spider finishes.
    runner = CrawlerRunner(get_project_settings())
    yield runner.crawl(FdsnsSpider)
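
# A minimal sketch of a cron-driven entry point in the spirit of add_spider_jobs
# above. Assumptions: FdsnsSpider, IeloveSpider and FudousanSpider are spider
# classes defined in this project; CrawlerRunner is used because the reactor is
# started manually here.
def main_cron():
    configure_logging()
    runner = CrawlerRunner(get_project_settings())
    scheduler = TwistedScheduler()
    for index, spider in enumerate([FdsnsSpider, IeloveSpider, FudousanSpider]):
        scheduler.add_job(
            runner.crawl,
            "cron",
            args=[spider],
            minute=(index % 30) * 2,  # stagger starts on even minutes
            id=spider.name)
    scheduler.start()
    reactor.run()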

def main():
    configure_logging()
    scheduler = TwistedScheduler()
    # Run the crawl job every 10 seconds on the Twisted reactor.
    scheduler.add_job(crawl, 'interval', seconds=10)
    scheduler.start()
    reactor.run()


if __name__ == '__main__':
    main()