git
Discard all local commits and uncommitted changes to tracked files, then pull from upstream:
git reset --hard origin/master
git pull origin master
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
# A spider example on using reactor.callLater() | |
# for delays and repetition. | |
# scrapy 0.24 | |
import scrapy | |
from twisted.internet import reactor, defer |
from celery.schedules import crontab | |
from flask.ext.celery import Celery | |
# Periodic-task table with a single entry, 'every-night', that runs the
# 'user.checkaccounts' task on a crontab schedule.
# NOTE(review): the trailing "| |" on these lines looks like table-extraction
# residue rather than Python — confirm and strip before running.
CELERYBEAT_SCHEDULE = { | |
# Executes every night at 04:20 (hour=4, minute=20 below). This comment
# previously said 4:15 — confirm which time is intended.
'every-night': { | |
'task': 'user.checkaccounts', | |
'schedule': crontab(hour=4, minute=20) | |
} | |
} |
from scrapy import Spider, Item, Field | |
from twisted.internet import defer, reactor | |
# Item subclass that declares a single field, `url`.
# NOTE(review): the trailing "| |" and the un-indented class body look like
# extraction damage — confirm against the original file before use.
class MyItem(Item): | |
url = Field() | |
class MySpider(Spider): |
$ scrapy runspider txspider.py | |
2016-07-05 23:11:39 [scrapy] INFO: Scrapy 1.1.0 started (bot: scrapybot) | |
2016-07-05 23:11:39 [scrapy] INFO: Overridden settings: {} | |
2016-07-05 23:11:40 [scrapy] INFO: Enabled extensions: | |
['scrapy.extensions.corestats.CoreStats', 'scrapy.extensions.logstats.LogStats'] | |
2016-07-05 23:11:40 [scrapy] INFO: Enabled downloader middlewares: | |
['scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware', | |
'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware', | |
'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware', | |
'scrapy.downloadermiddlewares.retry.RetryMiddleware', |
git
Discard all local commits and uncommitted changes to tracked files, then pull from upstream:
git reset --hard origin/master
git pull origin master
from scrapy.xlib.pydispatch import dispatcher | |
from scrapy import signals | |
from scrapy.exceptions import DropItem | |
from scrapy.utils.serialize import ScrapyJSONEncoder | |
from carrot.connection import BrokerConnection | |
from carrot.messaging import Publisher | |
from twisted.internet.threads import deferToThread |
from multiprocessing import Pool | |
from functools import partial | |
def parallel_function(f): | |
def parallize(f, seq): | |
pool = Pool() | |
pool.map(f, seq) | |
pool.close() | |
pool.join() |