git
discard all local changes/commits and pull from upstream
git reset --hard origin/master
git pull origin master
import datetime

# Convert a datetime object to a formatted string.
datetime.datetime.today().strftime("%m/%d/%Y %H:%M")

# Parse a string into a datetime object.
# NOTE: the format must match the input text. The original used
# "%Y%m%d %H:%M", which does not match 'Mar 22, 2016 00:00' and raises
# ValueError; "%b %d, %Y %H:%M" is the matching format.
datetime.datetime.strptime('Mar 22, 2016 00:00', "%b %d, %Y %H:%M")

# Add one day.
datetime.datetime.today() + datetime.timedelta(days=1)
# -x, --proxy <[protocol://][user:password@]proxyhost[:port]>
#
# Use the specified HTTP proxy.
# If the port number is not specified, it is assumed at port 1080.
curl -x http://proxy_server:proxy_port --proxy-user username:password -L http://url
sudo apt-get update
sudo apt-get install -y python-dev python-pip libxml2-dev libxslt1-dev zlib1g-dev libffi-dev libssl-dev
pip install Scrapy
sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv EA312927
echo "deb http://repo.mongodb.org/apt/ubuntu trusty/mongodb-org/3.2 multiverse" | sudo tee /etc/apt/sources.list.d/mongodb-org-3.2.list
sudo apt-get update
sudo apt-get install -y mongodb-org
sudo service mongod start
pip install pymongo
from scrapy.xlib.pydispatch import dispatcher | |
from scrapy import signals | |
from scrapy.exceptions import DropItem | |
from scrapy.utils.serialize import ScrapyJSONEncoder | |
from carrot.connection import BrokerConnection | |
from carrot.messaging import Publisher | |
from twisted.internet.threads import deferToThread |
git
discard all local changes/commits and pull from upstream
git reset --hard origin/master
git pull origin master
sudo apt-get install -y python-dev python-pip libxml2-dev libxslt1-dev zlib1g-dev libffi-dev libssl-dev
pip install virtualenv
$ scrapy runspider txspider.py
2016-07-05 23:11:39 [scrapy] INFO: Scrapy 1.1.0 started (bot: scrapybot)
2016-07-05 23:11:39 [scrapy] INFO: Overridden settings: {}
2016-07-05 23:11:40 [scrapy] INFO: Enabled extensions:
['scrapy.extensions.corestats.CoreStats', 'scrapy.extensions.logstats.LogStats']
2016-07-05 23:11:40 [scrapy] INFO: Enabled downloader middlewares:
['scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware',
 'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware',
 'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware',
 'scrapy.downloadermiddlewares.retry.RetryMiddleware',
from scrapy import Spider, Item, Field | |
from twisted.internet import defer, reactor | |
class MyItem(Item):
    """Scraped item holding a single page URL.

    The original paste carried ' | |' table-extraction residue on every
    line, which made the snippet unparseable; this is the cleaned form.
    """
    url = Field()
class MySpider(Spider): |
from celery.schedules import crontab | |
from flask.ext.celery import Celery | |
# Celery beat schedule: periodic tasks keyed by a human-readable name.
CELERYBEAT_SCHEDULE = {
    # Runs the 'user.checkaccounts' task every night at 04:20.
    # (The original comment said "4:15", contradicting
    # crontab(hour=4, minute=20); the code is the actual behavior.)
    'every-night': {
        'task': 'user.checkaccounts',
        'schedule': crontab(hour=4, minute=20)
    }
}
# If you have hit this locale error on macOS, here's the quick fix — add these lines to your ~/.bash_profile:
export LC_ALL=en_US.UTF-8
export LANG=en_US.UTF-8