proj
├──proj
| ├──__init__.py
| ├──settings.py
| └──celery.py
├──app1
| └──tasks.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import asyncio | |
from pyppeteer import launch | |
async def main(): | |
browser = await launch({'headless': False, | |
'args': ['--proxy-server=socks5://127.0.0.1:9050']}) | |
page = await browser.newPage() | |
await page.goto('https://ifconfig.me') | |
input() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import asyncio | |
from pyppeteer import launch | |
async def intercept_network_response(response): | |
# In this example, we only care about responses whose content type is JSON
if "application/json" in response.headers.get("content-type", ""): | |
# Print some info about the responses |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class DummyPolicy(object): | |
def __init__(self, settings): | |
self.ignore_schemes = settings.getlist('HTTPCACHE_IGNORE_SCHEMES') | |
self.ignore_http_codes = [ | |
int(x) for x in settings.getlist('HTTPCACHE_IGNORE_HTTP_CODES')] | |
def should_cache_request(self, request): | |
return urlparse_cached(request).scheme not in self.ignore_schemes |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class XlsxPipeline(): | |
def open_spider(self, spider): | |
# create an Excel workbook | |
self._wb = Workbook() | |
self._ws = self._wb.active | |
self._ws.title = 'MyBooksTitle' | |
self._ws.append(['Title', 'img_urls']) | |
row = list(self._ws.rows)[0] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# pipeline | |
from scrapy.pipelines.images import ImagesPipeline | |
from scrapy.http import Request | |
class CustomImageNamePipeline(ImagesPipeline): | |
# Save images using the title from the item as the file name
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# First step: create the Scrapy project and generate a spider
# scrapy startproject tutorial | |
# cd tutorial | |
# scrapy genspider example example.com | |
---------------------------------------------------------------------------------- | |
# Proxy support: https://github.com/aivarsk/scrapy-proxies
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Install keyring and the alternative backends package (keyrings.alt)
pip install keyring keyrings.alt | |
# Edit ~/.config/python_keyring/keyringrc.cfg: | |
[backend] | |
default-keyring=keyrings.alt.file.PlaintextKeyring | |
keyring.set_password(systemname, username, password) | |
passwd = keyring.get_password(systemname, username) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
FROM python:3.6 | |
ENV PYTHONUNBUFFERED 1 | |
COPY ./requirements.txt /code/requirements.txt | |
RUN pip install -r /code/requirements.txt | |
COPY . /code/ | |
WORKDIR /code/ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import lxml.html | |
html = '''<html> | |
<head> | |
<title>MyTitle</title> | |
</head> | |
<body> | |
<h1>Header</h1> | |
<a href = "ya.ru">1</a> |