Skip to content

Instantly share code, notes, and snippets.

View ajib6ept's full-sized avatar
🏠
Working from home

Альберт ajib6ept

🏠
Working from home
  • Russia
  • 05:14 (UTC +03:00)
View GitHub Profile
import asyncio
from pyppeteer import launch
async def main():
    """Open a visible Chromium window through a SOCKS5 proxy and load ifconfig.me.

    The browser is launched with ``--proxy-server=socks5://127.0.0.1:9050``
    (presumably a local Tor SOCKS proxy -- confirm for your setup), a new
    page is opened on ``https://ifconfig.me``, and the coroutine then waits
    for the user to press Enter before shutting the browser down.
    """
    browser = await launch({
        'headless': False,
        'args': ['--proxy-server=socks5://127.0.0.1:9050'],
    })
    try:
        page = await browser.newPage()
        await page.goto('https://ifconfig.me')
        # Block until Enter is pressed so the page can be inspected by hand.
        input()
    finally:
        # Always release the Chromium process, even if navigation fails.
        await browser.close()
import asyncio
from pyppeteer import launch
async def intercept_network_response(response):
# In this example, we care only about responses returning JSONs
if "application/json" in response.headers.get("content-type", ""):
# Print some info about the responses
class DummyPolicy(object):
    """Cache policy driven by two settings-provided ignore lists.

    Attributes are filled from a settings object that exposes
    ``getlist(name)``:

    * ``ignore_schemes`` -- values of ``HTTPCACHE_IGNORE_SCHEMES``.
    * ``ignore_http_codes`` -- values of ``HTTPCACHE_IGNORE_HTTP_CODES``,
      each converted to ``int``.
    """

    def __init__(self, settings):
        """Read the ignore lists from *settings*.

        Raises:
            ValueError: if an entry of ``HTTPCACHE_IGNORE_HTTP_CODES``
                cannot be converted to ``int``.
        """
        self.ignore_schemes = settings.getlist('HTTPCACHE_IGNORE_SCHEMES')
        self.ignore_http_codes = [
            int(x) for x in settings.getlist('HTTPCACHE_IGNORE_HTTP_CODES')
        ]

    def should_cache_request(self, request):
        """Return True unless the request's URL scheme is in ``ignore_schemes``.

        NOTE(review): relies on ``urlparse_cached`` being in scope; it is not
        imported in the visible snippet -- confirm the import exists.
        """
        return urlparse_cached(request).scheme not in self.ignore_schemes
class XlsxPipeline():
def open_spider(self, spider):
# create an Excel workbook
self._wb = Workbook()
self._ws = self._wb.active
self._ws.title = 'MyBooksTitle'
self._ws.append(['Title', 'img_urls'])
row = list(self._ws.rows)[0]
# pipeline
from scrapy.pipelines.images import ImagesPipeline
from scrapy.http import Request
class CustomImageNamePipeline(ImagesPipeline):
# save as title from items
# first step
# scrapy startproject tutorial
# cd tutorial
# scrapy genspider example example.com
----------------------------------------------------------------------------------
###proxy https://github.com/aivarsk/scrapy-proxies
# install
pip install keyring keyrings.alt
# Edit ~/.config/python_keyring/keyringrc.cfg:
[backend]
default-keyring=keyrings.alt.file.PlaintextKeyring
keyring.set_password(systemname, username, password)
passwd = keyring.get_password(systemname, username)
# Image that installs the Python requirements and ships the project under /code/.
FROM python:3.6
# A non-empty PYTHONUNBUFFERED makes Python write stdout/stderr unbuffered,
# so container logs appear immediately. Uses the key=value ENV form.
ENV PYTHONUNBUFFERED=1
# Copy only the requirements file first so the dependency layer is reused
# from the build cache when just the source code changes.
COPY ./requirements.txt /code/requirements.txt
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir -r /code/requirements.txt
COPY . /code/
WORKDIR /code/
@ajib6ept
ajib6ept / 1.md
Last active February 6, 2019 18:11

Project Structure

proj
├──proj
|  ├──__init__.py
|  ├──settings.py
|  └──celery.py
├──app1
|  └──tasks.py
import lxml.html
html = '''<html>
<head>
<title>MyTitle</title>
</head>
<body>
<h1>Header</h1>
<a href = "ya.ru">1</a>