An example of a scraper using asyncio and aiohttp
import asyncio
import aiohttp
import bs4
import tqdm
async def get(*args, **kwargs):
    """GET a URL and return the response body decoded to text.

    Accepts the same positional/keyword arguments as aiohttp.request()
    (minus the method, which is fixed to 'GET').
    """
    # ClientResponse.read_and_close() was removed from aiohttp (Dec 2015);
    # response.text() is the replacement that reads and decodes the body.
    # The context manager releases the connection back to the pool.
    async with aiohttp.request('GET', *args, **kwargs) as response:
        return await response.text()
async def wait_with_progress(coros):
    """Await *coros* in completion order, showing a tqdm progress bar.

    `coros` must be a sized collection (len() is used for the bar total).
    Results are discarded; exceptions propagate.
    """
    # @asyncio.coroutine / `yield from` were removed in Python 3.11;
    # async/await is the supported spelling.
    for future in tqdm.tqdm(asyncio.as_completed(coros), total=len(coros)):
        await future
def first_magnet(page):
    """Return the href of the first magnet link found in *page* (HTML text).

    Raises TypeError if no anchor with the expected title exists
    (``find`` returns None, and ``None['href']`` fails).
    """
    # Pass an explicit parser: bs4 warns when the parser is guessed from
    # whatever happens to be installed, and results can differ per parser.
    soup = bs4.BeautifulSoup(page, 'html.parser')
    anchor = soup.find('a', title='Download this torrent using magnet')
    return anchor['href']
async def print_magnet(query):
    """Search the site for *query* and print its first magnet link."""
    url = 'http://thepiratebay.se/search/{}/0/7/0'.format(query)
    # The module-level semaphore caps the number of concurrent requests;
    # only the network fetch needs to hold it.
    async with sem:
        page = await get(url, compress=True)
    magnet = first_magnet(page)
    print('{}: {}'.format(query, magnet))
distros = ['archlinux', 'ubuntu', 'debian']
# At most 5 requests in flight at once (shared by print_magnet).
sem = asyncio.Semaphore(5)
loop = asyncio.get_event_loop()
# asyncio.wait() with bare coroutines is deprecated (removed in 3.11);
# gather() schedules them and also propagates any exception instead of
# silently collecting it in the done set.
f = asyncio.gather(*(print_magnet(d) for d in distros))
loop.run_until_complete(f)
This comment has been minimized.
This comment has been minimized.
Perhaps I'm being picky, but you should pass your variable |
This comment has been minimized.
This comment has been minimized.
Hi, I did a "POST" version with connection pooling after I'd read the example.

import asyncio
import aiohttp
import ujson
import tqdm

payload = ujson.dumps({'hello': 'world'})
url = "http://127.0.0.1:8000"
sem = asyncio.Semaphore(5)
async def wait_with_progress(coros):
    """Await *coros* in completion order, with a tqdm progress bar.

    `coros` must be a sized collection (len() feeds the bar total).
    """
    # @asyncio.coroutine / `yield from` were removed in Python 3.11;
    # async/await is the supported spelling.
    for future in tqdm.tqdm(asyncio.as_completed(coros), total=len(coros)):
        await future
async def post():
    """POST the module-level payload to the module-level url and return the
    response body as text, reusing the shared TCP connector (conn)."""
    # Context manager releases the connection back to the pool when done.
    async with aiohttp.request('post', url, data=payload, connector=conn) as resp:
        return await resp.text()
async def print_status(i):
    """Issue one POST, throttled by the shared semaphore.

    *i* is the request index; printing is currently disabled, so the
    response text is fetched but not shown.
    """
    async with sem:
        status = await post()
        # print('#' + str(i) + ' ' + status)
if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    # One shared connector => connection pooling across all 5000 requests.
    conn = aiohttp.connector.TCPConnector()
    f = [print_status(i) for i in range(5000)]
    loop.run_until_complete(wait_with_progress(f))

# And the echo server:
from flask import Flask
from flask import request
app = Flask(__name__)
@app.route('/', methods=['POST'])
def echo():
    # Echo the raw request body back to the client unchanged.
    return request.data
# Run the echo server on the same port the async client posts to.
if __name__ == '__main__':
    app.run(port=8000)
This comment has been minimized.
This comment has been minimized.
The aiohttp method ClientResponse.read_and_close() is gone as of December 2015 Should change line 10 to: return (yield from response.text()) |
This comment has been minimized.
This comment has been minimized.
Tested with Python 3.8, but rip ThePirateBay. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This comment has been minimized.
FYI it's spelled "scraper" not "scrapper"