import trio

WORKER_COUNT = 10

async def worker(receive_chan):
    # Trick: each url gets its own clone of the send channel.
    # After processing a URL, we close its clone. That way, when all the
    # URLs are done, all the send clones will be closed, so the
    # 'async for ... in receive_chan' will automatically exit.
    async for send_chan, url in receive_chan:
        async with send_chan:
            body = await fetch(url)
            # ... do whatever else you want to do with the body here ...
            new_urls = parse_urls(body)
            for new_url in new_urls:
                await send_chan.send((send_chan.clone(), new_url))

async def crawl(initial_url):
    # We use an unbounded channel, since our consumers are also producers, so
    # any finite buffer could potentially produce a deadlock
    send_chan, receive_chan = trio.open_memory_channel(float("inf"))
    # Put in the first url to start us off
    await send_chan.send((send_chan, initial_url))
    async with trio.open_nursery() as nursery:
        for _ in range(WORKER_COUNT):
            nursery.start_soon(worker, receive_chan)

trio.run(crawl, "https://...")
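
The listing calls fetch and parse_urls without defining them. Here is a minimal sketch of what those two helpers might look like, assuming httpx for HTTP (its AsyncClient runs on trio as well as asyncio) and a crude regex for link extraction; neither choice is part of the original gist.

import re
import httpx

async def fetch(url):
    # Assumed helper: download one page and return its body as text.
    # httpx.AsyncClient works under trio, so it can be awaited from
    # inside the worker above.
    async with httpx.AsyncClient() as client:
        response = await client.get(url)
        return response.text

def parse_urls(body):
    # Assumed helper: naive absolute-URL extraction with a regex.
    # Good enough for a sketch; a real crawler would use an HTML parser
    # and resolve relative links.
    return re.findall(r'href="(https?://[^"]+)"', body)

Note that this sketch, like the original, never deduplicates URLs; a real crawler would keep a set of already-seen URLs and skip links it has already queued, or any cycle of pages linking to each other would loop forever.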
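
The shutdown trick described in worker's comment is worth seeing in isolation: a trio memory channel's receive side keeps yielding values until the original send handle and every clone of it have been closed, then the 'async for' loop ends on its own. A tiny standalone demonstration (an assumed example, not from the original gist):

import trio

async def demo():
    send_chan, receive_chan = trio.open_memory_channel(float("inf"))
    clone = send_chan.clone()
    await send_chan.send("from original")
    await clone.send("from clone")
    # Buffered messages survive closing; the loop below drains them,
    # then exits because no open send handle remains.
    await send_chan.aclose()
    await clone.aclose()
    async for item in receive_chan:
        print(item)

trio.run(demo)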