@njsmith
Created November 2, 2019 01:59
import trio

WORKER_COUNT = 10

async def worker(receive_chan):
    # Trick: each url gets its own clone of the send channel.
    # After processing a URL, we close its clone. That way, when all the
    # URLs are done, all the send clones will be closed, so the
    # 'async for ... in receive_chan' will automatically exit.
    async for send_chan, url in receive_chan:
        async with send_chan:
            # fetch() and parse_urls() are assumed to be defined elsewhere
            body = await fetch(url)
            # ... do whatever else you want to do with the body here ...
            new_urls = parse_urls(body)
            for new_url in new_urls:
                await send_chan.send((send_chan.clone(), new_url))

async def crawl(initial_url):
    # We use an unbounded channel, since our consumers are also producers, so
    # any finite buffer could potentially produce a deadlock.
    send_chan, receive_chan = trio.open_memory_channel(float("inf"))
    # Put in the first url to start us off.
    await send_chan.send((send_chan, initial_url))
    async with trio.open_nursery() as nursery:
        for i in range(WORKER_COUNT):
            nursery.start_soon(worker, receive_chan)

trio.run(crawl, "https://...")
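
The snippet assumes fetch() and parse_urls() are defined elsewhere. Here is a minimal sketch of what they might look like, assuming the third-party httpx library (whose AsyncClient runs under trio) and the standard-library html.parser; these names and details are illustrative, not part of the original gist:

from html.parser import HTMLParser

import httpx  # hypothetical choice; pip install httpx

async def fetch(url):
    # httpx's AsyncClient is trio-compatible, so it can be awaited
    # directly from a worker task.
    async with httpx.AsyncClient() as client:
        response = await client.get(url)
        return response.text

class _LinkParser(HTMLParser):
    # Collects the href attribute of every <a> tag encountered.
    def __init__(self):
        super().__init__()
        self.links = []

    def handle_starttag(self, tag, attrs):
        if tag == "a":
            for name, value in attrs:
                if name == "href" and value:
                    self.links.append(value)

def parse_urls(body):
    # Note: returns hrefs exactly as written in the page; a real crawler
    # would also resolve relative links against the page's URL, filter
    # duplicates, and skip non-HTTP schemes.
    parser = _LinkParser()
    parser.feed(body)
    return parser.links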