Created
September 6, 2014 05:46
-
-
Save anonymous/8b8f952f304ec96ba188 to your computer and use it in GitHub Desktop.
how to do async web getter?
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# this doesn't work. I think I need a dispatcher and | |
# some polling, not sure where to put it. | |
import | |
asyncdispatch, asyncnet, htmlparser, xmltree, httpclient, strutils, | |
strtabs, streams, uri, sets, os | |
var num_running = 0 | |
proc crawl(url: string, client: PAsyncHttpClient = newAsyncHttpClient()) {.async.} = | |
num_running += 1 | |
echo("Crawling ", url) | |
let resp = await client.get(url) | |
if resp.status.startswith("200"): | |
echo len(resp.body) | |
else: | |
echo "error: " & resp.status | |
num_running -= 1 | |
# how can i limit this to x concurrent connections? | |
# does sleep() block the async "crawl"? | |
# if so, how to do this? | |
var concurrency = 3 | |
for line in lines(stdin): | |
# sleep for 1 second if all connections busy | |
while num_running > concurrency: sleep(1000) | |
discard crawl("http://" & line & "/") | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment