Skip to content

Instantly share code, notes, and snippets.

@AlexDaniel
Created July 2, 2018 02:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save AlexDaniel/d268f439499c18b7fbfd9bbb854a1c76 to your computer and use it in GitHub Desktop.
Save AlexDaniel/d268f439499c18b7fbfd9bbb854a1c76 to your computer and use it in GitHub Desktop.
#!/usr/bin/env perl6
use DOM::Tiny;
use HTTP::UserAgent;
# Shared work queue of `url => depth` Pairs: MAIN sends the seed into it,
# the worker loops in MAIN receive from it, and crawl sends every link it
# wants followed back into it.
my $channel = Channel.new;
# Entry point: seed the shared channel and run a pool of crawler workers
# until there is no work left.
#
#   :$seed    - URL the crawl starts from.
#   :$depth   - depth budget handed to crawl for the seed page.
#   :$workers - number of concurrent worker tasks to start.
#
# Termination: new work can only be produced by a worker that is inside
# crawl, so once the queue is empty AND no worker is busy the crawl is
# complete. Both facts are checked under one lock so they are observed
# atomically; without this nothing ever closes the channel and the program
# would wait forever.
sub MAIN(Str :$seed = "http://perl6.org", :$depth = 2, :$workers = 4) {
    $channel.send: $seed => $depth;

    my $lock = Lock.new;
    my $busy = 0;    # number of workers currently inside crawl

    my @tasks = do for ^$workers {
        start {
            loop {
                my $p;
                my $finished = False;

                # Take an item and mark this worker busy in one atomic step,
                # or learn that the whole crawl has drained.
                $lock.protect({
                    $p = $channel.poll;
                    if $p.defined {
                        $busy++;
                    }
                    else {
                        $finished = $busy == 0;
                    }
                });

                last if $finished;

                without $p {
                    # Queue is empty but another worker may still add links.
                    sleep 0.05;
                    next;
                }

                say $*THREAD;
                {
                    crawl $p.key, $p.value;

                    # One bad page must not kill the worker (and must not
                    # leave $busy stuck above zero).
                    CATCH {
                        default {
                            note "crawl of {$p.key} failed: {.message}";
                        }
                    }
                }
                $lock.protect({ $busy-- });
            }
        }
    }

    await @tasks;
    $channel.close;
}
# Fetch one page, pass its DOM to process(), and queue the page's links.
#
#   $url   - address of the page to download.
#   $depth - remaining depth budget; must be truthy. Links are queued with
#            $depth - 1, and only while $depth is greater than 1.
#
# Pages that do not come back with a successful status, or whose body is
# not text, are skipped. Exceptions raised by the HTTP client propagate to
# the caller.
sub crawl($url, $depth where *.so) {
    say $depth, ‘ - ’, $url;

    # get() is a plain blocking call, so no react/whenever wrapper is needed
    # and the early returns below are ordinary returns from this sub.
    my $response = HTTP::UserAgent.new.get($url);
    return unless $response.is-success;

    # Parse the body only: ~$response would serialize the whole message,
    # status line and headers included, and feed that to the HTML parser.
    my $html = $response.content;
    return unless $html ~~ Str;    # undecoded (binary) bodies are not HTML

    my $dom = DOM::Tiny.parse: $html;
    process $dom;
    return if $depth ≤ 1;

    for $dom.find('a[href]') -> $e {
        with resolve-link($url, $e<href>) -> $link {
            $channel.send: $link => $depth - 1;
        }
    }
    return;
}

# Resolve an href found on the page at $base to an absolute http(s) URL.
# Returns Nil for links that cannot be crawled: empty or fragment-only
# hrefs and schemes other than http/https (mailto:, javascript:, ...).
# Note: "." and ".." path segments are not normalized.
sub resolve-link($base, $href) {
    my $link = $href.Str.trim;
    return Nil if $link eq '' || $link.starts-with('#');

    # Anchored on purpose: "http" appearing somewhere inside a relative
    # path must not make the link look absolute.
    return $link if $link ~~ /:i ^ https? '://' /;

    # Any other explicit scheme is not something this crawler can fetch.
    return Nil if $link ~~ /^ <[a..zA..Z]> <[a..zA..Z0..9 \+ \. \-]>* ':' /;

    # Split the base into origin ("scheme://host[:port]") and path, dropping
    # any query string or fragment. If the base is not an http(s) URL, fall
    # back to treating the whole base as the origin.
    my $origin = ~$base;
    my $path   = '';
    if $base ~~ /:i ^ ( https? '://' <-[ \/ \? \# ]>+ ) ( <-[ \? \# ]>* ) / {
        $origin = ~$0;
        $path   = ~$1;
    }

    # Protocol-relative link: reuse the scheme of the base URL.
    if $link.starts-with('//') {
        my $scheme = $origin.contains('://') ?? $origin.split('://')[0] !! 'http';
        return $scheme ~ ':' ~ $link;
    }

    # Root-relative link: resolve against the origin.
    return $origin ~ $link if $link.starts-with('/');

    # Query-only link: same path as the base, different query string.
    return $origin ~ ($path || '/') ~ $link if $link.starts-with('?');

    # Path-relative link: resolve against the directory of the base path.
    my $dir = $path.subst(/ <-[ \/ ]>* $ /, '') || '/';
    return $origin ~ $dir ~ $link;
}
# Extension point for per-page work. crawl calls this with the parsed DOM
# of every page it downloads; the stub deliberately does nothing and
# yields no value.
sub process($dom) {
    # Do other page processing here
    return;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment