Skip to content

Instantly share code, notes, and snippets.

@AlexDaniel
Created July 2, 2018 01:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save AlexDaniel/4b0f0ac86840290b80e59221a3e459b0 to your computer and use it in GitHub Desktop.
Save AlexDaniel/4b0f0ac86840290b80e59221a3e459b0 to your computer and use it in GitHub Desktop.
#!/usr/bin/env perl6
use DOM::Tiny;
use HTTP::UserAgent;
# Shared work queue for the crawler. Each item sent on it is a Pair of
# `url => remaining-depth`; MAIN seeds it and crawl() adds the links it finds.
my $channel = Channel.new;
# Entry point: crawl outward from $seed, following links for $depth levels.
#
#   :$seed   URL of the first page to fetch (default "http://perl6.org").
#   :$depth  Number of levels to visit, counting the seed page itself
#            (default 2). crawl() rejects a false depth such as 0.
#
# Work items are `url => depth` pairs queued on the shared $channel.
sub MAIN(Str :$seed = "http://perl6.org", :$depth = 2) {
    $channel.send: $seed => $depth;

    # Drain the queue with the non-blocking Channel.poll instead of iterating
    # $channel.list: the list form only ends once the channel is closed, and
    # nothing ever closes it, so the program would hang forever after the last
    # page. crawl() is synchronous, so by the time it returns every link of
    # that page has already been queued; an empty queue therefore means the
    # crawl is complete.
    while (my $job = $channel.poll).defined {
        crawl $job.key, $job.value;
    }
}
# Turn $href, as found on the page at $base, into an absolute http(s) URL.
# Returns Nil for links that should not be queued: empty or fragment-only
# hrefs (same page) and non-http schemes such as mailto: or javascript:.
# Note: `.` and `..` path segments are not normalised here.
sub resolve-link(Str() $base, Str() $href) {
    my $link = $href.trim;
    return Nil if $link eq '' || $link.starts-with('#');

    # Already an absolute web URL (anchored, so "/docs/http-guide" is not
    # mistaken for one).
    return $link if $link ~~ /:i ^ https? '://' /;

    # Any other explicit scheme cannot be fetched by the crawler.
    return Nil if $link ~~ /^ <[a..z A..Z]> <[a..z A..Z 0..9 \+ \. \-]>* ':' /;

    # Split the base page URL (minus query/fragment) into scheme, host, path.
    my $page  = $base.subst(/ <[ \? \# ]> .* $/, '');
    my $parts = $page ~~ /^ ( <[a..z A..Z]>+ ) '://' ( <-[ \/ ]>+ ) ( .* ) $/;
    return Nil unless $parts;
    my $scheme = ~$parts[0];
    my $origin = $scheme ~ '://' ~ $parts[1];
    my $path   = ~$parts[2];

    # Protocol-relative link: reuse the base page's scheme.
    return $scheme ~ ':' ~ $link if $link.starts-with('//');

    # Root-relative link: resolve against the site origin, not the page URL.
    return $origin ~ $link if $link.starts-with('/');

    # Plain relative link: resolve against the directory of the base page.
    my $dir = $path.contains('/')
        ?? $path.substr(0, $path.rindex('/') + 1)
        !! '/';
    return $origin ~ $dir ~ $link;
}

# Fetch $url, hand the parsed page to process(), and queue every link found
# on it for a further crawl with the depth reduced by one.
#
#   $url    Address of the page to fetch.
#   $depth  Remaining levels to crawl, including this page; must be truthy.
#           When it is 1 or less the page is processed but its links are not
#           followed.
#
# Prints "<depth> - <url>" for each page visited. Links are sent to the shared
# $channel as `url => depth - 1` pairs.
sub crawl($url, $depth where *.so) {
    say $depth, ‘ - ’, $url;
    my $ua = HTTP::UserAgent.new;

    # $ua.get is a blocking call, so the result is used directly. Wrapping it
    # in react/whenever added nothing and turned the `return` below into a
    # non-local return out of the react machinery.
    my $response = $ua.get($url);

    # NOTE(review): ~$response stringifies the whole response object; this
    # presumably includes the status line and headers as well as the body.
    # Confirm whether the body alone should be parsed instead.
    my $dom = DOM::Tiny.parse: ~$response;
    process $dom;

    # Last level reached: process the page but do not follow its links.
    return if $depth ≤ 1;

    for $dom.find('a[href]') -> $e {
        with resolve-link($url, $e<href>) -> $target {
            $channel.send: $target => $depth - 1;
        }
    }
}
# Hook for per-page work. crawl() calls this once for every fetched page,
# passing the value returned by DOM::Tiny.parse for that page.
# Currently a placeholder: it does nothing, and crawl() ignores its result.
sub process($dom) {
# Do other page processing here
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment