Skip to content

Instantly share code, notes, and snippets.

@AlexDaniel
Created July 2, 2018 01:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save AlexDaniel/50fcc38251c92995e3776237ed89eb0e to your computer and use it in GitHub Desktop.
Save AlexDaniel/50fcc38251c92995e3776237ed89eb0e to your computer and use it in GitHub Desktop.
#!/usr/bin/env perl6
use DOM::Tiny;
use HTTP::UserAgent;
# Command-line entry point: start a crawl at the seed URL.
#
#   --seed   URL the crawl starts from
#   --file   path of the file that discovered links are appended to
#   --depth  number of link levels to follow (values <= 0 crawl nothing)
#
# $depth is constrained to Int so that a non-numeric --depth is rejected up
# front with the generated usage message instead of dying later on the
# numeric comparison inside crawl(). Numeric command-line values arrive as
# IntStr allomorphs, which satisfy the Int constraint.
sub MAIN(Str :$seed = "http://perl6.org", Str :$file = "test.links", Int :$depth = 2) {
    crawl($seed, $file, $depth);
}
# Turn the href of an anchor found on page $base into an absolute http(s)
# URL. Returns Nil for links that cannot be crawled (empty, fragment-only,
# or using another scheme such as mailto: or javascript:).
# Note: "." and ".." path segments are not normalised.
sub resolve-link($base, $href) {
    # Already an absolute http(s) URL.
    return $href if $href ~~ m:i/^ https? '://' /;

    # Nothing to fetch for empty or same-page links.
    return Nil if $href eq '' or $href.starts-with('#');

    # Any other explicit scheme (mailto:, javascript:, ftp:, ...).
    return Nil if $href ~~ /^ <[a..z A..Z]> <[a..z A..Z 0..9 \+ \. \-]>* ':' /;

    # Split the base into scheme ("http:"), authority and path.
    return Nil unless $base ~~ m:i/^ (https? ':') '//' (<-[ \/ \? \# ]>+) (<-[ \? \# ]>*) /;
    my ($scheme, $host, $path) = ~$0, ~$1, ~$2;
    my $origin = $scheme ~ '//' ~ $host;

    # Scheme-relative: //host/path
    return $scheme ~ $href if $href.starts-with('//');

    # Root-relative: /path
    return $origin ~ $href if $href.starts-with('/');

    # Query-only: keep the current document, replace the query string.
    return $origin ~ ($path || '/') ~ $href if $href.starts-with('?');

    # Path-relative: resolve against the directory of the base document.
    my $dir = $path.contains('/')
        ?? $path.substr(0, $path.rindex('/') + 1)
        !! '/';
    return $origin ~ $dir ~ $href;
}

# Recursively crawl $url, appending every distinct link found to $file.
#
#   $url    page to fetch
#   $file   path of the file the discovered links are appended to
#   $depth  remaining levels to follow; returns immediately when <= 0
#
# Pages that cannot be fetched (exception, unsuccessful status, or a
# non-text body) are reported on STDERR and skipped, so one bad link does
# not abort the whole crawl.
sub crawl($url, $file, $depth) {
    return if $depth <= 0;
    say $depth, ‘ - ’, $url;

    my $ua = HTTP::UserAgent.new;

    # The request is synchronous, so no react/whenever block is needed.
    my $response = try $ua.get($url);
    without $response {
        note "Skipping $url: ", ($! ?? $!.message !! 'no response');
        return;
    }
    unless $response.is-success {
        note "Skipping $url: ", $response.status-line;
        return;
    }

    # Parse only the body; stringifying the whole response would also feed
    # the status line and headers to the HTML parser.
    my $body = $response.content;
    return unless $body ~~ Str;    # binary payload: nothing to parse

    my $dom = DOM::Tiny.parse($body);

    my @links;
    for $dom.find('a[href]') -> $e {
        with resolve-link($url, $e<href>) -> $link {
            @links.push($link);
        }
    }

    # De-duplicate once so that the file and the recursion see the same set.
    @links .= unique;
    return unless @links;

    spurt $file, @links.join("\n") ~ "\n", :append;

    for @links -> $link {
        crawl($link, $file, $depth - 1);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment