Skip to content

Instantly share code, notes, and snippets.

@jnthn
Created June 29, 2018 15:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jnthn/b358ebfb6ea6c366834896b9f62ba92b to your computer and use it in GitHub Desktop.
Save jnthn/b358ebfb6ea6c366834896b9f62ba92b to your computer and use it in GitHub Desktop.
use Cro::HTTP::Client;
use Cro::Uri;
# Crawl a site starting at $initial-url.
#
# Every URL is requested at most once. Responses whose content type is
# text/html are scanned for double-quoted href attributes, and each link that
# does not start with a scheme (such as "http:" or "mailto:") is resolved
# against the page it was found on and crawled in turn.
#
# Failures of individual requests, bodies, or links are reported with `note`
# and skipped, so one bad page does not end the whole crawl.
#
# $initial-url - string form of the absolute URL to start from; it is parsed
#                with Cro::Uri.parse.
sub crawl($initial-url) {
    react {
        # URLs already requested, keyed by their string form.
        my %seen;
        my $client = Cro::HTTP::Client.new;

        crawl-url(Cro::Uri.parse($initial-url));

        # Request $url unless it has been requested before, and follow the
        # links of any HTML response.
        sub crawl-url(Cro::Uri $url) {
            return if %seen{$url}++;
            say "Getting $url";
            whenever $client.get($url) -> $response {
                # Only inspect the media type when the response supplies
                # one; an undefined content type is treated as "not HTML".
                with $response.content-type -> $media-type {
                    if $media-type.type-and-subtype eq 'text/html' {
                        get-links($response, $url);
                    }
                }
                QUIT {
                    default {
                        note "$url failed: " ~ .message;
                    }
                }
            }
        }

        # Extract the scheme-less links from the body of $response and crawl
        # each of them, resolved relative to $base.
        sub get-links($response, $base) {
            whenever $response.body-text -> $text {
                # `<(` starts the capture after the opening quote, so each
                # match stringifies to just the link target.
                for $text.match(/'href="' <!before \w+':'> <( <-["]>+/, :g) -> $link {
                    # Resolve inside `try`: an href that cannot be parsed
                    # must not throw out of this block and end the react.
                    my $target = try $base.add(~$link);
                    if $target.defined {
                        crawl-url $target;
                    }
                    else {
                        note "$base has an unparseable link: " ~ $link;
                    }
                }
                # Without this handler, a body that fails to arrive or
                # decode would terminate the whole crawl.
                QUIT {
                    default {
                        note "$base body failed: " ~ .message;
                    }
                }
            }
        }
    }
}
# Script entry point: start the crawl at this URL.
crawl 'https://commaide.com/';
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment