Bowlslaw/crawler.p6 Secret

## crawler.p6
1 #!/usr/bin/env perl6
2
3 use v6;
4
5 use DOM::Tiny;
6 use HTTP::UserAgent;
7
# Work queue shared by MAIN and crawl: every item is a `url => depth` Pair.
8 my $channel = Channel.new;
9
# Entry point: seeds the work queue with the start URL and runs a pool of
# worker loops that crawl queued pages until no work is left.
#
#   :$seed    – URL the crawl starts from
#   :$depth   – depth budget handed to `crawl` for the seed page
#   :$workers – number of concurrent worker loops to start
sub MAIN(Str :$seed = "http://perl6.org", Int :$depth = 4, Int :$workers = 4) {
    $channel.send: $seed => $depth;

    # Nothing closes the queue while the crawl is running, so a blocking
    # `receive` would park every worker forever once the work runs out.
    # Workers poll instead, and $busy (only touched under $lock) counts the
    # jobs currently being crawled: an empty queue while no job is in flight
    # means nobody can enqueue further links, i.e. the crawl is complete.
    my $lock = Lock.new;
    my Int $busy = 0;

    my @pool = (^$workers).map: {
        start {
            loop {
                my $job;
                my $drained = False;
                $lock.protect: {
                    $job = $channel.poll;
                    if $job.defined {
                        $busy++;
                    }
                    elsif $busy == 0 {
                        $drained = True;
                    }
                };
                last if $drained;

                without $job {
                    # Queue is empty, but a busy worker may still add links.
                    sleep 0.05;
                    next;
                }

                say $*THREAD;
                {
                    crawl $job.key, $job.value;
                    # A failing page must not take the whole worker down.
                    CATCH {
                        default { note "crawl of {$job.key} failed: {.message}" }
                    }
                }
                $lock.protect: { $busy-- };
            }
        }
    }

    await @pool;
    $channel.close;
}
21
# Fetches one page, passes its DOM to `process`, and – while depth budget is
# left – queues each outgoing link on $channel as `link => $depth - 1`.
#
#   $url   – absolute URL of the page to fetch
#   $depth – remaining depth budget; the `where *.so` constraint rejects 0,
#            and 1 means "process this page but do not follow its links"
#
# A failed or non-text fetch is reported on STDERR and skipped instead of
# throwing, so one bad link does not abort the caller.
sub crawl($url, $depth where *.so) {
    say $depth, ' - ', $url;

    # The fetch is an ordinary blocking call, so plain sequential code is
    # enough and the early exits below are normal sub returns.
    my $response = try HTTP::UserAgent.new.get($url);
    without $response {
        note "fetch of $url failed: {$! ?? $!.message !! 'no response'}";
        return;
    }
    unless $response.is-success {
        note "fetch of $url failed: {$response.status-line}";
        return;
    }

    # Parse the body only; stringifying the whole response would hand the
    # status line and headers to the HTML parser as well.
    my $body = $response.content;
    return unless $body ~~ Str;    # skip bodies that are not text

    my $dom = DOM::Tiny.parse($body);
    process $dom;

    return if $depth ≤ 1;

    my @links = $dom.find('a[href]').map({ resolve-link($url, $_<href>) });
    $channel.send($_ => $depth - 1) for @links.grep(*.defined).unique;
}

# Resolves $href, as found on the page at $base, to an absolute http(s) URL.
# Returns Nil for links that cannot be crawled: empty or fragment-only hrefs,
# other schemes (mailto:, javascript:, …), or a $base that is not http(s).
# Dot segments ("./", "../") are passed through without normalisation.
sub resolve-link($base, $href) {
    # Drop any "#fragment": it addresses a spot inside the same document.
    my $link = $href.Str.trim.split('#', 2)[0];
    return Nil unless $link.chars;

    # Already absolute – anchored, so "http" inside a path does not count.
    return $link if $link ~~ /:i ^ 'http' 's'? '://' /;
    # Any other "scheme:" prefix is something we cannot fetch.
    return Nil if $link ~~ /^ <[a..z A..Z]> <[a..z A..Z 0..9 \+ \. \-]>* ':' /;

    my $m = $base.Str ~~ /:i ^ $<scheme>=[ 'http' 's'? ] '://'
                               $<host>=[ <-[\/ \? \#]>+ ]
                               $<path>=[ <-[\? \#]>* ] /;
    return Nil unless $m;

    my $origin = $m<scheme> ~ '://' ~ $m<host>;
    return $m<scheme> ~ ':' ~ $link if $link.starts-with('//');   # protocol-relative
    return $origin ~ $link          if $link.starts-with('/');    # root-relative

    my $path = $m<path>.Str || '/';
    return $origin ~ $path ~ $link  if $link.starts-with('?');    # query-only

    # Relative path: replace everything after the last "/" of the base path.
    return $origin ~ $path.substr(0, $path.rindex('/') + 1) ~ $link;
}
43
# Per-page hook: crawl calls this with the parsed DOM of every fetched page.
# Currently a no-op placeholder.
sub process($dom) {
    # process here
}
	1 #!/usr/bin/env perl6
	2
	3 use v6;
	4
	5 use DOM::Tiny;
	6 use HTTP::UserAgent;
	7
	# Work queue shared by MAIN and crawl: every item is a `url => depth` Pair.
	8 my $channel = Channel.new;
	9
	# Entry point: seeds the work queue with the start URL and runs a pool of
	# worker loops that crawl queued pages until no work is left.
	#
	#   :$seed    – URL the crawl starts from
	#   :$depth   – depth budget handed to `crawl` for the seed page
	#   :$workers – number of concurrent worker loops to start
	sub MAIN(Str :$seed = "http://perl6.org", Int :$depth = 4, Int :$workers = 4) {
	    $channel.send: $seed => $depth;

	    # Nothing closes the queue while the crawl is running, so a blocking
	    # `receive` would park every worker forever once the work runs out.
	    # Workers poll instead, and $busy (only touched under $lock) counts the
	    # jobs currently being crawled: an empty queue while no job is in flight
	    # means nobody can enqueue further links, i.e. the crawl is complete.
	    my $lock = Lock.new;
	    my Int $busy = 0;

	    my @pool = (^$workers).map: {
	        start {
	            loop {
	                my $job;
	                my $drained = False;
	                $lock.protect: {
	                    $job = $channel.poll;
	                    if $job.defined {
	                        $busy++;
	                    }
	                    elsif $busy == 0 {
	                        $drained = True;
	                    }
	                };
	                last if $drained;

	                without $job {
	                    # Queue is empty, but a busy worker may still add links.
	                    sleep 0.05;
	                    next;
	                }

	                say $*THREAD;
	                {
	                    crawl $job.key, $job.value;
	                    # A failing page must not take the whole worker down.
	                    CATCH {
	                        default { note "crawl of {$job.key} failed: {.message}" }
	                    }
	                }
	                $lock.protect: { $busy-- };
	            }
	        }
	    }

	    await @pool;
	    $channel.close;
	}
	21
	# Fetches one page, passes its DOM to `process`, and – while depth budget is
	# left – queues each outgoing link on $channel as `link => $depth - 1`.
	#
	#   $url   – absolute URL of the page to fetch
	#   $depth – remaining depth budget; the `where *.so` constraint rejects 0,
	#            and 1 means "process this page but do not follow its links"
	#
	# A failed or non-text fetch is reported on STDERR and skipped instead of
	# throwing, so one bad link does not abort the caller.
	sub crawl($url, $depth where *.so) {
	    say $depth, ' - ', $url;

	    # The fetch is an ordinary blocking call, so plain sequential code is
	    # enough and the early exits below are normal sub returns.
	    my $response = try HTTP::UserAgent.new.get($url);
	    without $response {
	        note "fetch of $url failed: {$! ?? $!.message !! 'no response'}";
	        return;
	    }
	    unless $response.is-success {
	        note "fetch of $url failed: {$response.status-line}";
	        return;
	    }

	    # Parse the body only; stringifying the whole response would hand the
	    # status line and headers to the HTML parser as well.
	    my $body = $response.content;
	    return unless $body ~~ Str;    # skip bodies that are not text

	    my $dom = DOM::Tiny.parse($body);
	    process $dom;

	    return if $depth ≤ 1;

	    my @links = $dom.find('a[href]').map({ resolve-link($url, $_<href>) });
	    $channel.send($_ => $depth - 1) for @links.grep(*.defined).unique;
	}

	# Resolves $href, as found on the page at $base, to an absolute http(s) URL.
	# Returns Nil for links that cannot be crawled: empty or fragment-only hrefs,
	# other schemes (mailto:, javascript:, …), or a $base that is not http(s).
	# Dot segments ("./", "../") are passed through without normalisation.
	sub resolve-link($base, $href) {
	    # Drop any "#fragment": it addresses a spot inside the same document.
	    my $link = $href.Str.trim.split('#', 2)[0];
	    return Nil unless $link.chars;

	    # Already absolute – anchored, so "http" inside a path does not count.
	    return $link if $link ~~ /:i ^ 'http' 's'? '://' /;
	    # Any other "scheme:" prefix is something we cannot fetch.
	    return Nil if $link ~~ /^ <[a..z A..Z]> <[a..z A..Z 0..9 \+ \. \-]>* ':' /;

	    my $m = $base.Str ~~ /:i ^ $<scheme>=[ 'http' 's'? ] '://'
	                               $<host>=[ <-[\/ \? \#]>+ ]
	                               $<path>=[ <-[\? \#]>* ] /;
	    return Nil unless $m;

	    my $origin = $m<scheme> ~ '://' ~ $m<host>;
	    return $m<scheme> ~ ':' ~ $link if $link.starts-with('//');   # protocol-relative
	    return $origin ~ $link          if $link.starts-with('/');    # root-relative

	    my $path = $m<path>.Str || '/';
	    return $origin ~ $path ~ $link  if $link.starts-with('?');    # query-only

	    # Relative path: replace everything after the last "/" of the base path.
	    return $origin ~ $path.substr(0, $path.rindex('/') + 1) ~ $link;
	}
	43
	# Per-page hook: crawl calls this with the parsed DOM of every fetched page.
	# Currently a no-op placeholder.
	sub process($dom) {
	    # process here
	}