Skip to content

Instantly share code, notes, and snippets.

@ksurent
Created January 23, 2013 10:14
Show Gist options
  • Save ksurent/4603988 to your computer and use it in GitHub Desktop.
Save ksurent/4603988 to your computer and use it in GitHub Desktop.
AE::HTTP + AE::AIO combo
#!/usr/bin/env perl
use v5.14;
use List::Util qw(shuffle);
use Parse::CPAN::Packages::Fast;
use List::MoreUtils qw(uniq any);
use AE;
use IO::AIO;
use AnyEvent::AIO;
use AnyEvent::HTTP;
my $index = Parse::CPAN::Packages::Fast->new('02packages.details.txt.gz');
my @distributions = shuffle
uniq
map $index->package($_)->distribution->dist,
$index->packages;
@distributions = @distributions[0 .. 999];
my @sections = qw(NAME ABSTRACT DESCRIPTION);
my $cv = AE::cv;
my $ctrl_c = AE::signal INT => sub {
$cv->send;
};
sub do_next() {
return add_request(shift @distributions) if @distributions;
return $cv->send;
}
do_next for 1 .. 10;
$cv->recv;
sub add_request {
my $dist = shift;
my $dist_colon = $dist =~ s/-/::/gr;
http_get "http://api.metacpan.org/pod/$dist_colon?content-type=text/plain", sub {
my($body, $hdr) = @_;
return do_next unless $hdr->{Status} eq '200';
my $relevant = extract_relevant($body);
return do_next if not defined $relevant or not length $relevant;
aio_open "downloaded/$dist.txt", IO::AIO::O_WRONLY | IO::AIO::O_CREAT, 0644, sub {
my $fh = shift or $cv->croak("aio_open(): $!");
my $wrt;
$wrt = sub {
my($data, $data_offset, $cb) = @_;
use bytes;
aio_write $fh, undef, length $data, $data, $data_offset, sub {
my $written = shift;
$cv->croak("aio_write(): $!") if $written < 0;
$data_offset += $written;
if($data_offset < length $data) {
$wrt->($data, $data_offset, $cb);
}
else {
undef $wrt;
$cb->();
}
};
};
$wrt->(
$relevant,
0,
sub {
aio_close $fh, sub {
do_next;
}
},
);
};
do_next;
};
}
sub extract_relevant {
my $body = shift;
my $relevant;
open my $fh, '<', \$body;
my $in_section = 0;
while(<$fh>) {
if(/^([A-Z]+(?:\s+[A-Z])*)$/) {
$in_section = $in_section ? 0 : $1;
}
elsif($in_section and any { $in_section eq $_ } @sections) {
$relevant .= $_ . ' ';
}
}
close $fh;
$relevant;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment