Created
January 9, 2012 08:03
-
-
Save dankogai/1581817 to your computer and use it in GitHub Desktop.
mirrorget.pl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
master: http://ftp.freebsd.org/pub/FreeBSD/ | |
mirrors: | |
- http://ftp.iij.ad.jp/pub/FreeBSD/ | |
- http://ftp.riken.jp/pub/FreeBSD/ | |
- http://ftp.sakura.ad.jp/pub/FreeBSD/ | |
- http://ftp.jaist.ac.jp/pub/FreeBSD/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
# | |
# $Id: mirrorget.pl,v 0.3 2012/01/09 08:00:24 dankogai Exp dankogai $ | |
# | |
use 5.010; | |
use strict; | |
use warnings; | |
use AnyEvent; | |
use AnyEvent::HTTP; | |
use File::Basename; | |
use URI; | |
use YAML qw(LoadFile); | |
use Time::HiRes qw/time/; | |
use List::Util qw/shuffle/; | |
use HTTP::Date; | |
sub help{ | |
die "$0 mirror.yml http://...\n"; | |
} | |
my $yml = shift or help(); | |
my $uri = shift or help(); | |
my $obj = LoadFile($yml) or die "$yml:$!"; | |
my $master = URI->new_abs( $uri, $obj->{master} ); | |
my $rel = $master->rel( $obj->{master} ); | |
my $fetchsize = 1 * 1024 * 1024; | |
die "The prefix of $rel is not $obj->{master}" unless $rel !~ m{\Ahttps?://}; | |
my @mirrors = map { $_ . $rel } @{ $obj->{mirrors} }; | |
my $header = {}; | |
my $dlname = basename($uri); | |
my ( $last_modified, $content_length, $mtime ); | |
{ | |
say '# Stage 0: checking headers'; | |
my $req_headers = {}; | |
my $dlsize = 0; | |
if ( my @stat = stat($dlname) ) { | |
$req_headers->{'if-modified-since'} = time2str( $stat[9] ); | |
$dlsize = $stat[7]; | |
} | |
my $cv = AE::cv; | |
for my $uri ( $master, @mirrors ) { | |
$cv->begin; | |
my $ev; | |
$ev = http_head $uri, | |
headers => $req_headers, | |
sub { | |
$header->{$uri} = $_[1] if $_[1]->{Status} =~ /^[23]/; | |
undef $ev; | |
$cv->end; | |
}; | |
} | |
my $started = time(); | |
$cv->recv; | |
my $elapsed = time() - $started; | |
die "Unable to get header for $master" unless $header->{$master}; | |
my $status = $header->{$master}{Status}; | |
$last_modified = $header->{$master}{'last-modified'}; | |
$content_length = $header->{$master}{'content-length'}; | |
$mtime = str2time($last_modified); | |
print <<EOT; | |
Status: $status | |
Elapsed: $elapsed sec. | |
EOT | |
exit 0 if $status =~ /^3/; # && $content_length == $dlsize; | |
for my $uri ( keys %{$header} ) { | |
delete $header->{$uri} | |
unless $last_modified eq $header->{$uri}{'last-modified'}; | |
delete $header->{$uri} | |
unless $content_length eq $header->{$uri}{'content-length'}; | |
} | |
print <<EOT; | |
Last-Modified: $last_modified | |
Content-Length: $content_length | |
URLs: | |
EOT | |
say " $_" for keys %{$header}; | |
} | |
{ | |
say '# Stage 1: fetching by pieces'; | |
local $| = 1; | |
my @chunks = ( 0 .. int( $content_length / $fetchsize ) ); | |
my @uris = keys %{$header}; | |
my $fetch; | |
my $cv = AE::cv; | |
$fetch = sub { | |
my ( $uri, $chunk ) = @_; | |
my $head = $chunk * $fetchsize; | |
my $tail = $head + $fetchsize - 1; | |
my $ev; | |
$cv->begin; | |
$ev = http_get $uri, | |
headers => { Range => "bytes=$head-$tail" }, | |
sub { | |
print "$chunk $_[1]->{Status}\r"; | |
undef $ev; | |
$cv->end; | |
if ( $_[1]->{Status} == 206 ) { | |
open my $wfh, '>:raw', "$dlname.$chunk" | |
or die "$dlname.$chunk:$!"; | |
print $wfh $_[0]; | |
close $wfh; | |
$header->{$uri}{fetches}++; | |
$fetch->( $uri, shift @chunks ) if @chunks; | |
} | |
else { | |
@uris = grep { $_ ne $uri } @uris; | |
push @uris, $master unless @uris; # last resort | |
$fetch->( ( shuffle @uris )[0], $chunk ); | |
} | |
}; | |
}; | |
for my $uri (@uris) { | |
$fetch->( $uri, shift @chunks ) if @chunks; | |
} | |
my $started = time(); | |
$cv->recv; | |
my $elapsed = time() - $started; | |
say "Elapsed: $elapsed sec."; | |
for my $uri ( keys %{$header} ) { | |
printf "%4d %s\n", $header->{$uri}{fetches}, $uri; | |
} | |
} | |
{ | |
say '# Final Stage: gathering pieces'; | |
local $| = 1; | |
local $/; | |
my $started = time(); | |
open my $wfh, '>:raw', $dlname or die "$dlname:$!"; | |
for my $c ( 0 .. int( $content_length / $fetchsize ) ) { | |
print "$c\r"; | |
open my $rfh, '<:raw', "$dlname.$c" or die "$dlname.$c:$!"; | |
my $content = <$rfh>; | |
print $wfh $content; | |
close $rfh; | |
unlink "$dlname.$c"; | |
} | |
close $wfh; | |
utime $mtime, $mtime, $dlname; | |
my $elapsed = time() - $started; | |
say "Elapsed: $elapsed sec."; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment