Skip to content

Instantly share code, notes, and snippets.

@dankogai
Created January 9, 2012 08:03
Show Gist options
  • Save dankogai/1581817 to your computer and use it in GitHub Desktop.
Save dankogai/1581817 to your computer and use it in GitHub Desktop.
mirrorget.pl
master: http://ftp.freebsd.org/pub/FreeBSD/
mirrors:
- http://ftp.iij.ad.jp/pub/FreeBSD/
- http://ftp.riken.jp/pub/FreeBSD/
- http://ftp.sakura.ad.jp/pub/FreeBSD/
- http://ftp.jaist.ac.jp/pub/FreeBSD/
#!/usr/bin/env perl
#
# $Id: mirrorget.pl,v 0.3 2012/01/09 08:00:24 dankogai Exp dankogai $
#
use 5.010;
use strict;
use warnings;
use AnyEvent;
use AnyEvent::HTTP;
use File::Basename;
use URI;
use YAML qw(LoadFile);
use Time::HiRes qw/time/;
use List::Util qw/shuffle/;
use HTTP::Date;
sub help{
die "$0 mirror.yml http://...\n";
}
my $yml = shift or help();
my $uri = shift or help();
my $obj = LoadFile($yml) or die "$yml:$!";
my $master = URI->new_abs( $uri, $obj->{master} );
my $rel = $master->rel( $obj->{master} );
my $fetchsize = 1 * 1024 * 1024;
die "The prefix of $rel is not $obj->{master}" unless $rel !~ m{\Ahttps?://};
my @mirrors = map { $_ . $rel } @{ $obj->{mirrors} };
my $header = {};
my $dlname = basename($uri);
my ( $last_modified, $content_length, $mtime );
{
say '# Stage 0: checking headers';
my $req_headers = {};
my $dlsize = 0;
if ( my @stat = stat($dlname) ) {
$req_headers->{'if-modified-since'} = time2str( $stat[9] );
$dlsize = $stat[7];
}
my $cv = AE::cv;
for my $uri ( $master, @mirrors ) {
$cv->begin;
my $ev;
$ev = http_head $uri,
headers => $req_headers,
sub {
$header->{$uri} = $_[1] if $_[1]->{Status} =~ /^[23]/;
undef $ev;
$cv->end;
};
}
my $started = time();
$cv->recv;
my $elapsed = time() - $started;
die "Unable to get header for $master" unless $header->{$master};
my $status = $header->{$master}{Status};
$last_modified = $header->{$master}{'last-modified'};
$content_length = $header->{$master}{'content-length'};
$mtime = str2time($last_modified);
print <<EOT;
Status: $status
Elapsed: $elapsed sec.
EOT
exit 0 if $status =~ /^3/; # && $content_length == $dlsize;
for my $uri ( keys %{$header} ) {
delete $header->{$uri}
unless $last_modified eq $header->{$uri}{'last-modified'};
delete $header->{$uri}
unless $content_length eq $header->{$uri}{'content-length'};
}
print <<EOT;
Last-Modified: $last_modified
Content-Length: $content_length
URLs:
EOT
say " $_" for keys %{$header};
}
{
say '# Stage 1: fetching by pieces';
local $| = 1;
my @chunks = ( 0 .. int( $content_length / $fetchsize ) );
my @uris = keys %{$header};
my $fetch;
my $cv = AE::cv;
$fetch = sub {
my ( $uri, $chunk ) = @_;
my $head = $chunk * $fetchsize;
my $tail = $head + $fetchsize - 1;
my $ev;
$cv->begin;
$ev = http_get $uri,
headers => { Range => "bytes=$head-$tail" },
sub {
print "$chunk $_[1]->{Status}\r";
undef $ev;
$cv->end;
if ( $_[1]->{Status} == 206 ) {
open my $wfh, '>:raw', "$dlname.$chunk"
or die "$dlname.$chunk:$!";
print $wfh $_[0];
close $wfh;
$header->{$uri}{fetches}++;
$fetch->( $uri, shift @chunks ) if @chunks;
}
else {
@uris = grep { $_ ne $uri } @uris;
push @uris, $master unless @uris; # last resort
$fetch->( ( shuffle @uris )[0], $chunk );
}
};
};
for my $uri (@uris) {
$fetch->( $uri, shift @chunks ) if @chunks;
}
my $started = time();
$cv->recv;
my $elapsed = time() - $started;
say "Elapsed: $elapsed sec.";
for my $uri ( keys %{$header} ) {
printf "%4d %s\n", $header->{$uri}{fetches}, $uri;
}
}
{
say '# Final Stage: gathering pieces';
local $| = 1;
local $/;
my $started = time();
open my $wfh, '>:raw', $dlname or die "$dlname:$!";
for my $c ( 0 .. int( $content_length / $fetchsize ) ) {
print "$c\r";
open my $rfh, '<:raw', "$dlname.$c" or die "$dlname.$c:$!";
my $content = <$rfh>;
print $wfh $content;
close $rfh;
unlink "$dlname.$c";
}
close $wfh;
utime $mtime, $mtime, $dlname;
my $elapsed = time() - $started;
say "Elapsed: $elapsed sec.";
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment