Skip to content

Instantly share code, notes, and snippets.

@jrk
Created July 22, 2009 08:03
Show Gist options
  • Save jrk/151872 to your computer and use it in GitHub Desktop.
Save jrk/151872 to your computer and use it in GitHub Desktop.
An improved replacement for scp, with resume and advanced compression support. By Andrei Alexandrescu.
#!/usr/bin/env perl
# Via: http://erdani.org/code/scpi.html
################################################################################
## Copyright (c) 2006 by Andrei Alexandrescu
## Permission to use, copy, modify, distribute and sell this software for any
## purpose is hereby granted without fee, provided that the above copyright
## notice appear in all copies and that both that copyright notice and this
## permission notice appear in supporting documentation.
## The author makes no representations about the
## suitability of this software for any purpose. It is provided "as is"
## without express or implied warranty.
################################################################################
use strict;
use warnings;
use Pod::Usage;
use Getopt::Long qw(:config no_ignore_case bundling);
use IPC::Open2;
use File::Basename;
use Time::HiRes qw(time sleep);
sub CopySpec($$);
sub OpenCmdChannel($);
sub SysReadLine($);
sub FormatSize($;$);
sub CopyFile($$$$$);
sub ReadAll($\$);
sub ReadChunk($\$);
sub yap(@);
my @sshOptions = ("-e", "none");
my $batchMode = 0;
my $ssh = "ssh";
my $limitBandwidth = 2_000_000_000;
my $preserveTime = 0;
my $quiet = 0;
my $recursive = 0;
my $verbose = 0;
my $restMode = 0;
my $compress = "";
my $expand = "cat";
my ($srcRdr, $srcWtr, $dstRdr, $dstWtr);
my $sharedCode = q{
my $bufSize = 32 * 1024;
sub SysReadLine($) {
my ($h) = @_;
my $buf = "";
while (sysread($h, my $c, 1)) {
$buf .= $c;
last if ($c eq "\n");
}
$buf;
}
sub ReadChunk($\$) {
my ($h, $buf) = @_;
$$buf = "";
my $len = int(SysReadLine($h));
while ((my $diff = $len - length($$buf)) > 0) {
sysread($h, $$buf, $diff, length($$buf)) or die "Cannot read from stream";
}
$len;
}
sub WriteChunk($$) {
my $h = $_[0];
my $buf = \$_[1];
my $len = length($$buf);
syswrite($h, "$len\n") && syswrite($h, $$buf) == $len
or die "Cannot write to stream";
$len;
}
};
eval($sharedCode);
die $@ if $@;
sub AddSSHOption($$) { push @sshOptions, "-$_[0]"; }
sub AddSSHOption2($$) { push @sshOptions, "-$_[0]", "$_[1]"; }
GetOptions(
"h|help|?" => sub { pod2usage(1); },
"1" => \&AddSSHOption,
"2" => \&AddSSHOption,
"4" => \&AddSSHOption,
"6" => \&AddSSHOption,
"B" => \$batchMode,
"C" => \&AddSSHOption,
"c=s" => \&AddSSHOption2,
"F=s" => \&AddSSHOption2,
"i=s" => \&AddSSHOption2,
"l=i" => sub { $limitBandwidth = 1024 * $_[1] },
"o=s" => \&AddSSHOption2,
"P=i" => sub { AddSSHOption2("p", $_[1]); },
"p" => \$preserveTime,
"q" => \$quiet,
"r" => \$recursive,
"S=s" => \$ssh,
"v" => \&AddSSHOption,
"R" => \$restMode,
"b" => sub { $compress = "|bzip2"; $expand = "bunzip2" },
"g" => sub { $compress = "|gzip"; $expand = "gunzip" },
"n" => sub { $compress = ""; $expand = "cat" },
"d" => sub { ++$verbose },
);
# main code starts here; scoped for the sake of symbol locality
{
pod2usage(1) if (@ARGV < 2);
my $dst = pop(@ARGV);
my ($dstHost, $dstSpec) = ($dst =~ qr{(?:(.+?)\:)?(.*)});
$dstHost = '' if !$dstHost;
$dstSpec = '' if !$dstSpec;
for my $src (@ARGV) {
# Parse source
my ($srcHost, $srcSpec) = ($src =~ qr{(?:(.+?)\:)?(.*)});
$srcHost = '' if !$srcHost;
yap("Source host: `$srcHost', target host: `$dstHost'\n");
# Start communicating with the two hosts
($srcRdr, $srcWtr) = OpenCmdChannel($srcHost);
($dstRdr, $dstWtr) = OpenCmdChannel($dstHost);
# Here's the meat
CopySpec($srcSpec, $dstSpec);
# Cleanuppa
close($srcWtr) or warn("Cannot close source channel");
close($dstWtr) or warn("Cannot close target channel");
}
}
################################################################################
sub CopySpec($$) {
my ($srcSpec, $dstSpec) = @_;
# support spaces in destination, while allowing other metachars
$srcSpec =~ s/(\s)/\\$1/g;
yap("Source filespec: `$srcSpec', target filespec: `$dstSpec'\n");
#my $fileInfoFormat = q{"q{%n}, %s, q{%A}, "};
print $srcWtr qq{<stat --format="q{%n}, %s, q{%A}, "},
qq{ -- $srcSpec 2>/dev/null\n};
ReadAll($srcRdr, my $buf);
yap("Source names, sizes, attributes: $buf\n");
my @srcFiles = eval("($buf)");
for (my $i = 0; $i < @srcFiles; $i += 3) {
my $srcName = $srcFiles[$i];
my $srcSize = $srcFiles[$i + 1];
my $srcAttr = $srcFiles[$i + 2];
if ($srcAttr =~ /^d/) {
if (!$recursive) {
print "Ignoring directory `$srcName'\n";
next;
}
yap("Recursive copy\n");
$dstSpec =~ s{/$}{};
my $targetDir = "$dstSpec/" . basename($srcName) . '/';
print $dstWtr qq{<mkdir --parents '$targetDir'; echo \$?\n};
ReadAll($dstRdr, $buf);
$buf = int($buf);
if ($buf) {
warn "Cannot create directory `$targetDir', error code $buf; skipping `$srcName'";
next;
}
CopySpec($srcName . '/*', $targetDir);
next;
}
my $dstName = $dstSpec;
if ($dstName eq "" || $dstName =~ qr{/$} || $dstName =~ qr{(^|/)\.?\.$}) {
# it's a directory
$dstName =~ s{/?$}{/};
$dstName .= basename($srcName);
}
my $dstSize = 0;
if ($restMode) {
print $dstWtr qq{<stat --format='%s' -- '$dstName' 2>/dev/null\n};
ReadAll($dstRdr, $buf);
yap("Size of destination file `$dstName': $buf\n");
$dstSize = int($buf) if ($buf);
}
my $diff = $srcSize - $dstSize;
#next if ($restMode && $diff == 0);
yap("Copying '$srcName' to '$dstName' from byte $dstSize to byte $srcSize ",
"(total: $diff bytes)\n");
if ($restMode && $diff < 0) {
print STDERR "Destination '$dstName' exists but is larger than ",
"source '$srcName', give up on -R\n";
next;
}
CopyFile($srcName, $srcSize, $dstSize, $dstName, $restMode);
}
}
sub yap(@) {
return if (!$verbose);
print join('', @_);
}
sub OpenCmdChannel($) {
my ($host) = @_;
# The command above should have no single quote, sigh
my $perlCmd = $sharedCode . q{
use strict;
use warnings;
use bytes;
my $buf;
$| = 1;
binmode(STDIN) or die "Cannot binmode stdin";
binmode(STDOUT) or die "Cannot binmode stdout";
while (my $ln = SysReadLine(\*STDIN)) {
my $stream;
if ($ln =~ /^</) {
$ln = substr($ln, 1);
open($stream, "$ln|") or die "Cannot open $ln: $!";
binmode($stream) or die "Cannot binmode stream";
while (sysread($stream, $buf, $bufSize)) {
WriteChunk(\*STDOUT, $buf);
}
WriteChunk(\*STDOUT, "");
} elsif ($ln =~ /^>/) {
$ln = substr($ln, 1);
open($stream, "|$ln") or die "Cannot open $ln: $!";
binmode($stream) or die "Cannot binmode stream";
while (ReadChunk(\*STDIN, $buf)) {
syswrite($stream, $buf) == length($buf) or die "Cannot write!";
}
} else {
die "Unrecognized command line: $ln";
}
close($stream);
}
};
$perlCmd =~ s/\s+/ /g;
my @pipe = $host
# open via ssh
? ($ssh, @sshOptions, $host, "perl -e '$perlCmd'")
# local pipe
: ("zsh", "-c", "perl -e '$perlCmd'");
#"zsh", "-c", "perl -e '$perlCmd'"
open2(my $rdr, my $wtr, @pipe) or die "Cannot open pipe `@pipe': $!";
binmode($rdr) && binmode($wtr) or die "Cannot binmode channel";
print $wtr qq{<echo ping\n};
ReadAll($rdr, my $response);
die "Expected `ping', got `$response' in command `@pipe'"
if $response ne "ping\n";
($rdr, $wtr);
}
sub CopyFile($$$$$) {
my ($src, $srcSize, $srcOffset, $dst, $append) = @_;
my $redir = $append ? '>>' : '>';
# Send commands to source
my $payload = $srcSize - $srcOffset;
my $nTh = $srcOffset + 1;
print $srcWtr qq{<tail --bytes=+$nTh -- $src $compress\n}
or die "Cannot send commands to source";
# Send commands to target
print $dstWtr qq{>$expand $redir $dst\n}
or die "Cannot send commands to destination";
my $speed = 0;
my $eta;
my $started = time();
my $bytesSinceLastTime = 0;
my $buf;
my $accum = $srcOffset;
my $moreData = 1;
# ok, now do the deed
while ($moreData) {
$moreData = ReadChunk($srcRdr, $buf);
WriteChunk($dstWtr, $buf) == length($buf) or die "Cannot write: $!\n";
$accum += length($buf);
$bytesSinceLastTime += length($buf);
my $newTime = time();
my $delta = $newTime - $started;
next if ($delta < 1 && $moreData);
# Compute speed and ETA
my $instaSpeed = $bytesSinceLastTime / $delta;
$speed = $speed == 0
? $instaSpeed
: (19 * $speed + $instaSpeed) / 20;
# Throttle bandwidth
if ($speed > $limitBandwidth) {
my $sleep = $bytesSinceLastTime / $limitBandwidth - $delta;
if ($sleep > 0) {
sleep($sleep);
$speed = $limitBandwidth;
}
}
$started = $newTime;
$bytesSinceLastTime = 0;
#
next if ($quiet);
$eta = $speed ? ($srcSize - $accum) / $speed : 0; # seconds
# Print the stuff
my $p = sprintf("%.2f", $srcSize ? $accum * 100 / $srcSize : 100);
my $hrs = int($eta / 3600.);
my $mins = int(($eta - 3600 * $hrs) / 60);
my $rhs = sprintf(" %.2f%% %s %.0fKB/s %02u:%02u ETA",
$p, FormatSize($accum), $speed / 1024., $hrs, $mins);
my $fsLen = 80 - length($rhs) - 2;
printf STDERR "%-*.*s %s\r", $fsLen, $fsLen, basename($src), $rhs;
}
print STDERR "\n";
}
sub FormatSize($;$) {
my ($size, $suffix) = @_;
$suffix = $suffix || "B";
if ($size < 1000) {
return sprintf("%.5s$suffix", $size);
}
$size /= 1024.;
return FormatSize($size,
$suffix eq "B" ? "K" : $suffix eq "K" ? "M" : $suffix eq "M" ? "G" : "T");
}
sub ReadAll($\$) {
my ($h, $r) = @_;
$$r = "";
my $buf;
while (ReadChunk($h, $buf)) {
$$r .= $buf;
}
length($$r);
}
__END__
Generate html with:
$ pod2html scpi.pl >scpi.html --css="/style.css" --title=scpi
=head1 SCPI: B<S>ECURE B<C>OB<P>Y B<I>MPROVED
scpi - scp improved with restartable downloads and advanced compression.
=begin html
<style>
tt, pre, code {
font-family:"Courier";
font-size:105%;
}
</style>
<script type="text/javascript"><!--
google_ad_client = "pub-7042296581632428";
google_ad_width = 728;
google_ad_height = 90;
google_ad_format = "728x90_as";
google_ad_type = "text_image";
google_ad_channel ="";
google_color_border = "336699";
google_color_bg = "C1CCCD";
google_color_link = "0000FF";
google_color_url = "008000";
google_color_text = "000000";
//--></script>
<script type="text/javascript"
src="http://pagead2.googlesyndication.com/pagead/show_ads.js">
</script>
=end html
=head1 SYNOPSIS
C<scpi.pl [options] [[user@]host:]src [...] [[user@]host:]dest>
=head1 DESCRIPTION
C<scpi> (C<scp> improved) is intended to be a drop-in replacement for the
C<scp> utility that comes together with any Secure Shell (ssh) implementation.
C<scpi> implements the much-needed restartable downloads that C<scp> famously
doesn't support. As a couple of perks, C<scpi> implements a few other
niceties such as optional use of advanced compression (C<gzip>/C<bzip2>)
and copying multiple files to one destination directory in one shot.
Options:
-h, -help brief help message
-1 passed to ssh
-1 passed to ssh
-4 passed to ssh
-6 passed to ssh
-B batch usage (not implemented)
-C passed to ssh
-c str passed to ssh
-F str passed to ssh
-i str passed to ssh
-l limit bandwidth used
-o str passed to ssh
-P int passed to ssh
-p preserve file times (not implemented)
-q don't display progress
-r recurse
-S str choose the ssh to use
-v passed to ssh
-R restart mode (resume broken downloads)
-n use no compression (default)
-b use bzip2 for compressing the data stream
(%, speed, and ETA will be inaccurate)
-g use gzip for compressing the data stream
(%, speed, and ETA will be inaccurate)
-d debug (extra output)
C<scpi> is implemented in Perl, and requires Perl on the machine it's running,
as well as on the source and/or destination hosts. Also, C<scpi> uses the C<ssh>
executable on the client machine (but not C<scp>).
The main target audience for C<scpi> consists of people who enjoy C<scp>'s
security but are unhappy with C<scp>'s lack of a "restart" capability---if
the connection drops in the middle of a copy, C<scp> forces redoing the
transfer all over again. C<scpi> implements robust restarts. When invoked with
the -R flag, C<scpi> assumes that the existing target file, if any, is a
valid fragment left from a previous C<scpi> (or C<scp> for that matter)
execution, and that the source file hasn't changed in the meantime.
It follows that C<scpi> is best at transferring large files. To better support
that ability, C<scp> supports C<gzip> or C<bzip2> compression of the data
stream, which can be a big win in transmission speed at a low computational
cost, particularly over an agglomerated LAN. (You can also pass C<scpi> the
-C flag, which C<scpi> passes along to C<ssh> instructing it to compress the
stream itself (but not the source and destination files). Generally, the
newer C<bzip2> algorithm is expected to be better than either C<gzip> or
C<ssh>'s native compression.)
=head1 DOWNLOAD
C<scpi.pl> is available from http://erdani.org/code/scpi.pl. Save the file to a
local Unix (including Cygwin) directory and use it as you'd use C<scp>.
=head1 IMPLEMENTATION
You don't need to read this unless you plan on changing scpi yourself, or you
enjoy cool hacks.
In essence, scpi starts ssh through a bidirectional pipe and implements its own
little data transfer protocol on top of that pipe. The protocol simply
prescribes that data transfers take place in "chunks". Each chunk consists of a
number N (in decimal ASCII) followed by a newline, followed by N bytes of data.
By convention, if N is zero, that means an entire transfer---in our case, a file
of arbitrary size---has ended. Transmitting a large file in chunks has the
advantage that the size of the file mustn't be known in advance, advantage
exploited in scpi's implementation.
The need for such a protocol arises from the requirement of reusing one
bidirectional stream (namely ssh's stdin and stdout) for multiple binary
transfers. If only one blob of data were to be transferred, closing the
stream would naturally signify the end of the data. However, a more
sophisticated communication demands a reliable way to detect the size
of a data block.
With this little protocol in place, scpi implements and uses internally a
very simple "command line" interface---a way to execute commands on a
remote system and either feed data
to their standard input or read their standard output. Simplicity and reuse of
good old utilities (such as C<tail> and C<cat>) make scpi robust and
easy to debug.
=head1 AUTHOR
Written by Andrei Alexandrescu. Mosey to http://erdani.org/email to contact
the author.
=head1 KNOWN BUGS
As of this time, the -p (preserve file time) and -B (batch usage) are ignored,
the first because the author didn't need it yet, and the second because the
author didn't have the time to look into what it really means.
C<scpi> has not been tested with file names containing a newline, and it
almost sure won't work with them. Quotes and other special characters inside
file names are likely to cause trouble as well.
The implementation of bandwidth limitation hasn't been really thought through.
When the option -l is in effect, there is some bandwith throttling, but perhaps
the regularization algorithm could be vastly enhanced.
When using -b or -g (compression with C<bzip2> or C<gzip>), the percent
transferred, speed, and estimated time of arrival (ETA) will likely be
inaccurate. This is because they reflect the amount of compressed, not "real",
data transferred. On the bright side, at the end of each file's transfer, the
percent complete will show the effective stream compression achieved on that
file.
=head1 SEE ALSO
The documentation for scp shows more details on the options. B<scpi> emulates
scp's options (and, just like scp, passes some of them to ssh).
=head1 COPYRIGHT
Copyright (c) 2006 Andrei Alexandrescu.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
=cut
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment