Skip to content

Instantly share code, notes, and snippets.

@lelegard
Created May 10, 2018 17:09
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lelegard/be91bbc5e80a978d4d940f7f4724102e to your computer and use it in GitHub Desktop.
Save lelegard/be91bbc5e80a978d4d940f7f4724102e to your computer and use it in GitHub Desktop.
Script to recursively list, synchronize or delete directories on a remote site using SFTP only
#!/bin/bash
#-----------------------------------------------------------------------------
#
# This script recursively lists, synchronizes or deletes a remote directory
# using SFTP only. In list mode, it generates a deterministic output format,
# suitable for scripting.
# Syntax: see showhelp() function below.
#
#-----------------------------------------------------------------------------
#
# Copyright (c) 2018, Thierry Lelegard
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
# THE POSSIBILITY OF SUCH DAMAGE.
#
#-----------------------------------------------------------------------------
# Name of this script, without directory, used as prefix in messages.
# The path is quoted so that an installation directory containing spaces
# does not split it into several arguments of basename.
SCRIPT=$(basename -- "${BASH_SOURCE[0]}")
# Print the complete help text on standard error, then terminate the script
# with status 1. The text is an unquoted here-document: $SCRIPT is expanded
# to the name of the script.
showhelp() {
  cat <<EOF >&2
Recursively list a remote directory using SFTP.
Usage: $SCRIPT [options] [user@]host [directory]
The output is fully deterministic to be easily parsed by a script. Each line
describes one file. Fields are separated with one space. The file name comes
last and may contain spaces. The output fields are:
type mode owner group size date file/path
Options:
-d
--date
Display the full dates instead of the number of seconds since epoch.
Note that, in that case, the date field contains spaces and it is no
longer possible to parse the output lines using 'cut'.
--delete
Recursively delete files and directories instead of listing them.
--dry-run
With --delete or --synchronize, display what would be done but don't do it.
-e 'wildcard-spec'
--exclude 'wildcard-spec'
Exclude from the list any file or directory matching the wildcard
specification.
-h
--help
Display this help text.
-m
--mode
Display the full mode string instead of the octal value.
-n
--name-only
Display only file names, not full details.
-s local-directory
--synchronize local-directory
Synchronize the content of 'local-directory' with the remote directory.
New or newer local files are uploaded. Remote files without equivalent
local file are not deleted.
-r
--reverse-directory
Display each directory name after its contents instead of before.
-v
--verbose
Display verbose information with --delete.
EOF
  exit 1
}
#-----------------------------------------------------------------------------
# Decode command line arguments
#-----------------------------------------------------------------------------
# Report a fatal error on standard error, prefixed with the script name,
# then terminate the script with status 1.
# Arguments: the words of the message, joined into one line.
error() {
  printf '%s\n' "$SCRIPT: $*" >&2
  exit 1
}
# Print the one-line command syntax on standard error, then terminate the
# script with status 1.
usage() {
  printf '%s\n' "usage: $SCRIPT [options] [user@]host [directory]" >&2
  exit 1
}
OPT_HOST=
OPT_ROOT=
OPT_SYNC=
OPT_PREFIX=
OPT_DRYRUN=false
OPT_RECURSE=false
OPT_DELETE=false
OPT_FULL_DATE=false
OPT_FULL_MODE=false
OPT_NAME_ONLY=false
OPT_REV_DIR=false
OPT_VERBOSE=false
OPT_EXCLUDE=(.DS_Store)
while [[ $# -gt 0 ]]; do
case "$1" in
-d|--date)
OPT_FULL_DATE=true
OPTIONS="$OPTIONS --date"
;;
--delete)
OPT_DELETE=true
OPT_REV_DIR=true
OPTIONS="$OPTIONS --delete"
;;
--dry*)
OPT_DRYRUN=true
OPTIONS="$OPTIONS --dry-run"
;;
-e|--exclude)
shift; [[ $# -gt 0 ]] || usage
wild="${1//\'/}"
OPT_EXCLUDE+=("$wild")
OPTIONS="$OPTIONS --exclude '$wild'"
;;
-h|--help)
showhelp
;;
-m|--mode)
OPT_FULL_MODE=true
OPTIONS="$OPTIONS --mode"
;;
-n|--name*)
OPT_NAME_ONLY=true
OPTIONS="$OPTIONS --name-only"
;;
--prefix)
shift; [[ $# -gt 0 ]] || usage
OPT_PREFIX="$1"
;;
--recurse)
OPT_RECURSE=true
OPTIONS="$OPTIONS --recurse"
;;
-r|--reverse*)
OPT_REV_DIR=true
OPTIONS="$OPTIONS --reverse-directory"
;;
-s|--sync*)
shift; [[ $# -gt 0 ]] || usage
OPT_SYNC="$1"
;;
-v|--verbose)
OPT_VERBOSE=true
OPTIONS="$OPTIONS --verbose"
;;
-*)
usage
;;
*)
if [[ -z "$OPT_HOST" ]]; then
OPT_HOST="$1"
elif [[ -z "$OPT_ROOT" ]]; then
OPT_ROOT="$1"
else
usage
fi
;;
esac
shift
done
# Host is mandatory.
[[ -z "$OPT_HOST" ]] && usage
if $OPT_DELETE && [[ -n "$OPT_SYNC" ]]; then
error "cannot --delete and --synchronize at the same time"
fi
#-----------------------------------------------------------------------------
# Basic functions
#-----------------------------------------------------------------------------
# Check prerequisites: locate the 'date', 'stat' and 'sed' commands to use and
# store them in GNUDATE, GNUSTAT and GNUSED. On Linux, the standard commands
# are used. On all other systems, the g-prefixed commands (gdate, gstat, gsed)
# must be found in the PATH.
#
# The lookup uses the 'command -v' builtin rather than 'which': some
# implementations of 'which' report a missing command on standard output,
# which would defeat the checks for an empty result.
case $(uname -s) in
  Darwin)
    GNUDATE=$(command -v gdate 2>/dev/null)
    GNUSTAT=$(command -v gstat 2>/dev/null)
    GNUSED=$(command -v gsed 2>/dev/null)
    [[ -n "$GNUDATE" && -n "$GNUSTAT" ]] || error "install GNU coreutils, for instance 'brew install coreutils'"
    [[ -n "$GNUSED" ]] || error "install GNU sed, for instance 'brew install gnu-sed'"
    ;;
  Linux)
    GNUDATE=date
    GNUSTAT=stat
    GNUSED=sed
    ;;
  *)
    GNUDATE=$(command -v gdate 2>/dev/null)
    GNUSTAT=$(command -v gstat 2>/dev/null)
    GNUSED=$(command -v gsed 2>/dev/null)
    [[ -n "$GNUDATE" ]] || error "GNU 'date' utility not found"
    [[ -n "$GNUSTAT" ]] || error "GNU 'stat' utility not found"
    [[ -n "$GNUSED" ]] || error "GNU 'sed' utility not found"
    ;;
esac
# Convert a date into number of seconds since epoch.
# Globals:   GNUDATE (read), the 'date' command to run.
# Arguments: $1 - date string, passed to the --date option of the command.
# Outputs:   the number of seconds on stdout. The error messages of the
#            command are discarded: nothing is printed for an invalid date.
# The command is quoted so that a tool path containing spaces remains one word.
date_to_seconds() {
  "$GNUDATE" "--date=$1" +%s 2>/dev/null
}
# Size in bytes of a file.
# Globals:   GNUSTAT (read), the 'stat' command to run.
# Arguments: $1 - path of the file.
# Outputs:   the size on stdout.
# The '--' protects file names starting with a dash; the command is quoted so
# that a tool path containing spaces remains one word.
file_size() {
  "$GNUSTAT" --format %s -- "$1"
}
# Modification date of a file in seconds since epoch.
# Globals:   GNUSTAT (read), the 'stat' command to run.
# Arguments: $1 - path of the file.
# Outputs:   the modification time on stdout.
# The '--' protects file names starting with a dash; the command is quoted so
# that a tool path containing spaces remains one word.
file_date() {
  "$GNUSTAT" --format %Y -- "$1"
}
#-----------------------------------------------------------------------------
# Special case of --delete
#-----------------------------------------------------------------------------
# Top-level invocation with --delete (not a recursive invocation, not a dry
# run): a recursive invocation of this script generates the list of SFTP
# removal commands, one final rmdir is added for the root directory itself
# and everything is executed in one single SFTP batch session.
if $OPT_DELETE && ! $OPT_RECURSE && ! $OPT_DRYRUN; then
  if [[ -z "$OPT_ROOT" ]]; then
    error "Must provide a directory with --delete to avoid accidents"
  fi
  {
    # $OPTIONS is deliberately unquoted: it is a space-separated option list.
    # shellcheck disable=SC2086
    "${BASH_SOURCE[0]}" $OPTIONS "$OPT_HOST" "$OPT_ROOT" --recurse
    printf '%s\n' "-rmdir '$OPT_ROOT'"
  } |
    sftp -q -b - "$OPT_HOST" |
    if $OPT_VERBOSE; then
      # Display the executed commands, without prompt and leading dash.
      "$GNUSED" -e 's/^sftp> *-*//'
    else
      # Display error messages only, hide the echo of the commands.
      grep -v '^sftp>'
    fi
  # Exit with the status of the SFTP session. The status of the output filter
  # is meaningless: grep returns 1 when there is nothing to display, which is
  # the normal case for a successful silent run.
  exit "${PIPESTATUS[1]}"
fi
#-----------------------------------------------------------------------------
# Special case of --synchronize
#-----------------------------------------------------------------------------
# Print the first line of a remote listing which describes exactly one file.
# Arguments: $1 - file containing the listing, one line per remote file, in
#                 the format "type mode owner group size date file/path".
#            $2 - relative path of the file to search.
# Outputs:   the matching line on stdout, nothing if there is none.
# The path is compared as a plain string with everything after the sixth
# field. It is neither a regular expression nor allowed to match the end of a
# longer name. It is passed through the environment because 'awk -v' would
# interpret the backslashes it may contain.
sync_find_remote() {
  SYNC_WANTED="$2" awk '
    {
      path = $0
      for (i = 0; i < 6; i++) sub(/^[^ ]* /, "", path)
      if ((path "") == (ENVIRON["SYNC_WANTED"] "")) { print; exit }
    }' "$1"
}

# With --synchronize: upload the local files which are missing, have a
# different size or are older on the remote site, then exit.
if [[ -n "$OPT_SYNC" ]]; then
  [[ -d "$OPT_SYNC" ]] || error "$OPT_SYNC is not a valid local directory"

  # List remote files in a private temporary file, removed on all exit paths.
  TMPFILE=$(mktemp "${TMPDIR:-/tmp}/sftp.XXXXXX") || error "cannot create temporary file"
  trap 'rm -f -- "$TMPFILE"' EXIT
  # $OPTIONS is deliberately unquoted: it is a space-separated option list.
  # shellcheck disable=SC2086
  "${BASH_SOURCE[0]}" $OPTIONS "$OPT_HOST" "$OPT_ROOT" --recurse >"$TMPFILE"

  # Exclusion options for find command, as an array so that the wildcards
  # are passed to find without being expanded by the shell.
  find_excl=()
  for e in "${OPT_EXCLUDE[@]}"; do
    if [[ ${#find_excl[@]} -gt 0 ]]; then
      find_excl+=(-o)
    fi
    find_excl+=(-name "$e")
  done

  # Loop on all local files and directories to synchronize.
  find "$OPT_SYNC" \( "${find_excl[@]}" \) -prune -o \( \( -type d -o -type f \) -print \) |
    while IFS= read -r path; do
      # Path relative to the local directory, skip the directory itself.
      file=${path#"$OPT_SYNC"}
      while [[ $file == /* ]]; do
        file=${file#/}
      done
      [[ -z "$file" ]] && continue

      # Find matching line on remote site.
      rem=$(sync_find_remote "$TMPFILE" "$file")
      # Full local path.
      loc_file="${OPT_SYNC}/${file}"

      if [[ -d "$loc_file" ]]; then
        # This is a directory, create if does not exist remotely.
        if [[ -z "$rem" ]]; then
          echo "#### missing remote directory $file"
          if ! $OPT_DRYRUN; then
            printf '%s\n' "mkdir '${OPT_ROOT}${OPT_ROOT:+/}${file}'" | sftp -q -b - "$OPT_HOST"
          fi
        fi
      else
        # This is a file, copy if size is different or date is older.
        copy=false
        if [[ -z "$rem" ]]; then
          echo "#### missing remote file $file"
          copy=true
        else
          # Fields 5 and 6 of the remote line are the size and the date.
          read -r _ _ _ _ rem_size rem_date _ <<<"$rem"
          loc_size=$(file_size "$loc_file")
          loc_date=$(file_date "$loc_file")
          if [[ -z "$rem_size" || "$rem_size" -ne "$loc_size" || -z "$rem_date" || "$rem_date" -lt "$loc_date" ]]; then
            echo "#### obsolete remote file $file"
            copy=true
          fi
        fi
        if $copy && ! $OPT_DRYRUN; then
          rem_dir=$(dirname "${OPT_ROOT}${OPT_ROOT:+/}${file}")
          loc_dir=$(dirname "$loc_file")
          base=$(basename "$file")
          # printf, unlike 'echo -e', leaves backslashes in names untouched.
          printf "lcd '%s'\ncd '%s'\nput '%s'\n" "$loc_dir" "$rem_dir" "$base" | sftp -q -b - "$OPT_HOST"
        fi
      fi
    done
  exit 0
fi
#-----------------------------------------------------------------------------
# Perform SFTP listing.
#-----------------------------------------------------------------------------
# Just in case it influences SFTP output.
export LANG=en_US.UTF-8

# Width of the date field in SFTP output, 0 when not yet known.
dwidth=0

# Regular expressions used in the loop below. They are stored in variables
# because they contain spaces.
# - The first five space-separated fields of a line, followed by one space.
# - The last space-separated word of a string.
fld='[^ ]+'
prefix_re="^($fld +$fld +$fld +$fld +$fld) "
last_word_re=' ([^ ]+)$'

# Loop on all sftp ls output lines. Keep only directories and regular files.
# Remove lines for ".." but keep lines for "." (used later).
# The lines are read verbatim (IFS= read -r): no interpretation of
# backslashes, no removal of spaces at end of file names.
printf '%s\n' "ls -la '$OPT_ROOT'" | sftp -q -b - "$OPT_HOST" | grep '^[d-]' | grep -v ' \.\.$' |
  while IFS= read -r line; do
    # The main problem with SFTP output is that the width and format of the
    # date field is not predictable and may contain spaces. Split the line in
    # two, prefix and suffix. Get mode, #links, owner, group, size in prefix
    # and date and file name in suffix. Fields in the prefix are predictable
    # and do not contain spaces.
    if [[ $line =~ $prefix_re ]]; then
      prefix=${BASH_REMATCH[1]}
    else
      prefix=$line
    fi
    suffix=${line:${#prefix}}

    if [[ $dwidth -eq 0 && $suffix == *\ . ]]; then
      # This is the line for '.', typically the first line in a directory
      # listing. We don't list it but we use it to compute the width of the
      # date field: everything in the suffix except the final '.'.
      dwidth=$(( ${#suffix} - 1 ))
      continue
    fi

    # This is a standard file or directory. Get basic fields. The word
    # splitting of 'read' ignores the variable number of spaces between
    # fields.
    read -r perms _ owner group size _ <<<"$prefix"
    type=${perms:0:1}
    type=${type/-/f}
    mode=${perms:1:9}

    # Mode: unless the full string is requested, build the octal value with
    # one bit per character, set when the character is not '-'.
    if ! $OPT_FULL_MODE; then
      intmode=0
      for (( i = 0; i < ${#mode}; i++ )); do
        intmode=$(( intmode << 1 ))
        if [[ ${mode:i:1} != - ]]; then
          intmode=$(( intmode | 1 ))
        fi
      done
      printf -v mode '%04o' "$intmode"
    fi

    # Date and file name.
    if [[ $dwidth -eq 0 ]]; then
      # Don't know date field size, assume that the file name has no space.
      date=0
      if [[ $suffix =~ $last_word_re ]]; then
        name=${BASH_REMATCH[1]}
      else
        name=$suffix
      fi
    else
      # Expect a fixed-size date field. Normalize it: no leading or trailing
      # space, one single space between words.
      read -r -a date_words <<<"${suffix:0:dwidth}"
      date="${date_words[*]}"
      if $OPT_FULL_DATE; then
        date="'$date'"
      else
        date=$(date_to_seconds "$date")
        date=${date:-0}
      fi
      # File name follows the date.
      name=${suffix:$dwidth}
    fi

    # Skip file if it matches an exclude spec. The right-hand side is
    # deliberately unquoted to be used as a wildcard pattern.
    excl=false
    for wild in "${OPT_EXCLUDE[@]}"; do
      if [[ $name == $wild ]]; then
        excl=true
        break
      fi
    done
    $excl && continue

    # Output line format: SFTP removal command, name only or full details.
    if $OPT_DELETE; then
      if [[ "$type" == d ]]; then
        cmd="rmdir"
      else
        cmd="rm"
      fi
      out="-$cmd '${OPT_ROOT}${OPT_ROOT:+/}${name}'"
    elif $OPT_NAME_ONLY; then
      out="${OPT_PREFIX}$name"
    else
      out="$type $mode $owner $group $size $date ${OPT_PREFIX}$name"
    fi

    # Output and recurse on directory. Use printf, not echo, which would
    # interpret a file name such as '-n' as an option and print nothing.
    if [[ "$type" != d ]]; then
      # Regular file.
      printf '%s\n' "$out"
    else
      # Directory: displayed before or after its content.
      if ! $OPT_REV_DIR; then
        printf '%s\n' "$out"
      fi
      # $OPTIONS is deliberately unquoted: it is a space-separated option list.
      # shellcheck disable=SC2086
      "${BASH_SOURCE[0]}" $OPTIONS "$OPT_HOST" "${OPT_ROOT}${OPT_ROOT:+/}${name}" --recurse --prefix "${OPT_PREFIX}${name}/"
      if $OPT_REV_DIR; then
        printf '%s\n' "$out"
      fi
    fi
  done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment