Skip to content

Instantly share code, notes, and snippets.

@o0-o
Last active March 4, 2020 19:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save o0-o/25519172def734b7d36bc9d1ca84d5b1 to your computer and use it in GitHub Desktop.
Save o0-o/25519172def734b7d36bc9d1ca84d5b1 to your computer and use it in GitHub Desktop.
Backup to Tape and Cloud with Email Alert
#!/usr/bin/env bash
#
# Archive data to tape and cloud storage with log and email alert.
#
# Usage: tape-cloud-bu.sh /mnt/data /mnt/ltotape/path rclone_remote:/path
#
################################################################################
# safety first
#   -e          abort on any unhandled command failure
#   -u          treat expansion of an unset variable as an error
#   pipefail    a pipeline fails when any of its stages fails
set -euo pipefail
# Run the last stage of every pipeline in the current shell instead of a
# subshell. The main loop is the final stage of a `find | sed | while read`
# pipeline and assigns STATUS on failure; without lastpipe those assignments
# are made in a subshell and are gone by the time the report is generated, so
# the archive is always reported as "Completed". lastpipe only takes effect
# when job control is off, which is the case for a non-interactive script.
# Shells that do not know the option keep the previous behaviour.
shopt -s lastpipe 2>/dev/null || :
# Use a fixed search path and forget any previously hashed command locations.
PATH="/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin"; hash -r
#set -x #debug
#readonly _DRYRUN="TRUE" #debug
################################################################################
# DECLARATIONS #################################################################
################################################################################
# Run-wide read-only values:
#   _PID    this shell's process id; used as the logger tag and later as the
#           grep key for this run's log lines (logger -i is unreliable)
#   _LOG    syslog file the report section reads back
#   _BEGAN  start time of the run as printed by date
declare -r _PID="$$"
declare -r _LOG="/var/log/messages"
declare -r _BEGAN="$(date)"
# Succeeds (status 0) only when _DRYRUN is set to exactly "TRUE"; an unset or
# differently valued _DRYRUN yields a non-zero status.
dryrun() {
  [[ "${_DRYRUN:-}" == "TRUE" ]]
}
# Positional arguments, in order:
#   1  SOURCE             tree to archive           ex: /mnt/data
#   2  TAPE_DESTINATION   target path on tape       ex: /mnt/ltotape/path
#   3  CLOUD_DESTINATION  rclone remote and path    ex: rclone_remote:/path
declare SOURCE TAPE_DESTINATION CLOUD_DESTINATION
SOURCE="${1}"
TAPE_DESTINATION="${2}"
CLOUD_DESTINATION="${3}"
# format and reporting vars
# LOG_ALIGN is the length of the longest of the three path arguments; the log
# lines use it as the field width of their location column.
declare LOG_ALIGN="${#SOURCE}"
if (( ${#TAPE_DESTINATION} > LOG_ALIGN )); then
  LOG_ALIGN="${#TAPE_DESTINATION}"
fi
if (( ${#CLOUD_DESTINATION} > LOG_ALIGN )); then
  LOG_ALIGN="${#CLOUD_DESTINATION}"
fi
# STATUS stays empty until a step records INCOMPLETE or FAILED.
declare STATUS=""
# log args
# One "LABEL : value" line per argument, all sent through a single logger
# call tagged with this run's PID (-s also copies them to stderr).
{
  printf "%-6s: %s\n" "SOURCE" "${SOURCE}"
  printf "%-6s: %s\n" "TAPE" "${TAPE_DESTINATION}"
  printf "%-6s: %s\n" "CLOUD" "${CLOUD_DESTINATION}"
} |
  logger -s -t "${_PID}" -p user.info
################################################################################
### MAIN LOOP -- All files and directories in SOURCE ###########################
################################################################################
# Send find errors to the log while still piping results to the loop
################################################################################
# The loop's `read` strips trailing white space from each line and consumes
# backslashes. To keep names intact we add END as a suffix (stripped out again
# once inside the loop) and double every backslash with sed before `read`.
################################################################################
{ find "${SOURCE}" \
-printf "%pEND\n" \
-name "*" 1>&3 2>&1 |
logger -s -t "${_PID}" \
-p user.warning
} 3>&1 |
sed 's/\\/\\\\/g' | #<== MAIN LOOP PIPE ==
while read SOURCE_ITEM; do
##############################################################################
# SOURCE #####################################################################
##############################################################################
# strip END suffix
# Removes one trailing "END" marker, if present, from the current line.
SOURCE_ITEM="${SOURCE_ITEM%END}"
# Check if source is valid
##############################################################################
# An item is treated as invalid when it is neither a directory nor a regular
# file and stat cannot read it either. Invalid items are logged and the run is
# marked INCOMPLETE. Valid items get two derived names:
#   ITEM            path relative to SOURCE, control characters replaced by
#                   the text [CNTRL]
#   SANITIZED_ITEM  ITEM with every character outside the LTFS-legal set
#                   replaced by a bracketed octal code
if [ ! -d "${SOURCE_ITEM}" ] &&
   [ ! -f "${SOURCE_ITEM}" ] &&
   ! stat "${SOURCE_ITEM}" >/dev/null; then
  # ITEM is only derived for valid items, so report the raw path here.
  # Referencing ITEM at this point aborts under `set -u` on the first item
  # and otherwise shows the name of the previously processed item.
  printf "%-6s: %-16s: %${LOG_ALIGN}s%s" \
    "SOURCE" \
    "FAILED" \
    "" \
    "${SOURCE_ITEM}" |
    logger -s -t "${_PID}" -p user.info || : # logging is best effort
  declare STATUS="INCOMPLETE"
else
  # Source is valid, process it
  ############################################################################
  # find relative item path and replace control characters
  # The SOURCE prefix is removed as a literal string, so characters in SOURCE
  # that are special in a regular expression cannot break the match.
  declare ITEM="$(
    printf "%s" "${SOURCE_ITEM#"${SOURCE}"}" |
    sed "s/[[:cntrl:]]/\[CNTRL\]/g"
  )"
  printf "%-6s: %-16s: %${LOG_ALIGN}s%s" \
    "SOURCE" \
    "PROCESSING" \
    "${SOURCE}" \
    "${ITEM}" |
    logger -s -t "${_PID}" -p user.info
  # Sanitizing file/directory names
  ############################################################################
  # list ltfs-supported characters
  # https://www.ibm.com/support/knowledgecenter/ST5MZ9/com.ibm.storage.hollywood.doc/ltfs_sm_char_support.html
  ############################################################################
  # LEGAL_CHARS holds, one per line and in order, every character of ITEM
  # that matches the legal set (surrounding white space trimmed, remaining
  # white space turned into plain spaces first).
  # NOTE(review): inside double quotes `\$` reaches grep as a bare `$`, i.e.
  # an end-of-line anchor, so a literal dollar sign is not matched as legal
  # here. Left unchanged because names already on tape depend on it; confirm
  # whether that is intended.
  declare LEGAL_CHARS="$(
    printf "%s" "${ITEM}" |
    sed -e "s/^[[:space:]]*//" \
        -e "s/[[:space:]]*\$//" \
        -e "s/[[:space:]]/ /g" |
    grep -E --only-matching \
      "[[:alnum:]]| |\!|\$|\(|\)|,|\.|;|=|\^|_|~|/|\[|\]|\{|\}|-"
  )"
  declare SANITIZED_ITEM="${ITEM}"
  # loop through illegal characters and replace with octal value in brackets
  while read ILLEGAL_CHAR; do
    [ -z "${ILLEGAL_CHAR}" ] && break #loop will run despite no input
    # OCTAL is the first line of od's output for the character with the
    # offset column and all white space removed.
    declare OCTAL="$(
      printf "%s" "${ILLEGAL_CHAR}" |
      od |
      sed -e 's/^[0-9]\{7\}//' \
          -e 's/[[:space:]]*//g' \
          -e 'q'
    )"
    # Some characters (single quote) behave strangely when escaped and instead
    # of replacing the illegal character, the octal appends to the end of the
    # name. Other characters (brackets) do need to be escaped and will error
    # out otherwise, so we try unescaped and fall back to escaped.
    declare SANITIZED_ITEM="$(
      printf "%s" "${SANITIZED_ITEM}" |
      { sed "s/${ILLEGAL_CHAR}/\[${OCTAL}\]/g" 2>/dev/null ||
        sed "s/\\${ILLEGAL_CHAR}/\[${OCTAL}\]/g"
      }
    )"
  done <<< "$( # list illegal characters in item path
    diff <(printf "%s\n" "${LEGAL_CHARS}") \
         <(printf "%s\n" "${ITEM}" | fold -w 1) |
    grep "^>" |
    cut -d ' ' -f 2 |
    sort |
    uniq |
    sed 's/\\/\\\\/g'
  )"
  # Detect if sanitization created a name collision (edge case)
  ############################################################################
  # Only relevant when the last path component actually changed. A collision
  # exists when the source directory already contains an entry whose name is
  # the sanitized name. The check tests that path directly: a fixed-string
  # grep with ^ and $ in the pattern looks for those characters literally and
  # therefore never matches a directory listing.
  if [ "${ITEM##*/}" != "${SANITIZED_ITEM##*/}" ] &&
     [ "${ITEM}" != "" ]; then
    printf "%-6s: %-16s: %${LOG_ALIGN}s%s" \
      "NAME" \
      "SANITIZED" \
      "" \
      "${SANITIZED_ITEM}" |
      logger -s -t "${_PID}" -p user.notice || : # logging is best effort
    declare COLLISION_PATH="${SOURCE_ITEM%/*}/${SANITIZED_ITEM##*/}"
    # -L also catches a dangling symlink, for which -e is false
    if [ -e "${COLLISION_PATH}" ] || [ -L "${COLLISION_PATH}" ]; then
      printf "%-6s: %-16s: %${LOG_ALIGN}s%s" \
        "NAME" \
        "COLLISION" \
        "" \
        "${ITEM}" |
        logger -s -t "${_PID}" -p user.err || : # logging is best effort
      # break main loop if collision was detected
      declare STATUS="FAILED"
      break
    fi
  fi
fi
# END SOURCE
##############################################################################
# DIRECTORY ##################################################################
##############################################################################
# If directory, make the directory on tape/cloud and copy contents to cloud
##############################################################################
# Order of operations for a directory: create it on the cloud remote, upload
# its direct contents, then create it on tape. A failed directory creation on
# either side ends the main loop with STATUS=FAILED; a failed upload only
# downgrades STATUS to INCOMPLETE. In a dry run only the log lines are written.
if [ -d "${SOURCE_ITEM}" ]; then
  # DIRECTORY - Cloud
  ############################################################################
  # Attempting to run rclone on the entire source parallel to local copy
  # resulted in memory exhaustion while running rclone on each file exhausted
  # Google's API call quota. As a compromise, we batch upload files one
  # directory at a time. As of rclone 1.50, illegal characters are handled
  # transparently, so we do not need to use the sanitized file names.
  ############################################################################
  # make directory on remote cloud
  printf "%-6s: %-16s: %${LOG_ALIGN}s%s" \
    "CLOUD" "MAKING DIRECTORY" "${CLOUD_DESTINATION}" "${ITEM}" |
    logger -s -t "${_PID}" -p user.info || : # logging is best effort
  if ! dryrun; then
    if ! rclone mkdir "${CLOUD_DESTINATION}${ITEM}" 2>&1 |
         logger -s -t "${_PID}" -p user.warning; then
      # if mkdir fails, stop copying and report failure
      STATUS="FAILED"
      break
    fi
  fi

  # copy all files in the directory but no subdirectories
  printf "%-6s: %-16s: %${LOG_ALIGN}s%s" \
    "CLOUD" "COPYING CONTENTS" "${CLOUD_DESTINATION}" "${ITEM}/*" |
    logger -s -t "${_PID}" -p user.info || : # logging is best effort
  if ! dryrun; then
    if ! rclone \
           --checksum \
           --max-depth "1" \
           --stats-one-line \
           --stats-file-name-length "0" \
           --links \
           --fast-list \
           --low-level-retries "999" \
           --bwlimit "7M" \
           copy "${SOURCE_ITEM}" "${CLOUD_DESTINATION}${ITEM}" 2>&1 |
         logger -s -t "${_PID}" -p user.warning; then
      # log failed upload, set status to incomplete but continue
      printf "%-6s: %-16s: %${LOG_ALIGN}s%s" \
        "CLOUD" "FAILED" "" "${ITEM}/*" |
        logger -s -t "${_PID}" -p user.info || : # logging is best effort
      STATUS="INCOMPLETE"
    fi
  fi

  # DIRECTORY - Tape
  ############################################################################
  # make directory on tape (uses the sanitized name)
  printf "%-6s: %-16s: %${LOG_ALIGN}s%s" \
    "TAPE" "MAKING DIRECTORY" "${TAPE_DESTINATION}" "${SANITIZED_ITEM}" |
    logger -s -t "${_PID}" -p user.info || : # logging is best effort
  if ! dryrun; then
    if ! mkdir -p "${TAPE_DESTINATION}${SANITIZED_ITEM}" 2>&1 |
         logger -s -t "${_PID}" -p user.warning; then
      # if mkdir fails, stop copying and report failure
      STATUS="FAILED"
      break
    fi
  fi
fi
# END DIRECTORY
##############################################################################
# FILE #######################################################################
##############################################################################
# If source item is a file, copy the file to tape
############################################################################
# We use rsync instead of ltfscopy because ltfscopy only copies directories
# and has no way to reconcile names with illegal characters.
############################################################################
# The file is written to tape under its sanitized name. A failed copy is
# logged and downgrades STATUS to INCOMPLETE, but the loop continues. In a dry
# run only the log line is written.
if [ -f "${SOURCE_ITEM}" ]; then
  # FILE - Tape
  ##########################################################################
  # copy to tape
  printf "%-6s: %-16s: %${LOG_ALIGN}s%s" \
    "TAPE" "COPYING FILE" "${TAPE_DESTINATION}" "${SANITIZED_ITEM}" |
    logger -s -t "${_PID}" -p user.info || : # logging is best effort
  if ! dryrun; then
    if ! rsync -ac \
           "${SOURCE_ITEM}" \
           "${TAPE_DESTINATION}${SANITIZED_ITEM}" 2>&1 |
         logger -s -t "${_PID}" -p user.warning; then
      # log failed copy, set status to incomplete but continue
      printf "%-6s: %-16s: %${LOG_ALIGN}s%s" \
        "TAPE" "FAILED" "${TAPE_DESTINATION}" "${SANITIZED_ITEM}" |
        logger -s -t "${_PID}" -p user.info || : # logging is best effort
      STATUS="INCOMPLETE"
    fi
  fi
fi
# END FILE
done
# END MAIN LOOP
################################################################################
### REPORTING -- Copy log to destinations and email notification ###############
################################################################################
# set status to completed if not already set to another value
# A dry run is marked by putting "Dry Run " in front of the resulting status.
STATUS="${STATUS:-Completed}"
if dryrun; then
  STATUS="Dry Run ${STATUS}"
fi
# ## CURRENTLY UNSUPPORTED: https://github.com/rclone/rclone/issues/1319
# # move hidden files out of root directory on the cloud destination for better
# # presentation
# ! dryrun &&
# printf "%s" "${STATUS}" |
# grep -q "Completed" &&
# { rclone mkdir \
# "${CLOUD_DESTINATION}/Hidden System Files" &&
# rclone \
# --include "/.*" \
# move \
# "${CLOUD_DESTINATION}" \
# "${CLOUD_DESTINATION}/Hidden System Files"
# } 2>&1 |
# logger -s -t "${_PID}" -p user.warning || : #continue on failure
# log final success or failure using appropriate log level
# Any status containing "Completed" is logged at info, everything else at err.
case "${STATUS}" in
  *Completed*) declare FINAL_PRIORITY="info" ;;
  *)           declare FINAL_PRIORITY="err" ;;
esac
printf "%s" "Archive ${STATUS}" |
  logger -s -t "${_PID}" -p user."${FINAL_PRIORITY}"
# save a copy of the log to both destinations
# Skipped in a dry run. Otherwise this run's log lines (selected by PID) are
# gzipped onto the tape destination and that file is then copied to the cloud
# destination; if either step fails a warning is logged.
if ! dryrun; then
  if ! { grep "${_PID}" "${_LOG}"* |
           gzip -9 > "${TAPE_DESTINATION}/Backup ${STATUS} ($(date +'%Y-%m-%d')).log.gz" &&
         rclone copy \
           --include "/Backup*.log.gz" \
           "${TAPE_DESTINATION}" \
           "${CLOUD_DESTINATION}"
       }; then
    printf "%s" "Copying log to destination failed." |
      logger -s -t "${_PID}" -p user.warning
  fi
fi
# Email
##############################################################################
# Generate a random MIME boundary: the md5 of a fresh UUID, computed with
# whichever of md5 / md5sum is installed. The result is only accepted when it
# is exactly 32 hex digits; in every other case (uuidgen or both hash tools
# missing, a failing stage, unexpected output) the pre-generated d29... value
# is used. Each candidate is validated on its own, so partial output from one
# attempt can never be concatenated with the next into a multi-line boundary.
declare MIME_BOUNDARY=""
declare HASH_TOOL=""
for HASH_TOOL in md5 md5sum; do
  command -v uuidgen >/dev/null 2>&1 || break
  command -v "${HASH_TOOL}" >/dev/null 2>&1 || continue
  # md5sum prints "<hash>  -"; cut keeps only the hash field
  MIME_BOUNDARY="$(
    uuidgen |
    "${HASH_TOOL}" 2>/dev/null |
    cut -d ' ' -f 1
  )" || MIME_BOUNDARY=""
  if [[ "${MIME_BOUNDARY}" =~ ^[0-9a-fA-F]{32}$ ]]; then
    break
  fi
  MIME_BOUNDARY=""
done
unset HASH_TOOL
if [[ -z "${MIME_BOUNDARY}" ]]; then
  MIME_BOUNDARY="d29a0c638b540b23e9a29a3a9aebc900aeeb6a82"
fi
# send email notification to root
# The message is a multipart/mixed mail with three parts: an HTML summary,
# this run's log lines (gzip + base64) and a copy of this script (gzip +
# base64). If the sendmail pipeline fails a warning is logged; the script
# exits 0 either way.

# Print the number of log lines of this run whose status column is $1.
count_logged() {
  grep "${_PID}.*${1}[[:space:]]*:" "${_LOG}"* |
    wc -l
}

# Write the complete message (headers and all three MIME parts) to stdout.
compose_report_email() {
  # top-level headers
  printf "%s\n" \
    "Subject: $(hostname -f) - Archive ${STATUS}" \
    "Mime-Version: 1.0" \
    "Content-Type: multipart/mixed; boundary=\"${MIME_BOUNDARY}\"" \
    "Content-Transfer-Encoding: 7bit" \
    ""
  # part 1: HTML summary
  printf "%s\n" \
    "--${MIME_BOUNDARY}" \
    "Content-Type: text/html; charset=UTF-8" \
    "Content-Transfer-Encoding: 7bit" \
    "Content-Disposition: inline" \
    "" \
    "<h1>$(hostname -f | awk '{ print toupper($0) }')</h1>" \
    "<h2>Archive ${STATUS}</h2>" \
    "<h3>Began</h3>" \
    "<pre>${_BEGAN}</pre>" \
    "<h3>Source</h3>" \
    "<pre>${SOURCE}</pre>" \
    "<h3>Tape</h3>" \
    "<pre>${TAPE_DESTINATION}</pre>" \
    "<h3>Cloud</h3>" \
    "<pre>${CLOUD_DESTINATION}</pre>" \
    "<h2>Report</h2>" \
    "<p>Files copied to tape: $(count_logged "COPYING FILE")</p>" \
    "<p>Directories uploaded to cloud: $(count_logged "COPYING CONTENTS")</p>" \
    "<p>Instances of unsupported characters (converted): $(count_logged "SANITIZED")</p>" \
    "<p>Failures: $(count_logged "FAILED")</p>"
  # up to 20 failure lines per log file, each wrapped as a list item
  printf "%s\n" \
    "$(
      grep -m 20 "${_PID}.*FAILED[[:space:]]*:" "${_LOG}"* |
        sed -e 's/^/<li><pre>/' -e 's/$/<\/pre><\/li>/'
    )" \
    "<p>END</p>" \
    "<p></p>" \
    ""
  # part 2: this run's log lines
  printf "%s\n" \
    "--${MIME_BOUNDARY}" \
    "Content-Type: application/x-gzip" \
    "Content-Transfer-Encoding: base64" \
    "Content-Disposition: attachment; filename=\"${0##*/}.log.gz\"" \
    "" \
    "$(
      grep "${_PID}" "${_LOG}"* |
        gzip -9 --to-stdout |
        base64
    )" \
    ""
  # part 3: the script itself, followed by the closing boundary
  printf "%s\n" \
    "--${MIME_BOUNDARY}" \
    "Content-Type: application/x-gzip" \
    "Content-Transfer-Encoding: base64" \
    "Content-Disposition: attachment; filename=\"${0##*/}.gz\"" \
    "" \
    "$(
      gzip -9 --to-stdout "${0}" |
        base64
    )" \
    "--${MIME_BOUNDARY}--"
}

compose_report_email |
  sendmail -F "${0##*/}" root 2>&1 |
  logger -s -t "${_PID}" -p user.warning ||
  printf "%s" "Email alert failed." |
    logger -s -t "${_PID}" -p user.warning
exit 0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment