Last active
March 4, 2020 19:53
-
-
Save o0-o/25519172def734b7d36bc9d1ca84d5b1 to your computer and use it in GitHub Desktop.
Backup to Tape and Cloud with Email Alert
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash | |
# | |
# Archive data to tape and cloud storage with log and email alert. | |
# | |
# Usage: tape-cloud-bu.sh /mnt/data /mnt/ltotape/path rclone_remote:/path | |
# | |
################################################################################ | |
# safety first: abort on command errors, unset variables and failed pipeline
# stages
set -euo pipefail
# pin PATH to the system directories, then forget any cached command locations
PATH="/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin"
hash -r
#set -x #debug | |
#readonly _DRYRUN="TRUE" #debug | |
################################################################################ | |
# DECLARATIONS ################################################################# | |
################################################################################ | |
# PID of this run; used as the syslog tag on every log line and later as the
# grep key when the report is built (logger -i is unreliable)
readonly _PID=$$
# syslog file the report is extracted from (read as "${_LOG}"*)
readonly _LOG="/var/log/messages"
# start time shown in the email report
_BEGAN="$(date)"
readonly _BEGAN
# Succeeds (status 0) only when _DRYRUN is exactly "TRUE"; an unset, empty or
# differently-valued _DRYRUN means a real run. Callers write `dryrun || cmd`.
dryrun() { [[ "${_DRYRUN:-}" == "TRUE" ]]; }
# Positional arguments. Fail early with a usage message instead of the bare
# "unbound variable" error `set -u` raises when an argument is missing.
if [ "$#" -lt 3 ]; then
  printf "Usage: %s SOURCE TAPE_DESTINATION CLOUD_DESTINATION\n" "${0##*/}" >&2
  exit 2
fi
declare SOURCE="$1"             #ex: /mnt/data
declare TAPE_DESTINATION="$2"   #ex: /mnt/ltotape/path
declare CLOUD_DESTINATION="$3"  #ex: rclone_remote:/path

# format and reporting vars
# LOG_ALIGN is the length of the longest of the three paths; it is the field
# width of the location column in the per-item log lines.
declare LOG_ALIGN="${#TAPE_DESTINATION}"
if [ "${#CLOUD_DESTINATION}" -gt "${LOG_ALIGN}" ]; then
  LOG_ALIGN="${#CLOUD_DESTINATION}"
fi
if [ "${#SOURCE}" -gt "${LOG_ALIGN}" ]; then
  LOG_ALIGN="${#SOURCE}"
fi
# empty STATUS means "nothing went wrong so far"
declare STATUS=""

# log args (printf repeats the format once per label/value pair)
printf "%-6s: %s\n" \
  "SOURCE" "${SOURCE}" \
  "TAPE" "${TAPE_DESTINATION}" \
  "CLOUD" "${CLOUD_DESTINATION}" |
  logger -s -t "${_PID}" -p user.info
################################################################################ | |
### MAIN LOOP -- All files and directories in SOURCE ########################### | |
################################################################################ | |
# Send find errors to the log while still piping results to the loop.
################################################################################
# Paths travel NUL-delimited (find -print0 / read -d '') so that backslashes,
# newlines and leading/trailing whitespace in names reach the loop unmodified.
################################################################################
# Run the loop (the last pipeline stage) in this shell rather than a subshell;
# otherwise every STATUS assignment made inside it is lost before REPORTING.
# Needs bash >= 4.2 with job control off (the default for scripts).
shopt -s lastpipe

# Log one aligned event line.
# Arguments: $1 - syslog level (info, notice, warning, err)
#            $2 - channel (SOURCE, NAME, CLOUD, TAPE)
#            $3 - state (PROCESSING, FAILED, ...); the report greps for these
#            $4 - location, right-aligned to LOG_ALIGN columns
#            $5 - item path
# Best effort: a logging failure never aborts or alters the archive run.
log_event() {
  printf "%-6s: %-16s: %${LOG_ALIGN}s%s" "$2" "$3" "$4" "$5" |
    logger -s -t "${_PID}" -p "user.$1" || :
}

# Run "$@" unless this is a dry run, sending its output to the log as warnings.
# stdin is detached so the command cannot consume the path stream feeding the
# loop. Returns 0 on a dry run; otherwise non-zero when the command (or logger)
# fails, which relies on the `pipefail` option set at the top of the script.
run_logged() {
  dryrun ||
    "$@" </dev/null 2>&1 |
    logger -s -t "${_PID}" -p user.warning
}

{ find "${SOURCE}" -print0 1>&3 2>&1 |
    logger -s -t "${_PID}" -p user.warning
} 3>&1 |
while IFS= read -r -d '' SOURCE_ITEM; do
  ##############################################################################
  # SOURCE #####################################################################
  ##############################################################################
  # Item path relative to SOURCE (literal prefix removal, so regex characters
  # in SOURCE are harmless) with control characters made visible.
  ITEM="${SOURCE_ITEM#"${SOURCE}"}"
  ITEM="${ITEM//[[:cntrl:]]/[CNTRL]}"

  # Check if source is valid: a directory, a regular file, or at least
  # something stat can see. Anything else is logged and skipped.
  ##############################################################################
  if [ ! -d "${SOURCE_ITEM}" ] &&
     [ ! -f "${SOURCE_ITEM}" ] &&
     ! stat "${SOURCE_ITEM}" >/dev/null; then
    log_event info "SOURCE" "FAILED" "" "${ITEM}"
    STATUS="INCOMPLETE"
    continue
  fi

  # Source is valid, process it
  ##############################################################################
  log_event info "SOURCE" "PROCESSING" "${SOURCE}" "${ITEM}"

  # Sanitizing file/directory names
  ############################################################################
  # list ltfs-supported characters
  # https://www.ibm.com/support/knowledgecenter/ST5MZ9/com.ibm.storage.hollywood.doc/ltfs_sm_char_support.html
  ############################################################################
  # LEGAL_CHARS holds one legal character of ITEM per line, in order.
  # NOTE(review): inside double quotes `\$` reaches grep as a bare `$` (an
  # end-of-line anchor), so a literal `$` in a name is not counted as legal.
  # Confirm whether that is intended before changing the pattern.
  # grep exits 1 when nothing matches (e.g. empty ITEM); that is not an error.
  LEGAL_CHARS="$(
    printf "%s" "${ITEM}" |
      sed "s/^[[:space:]]*//
           s/[[:space:]]*\$//
           s/[[:space:]]/ /g" |
      grep -E --only-matching \
        "[[:alnum:]]| |\!|\$|\(|\)|,|\.|;|=|\^|_|~|/|\[|\]|\{|\}|-"
  )" || :

  SANITIZED_ITEM="${ITEM}"
  # loop through illegal characters and replace with octal value in brackets
  # (`read` is deliberately used without -r: the list below doubles its
  # backslashes so that read hands back a single one)
  while read ILLEGAL_CHAR; do
    [ -n "${ILLEGAL_CHAR}" ] || break #loop will run despite no input
    # first line of od output with the offset column and whitespace removed
    OCTAL="$(
      printf "%s" "${ILLEGAL_CHAR}" |
        od |
        sed 's/^[0-9]\{7\}//
             s/[[:space:]]*//g
             q'
    )" || :
    # Some characters (single quote) behave strangely when escaped and instead
    # of replacing the illegal character, the octal appends to the end of the
    # name. Other characters (brackets) do need to be escaped and will error
    # out otherwise, so we try unescaped and fall back to escaped.
    SANITIZED_ITEM="$(
      printf "%s" "${SANITIZED_ITEM}" |
        { sed "s/${ILLEGAL_CHAR}/\[${OCTAL}\]/g" 2>/dev/null ||
            sed "s/\\${ILLEGAL_CHAR}/\[${OCTAL}\]/g"
        }
    )" || :
  done <<< "$( # list illegal characters in item path
    diff <(printf "%s\n" "${LEGAL_CHARS}") \
         <(printf "%s\n" "${ITEM}" | fold -w 1) |
      grep "^>" |
      cut -d ' ' -f 2 |
      sort |
      uniq |
      sed 's/\\/\\\\/g'
  )"

  # Detect if sanitization created a name collision (edge case)
  ############################################################################
  # A collision exists when the source directory already holds an entry whose
  # name equals the sanitized name. Tested directly on the filesystem: an exact
  # match, with no regex anchors and no dependence on `ls` surviving SIGPIPE.
  if [ "${ITEM##*/}" != "${SANITIZED_ITEM##*/}" ] && [ "${ITEM}" != "" ]; then
    log_event notice "NAME" "SANITIZED" "" "${SANITIZED_ITEM}"
    COLLIDING_PATH="${SOURCE_ITEM%/*}/${SANITIZED_ITEM##*/}"
    if [ -e "${COLLIDING_PATH}" ] || [ -L "${COLLIDING_PATH}" ]; then
      log_event err "NAME" "COLLISION" "" "${ITEM}"
      # break main loop if collision was detected
      STATUS="FAILED"
      break
    fi
  fi
  # END SOURCE

  ##############################################################################
  # DIRECTORY ##################################################################
  ##############################################################################
  # If directory, make the directory on tape/cloud and copy contents to cloud
  ##############################################################################
  if [ -d "${SOURCE_ITEM}" ]; then
    # DIRECTORY - Cloud
    ############################################################################
    # Attempting to run rclone on the entire source parallel to local copy
    # resulted in memory exhaustion while running rclone on each file exhausted
    # Google's API call quota. As a compromise, we batch upload files one
    # directory at a time. As of rclone 1.50, illegal characters are handled
    # transparently, so we do not need to use the sanitized file names.
    ############################################################################
    #make directory on remote cloud
    log_event info "CLOUD" "MAKING DIRECTORY" "${CLOUD_DESTINATION}" "${ITEM}"
    if ! run_logged rclone mkdir "${CLOUD_DESTINATION}${ITEM}"; then
      # if mkdir fails, stop copying and report failure
      STATUS="FAILED"
      break
    fi

    #copy all files in the directory but no subdirectories
    log_event info "CLOUD" "COPYING CONTENTS" "${CLOUD_DESTINATION}" "${ITEM}/*"
    if ! run_logged rclone \
           --checksum \
           --max-depth "1" \
           --stats-one-line \
           --stats-file-name-length "0" \
           --links \
           --fast-list \
           --low-level-retries "999" \
           --bwlimit "7M" \
           copy \
           "${SOURCE_ITEM}" \
           "${CLOUD_DESTINATION}${ITEM}"; then
      # log failed upload, set status to incomplete but continue
      log_event info "CLOUD" "FAILED" "" "${ITEM}/*"
      STATUS="INCOMPLETE"
    fi

    # DIRECTORY - Tape
    ############################################################################
    #make directory on tape
    log_event info "TAPE" "MAKING DIRECTORY" "${TAPE_DESTINATION}" \
      "${SANITIZED_ITEM}"
    if ! run_logged mkdir -p "${TAPE_DESTINATION}${SANITIZED_ITEM}"; then
      # if mkdir fails, stop copying and report failure
      STATUS="FAILED"
      break
    fi
  fi
  # END DIRECTORY

  ##############################################################################
  # FILE #######################################################################
  ##############################################################################
  # If source item is a file, copy the file to tape
  ############################################################################
  # We use rsync instead of ltfscopy because ltfscopy only copies directories
  # and has no way to reconcile names with illegal characters.
  ############################################################################
  if [ -f "${SOURCE_ITEM}" ]; then
    # FILE - Tape
    ##########################################################################
    #copy to tape
    log_event info "TAPE" "COPYING FILE" "${TAPE_DESTINATION}" \
      "${SANITIZED_ITEM}"
    if ! run_logged rsync -ac \
           "${SOURCE_ITEM}" \
           "${TAPE_DESTINATION}${SANITIZED_ITEM}"; then
      #log failed copy, set status to incomplete but continue
      log_event info "TAPE" "FAILED" "${TAPE_DESTINATION}" "${SANITIZED_ITEM}"
      STATUS="INCOMPLETE"
    fi
  fi
  # END FILE
done || {
  # The pipeline is non-zero when find (or its logger) failed, or when an early
  # `break` left find writing into a closed pipe. In the second case STATUS is
  # already FAILED; otherwise record that the listing itself was incomplete, so
  # the run still reaches REPORTING instead of dying here under `set -e`.
  [ -n "${STATUS:-}" ] || {
    log_event warning "SOURCE" "FAILED" "${SOURCE}" ""
    STATUS="INCOMPLETE"
  }
}
# END MAIN LOOP | |
################################################################################ | |
### REPORTING -- Copy log to destinations and email notification ############### | |
################################################################################ | |
# set status to completed if not already set to another value, and prefix it
# with "Dry Run " when nothing was actually copied
if dryrun; then
  STATUS="Dry Run ${STATUS:-Completed}"
else
  STATUS="${STATUS:-Completed}"
fi
# ## CURRENTLY UNSUPPORTED: https://github.com/rclone/rclone/issues/1319 | |
# # move hidden files out of root directory on the cloud destination for better | |
# # presentation | |
# ! dryrun && | |
# printf "%s" "${STATUS}" | | |
# grep -q "Completed" && | |
# { rclone mkdir \ | |
# "${CLOUD_DESTINATION}/Hidden System Files" && | |
# rclone \ | |
# --include "/.*" \ | |
# move \ | |
# "${CLOUD_DESTINATION}" \ | |
# "${CLOUD_DESTINATION}/Hidden System Files" | |
# } 2>&1 | | |
# logger -s -t "${_PID}" -p user.warning || : #continue on failure | |
# log final success or failure using appropriate log level:
# any STATUS containing "Completed" (including "Dry Run Completed") is info,
# everything else is err
case "${STATUS}" in
  *Completed*) FINAL_LOG_LEVEL="info" ;;
  *)           FINAL_LOG_LEVEL="err" ;;
esac
printf "%s" "Archive ${STATUS}" |
  logger -s -t "${_PID}" -p "user.${FINAL_LOG_LEVEL}"
# save a copy of the log to both destinations (skipped entirely on a dry run)
# NOTE(review): the log is selected by grepping for the bare PID, which also
# matches unrelated lines that merely contain the same digits - confirm whether
# a tighter pattern is wanted for the local syslog format.
if ! dryrun; then
  LOG_ARCHIVE="${TAPE_DESTINATION}/Backup ${STATUS} ($(date +'%Y-%m-%d')).log.gz"
  # write the compressed log to tape first, then copy every Backup*.log.gz in
  # the tape root to the cloud; any failure in either step is only logged
  if ! { grep "${_PID}" "${_LOG}"* |
           gzip -9 > "${LOG_ARCHIVE}" &&
         rclone copy \
           --include "/Backup*.log.gz" \
           "${TAPE_DESTINATION}" \
           "${CLOUD_DESTINATION}"
       }; then
    printf "%s" "Copying log to destination failed." |
      logger -s -t "${_PID}" -p user.warning
  fi
fi
############################################################################## | |
# generate random mime boundary or use pre-generated d29... if md5 is missing
# or other issue
# Each attempt runs with pipefail in its own subshell and is accepted only when
# the whole pipeline succeeded, so the output of a half-failed attempt (for
# example md5sum hashing empty input because uuidgen is missing) is never glued
# onto the fallback value.
declare MIME_BOUNDARY=""
MIME_BOUNDARY="$(
  set -o pipefail
  uuidgen 2>/dev/null | md5 2>/dev/null
)" ||
  MIME_BOUNDARY="$(
    set -o pipefail
    uuidgen 2>/dev/null | md5sum 2>/dev/null | cut -d ' ' -f 1
  )" ||
  MIME_BOUNDARY=""
if [ -z "${MIME_BOUNDARY}" ]; then
  MIME_BOUNDARY="d29a0c638b540b23e9a29a3a9aebc900aeeb6a82"
fi
# send email notification to root
# The message is multipart/mixed: an HTML summary, this run's log lines
# (gzip + base64) and a copy of this script (gzip + base64).

# Escape &, < and > on stdin so paths and log text cannot break the HTML part.
html_escape() {
  sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g'
}

# Count this run's log lines whose state column matches $1.
count_logged() {
  grep "${_PID}.*$1[[:space:]]*:" "${_LOG}"* |
    wc -l
}

printf "%s\n" \
  "Subject: $(hostname -f) - Archive ${STATUS}" \
  "Mime-Version: 1.0" \
  "Content-Type: multipart/mixed; boundary=\"${MIME_BOUNDARY}\"" \
  "Content-Transfer-Encoding: 7bit" \
  "" \
  "--${MIME_BOUNDARY}" \
  "Content-Type: text/html; charset=UTF-8" \
  "Content-Transfer-Encoding: 7bit" \
  "Content-Disposition: inline" \
  "" \
  "<h1>$(hostname -f | awk '{ print toupper($0) }')</h1>" \
  "<h2>Archive ${STATUS}</h2>" \
  "<h3>Began</h3>" \
  "<pre>${_BEGAN}</pre>" \
  "<h3>Source</h3>" \
  "<pre>$(printf "%s" "${SOURCE}" | html_escape)</pre>" \
  "<h3>Tape</h3>" \
  "<pre>$(printf "%s" "${TAPE_DESTINATION}" | html_escape)</pre>" \
  "<h3>Cloud</h3>" \
  "<pre>$(printf "%s" "${CLOUD_DESTINATION}" | html_escape)</pre>" \
  "<h2>Report</h2>" \
  "<p>Files copied to tape: $(count_logged "COPYING FILE")</p>" \
  "<p>Directories uploaded to cloud: $(count_logged "COPYING CONTENTS")</p>" \
  "<p>Instances of unsupported characters (converted): $(
    count_logged "SANITIZED"
  )</p>" \
  "<p>Failures: $(count_logged "FAILED")</p>" \
  "$(
    # first failure lines (grep -m 20 limits matches per log file)
    grep -m 20 "${_PID}.*FAILED[[:space:]]*:" "${_LOG}"* |
      html_escape |
      sed -e 's/^/<li><pre>/' -e 's/$/<\/pre><\/li>/'
  )" \
  "<p>END</p>" \
  "<p></p>" \
  "" \
  "--${MIME_BOUNDARY}" \
  "Content-Type: application/x-gzip" \
  "Content-Transfer-Encoding: base64" \
  "Content-Disposition: attachment; filename=\"${0##*/}.log.gz\"" \
  "" \
  "$(
    grep "${_PID}" "${_LOG}"* |
      gzip -9 --to-stdout |
      base64
  )" \
  "" \
  "--${MIME_BOUNDARY}" \
  "Content-Type: application/x-gzip" \
  "Content-Transfer-Encoding: base64" \
  "Content-Disposition: attachment; filename=\"${0##*/}.gz\"" \
  "" \
  "$(
    gzip -9 --to-stdout "${0}" |
      base64
  )" \
  "--${MIME_BOUNDARY}--" |
  sendmail -F "${0##*/}" root 2>&1 |
  logger -s -t "${_PID}" -p user.warning ||
  printf "%s" "Email alert failed." |
  logger -s -t "${_PID}" -p user.warning

# the outcome is conveyed by the log and the email, not by the exit status
exit 0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment