Skip to content

Instantly share code, notes, and snippets.

@ChrisCummins
Last active March 22, 2022 10:38
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ChrisCummins/371a6a1c549de73e047bf543ebc83071 to your computer and use it in GitHub Desktop.
Save ChrisCummins/371a6a1c549de73e047bf543ebc83071 to your computer and use it in GitHub Desktop.
A (paranoid) script for extracting Backblaze's zip restore archives.
#!/usr/bin/env bash
#
# Unpacking Backblaze archives
# ----------------------------
#
# A paranoid script for extracting a zip file into the current directory. It
# attempts to validate against common gotchas: it checks the list of extracted
# files against the list of archive contents, it validates the zipfile's contents
# checksums, and it uses "ditto" over "unzip" because it handles non-ASCII
# filenames properly.
#
# Usage: unpack_backblaze_restore_zip_archive.sh <zip archive>
# Strict mode: abort on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail
# Locale settings exported to every tool this script runs.
# NOTE(review): LC_ALL normally takes precedence over LANG, so the LANG value
# below may have no effect while LC_ALL=C is exported -- confirm the intent.
export LC_ALL=C
export LANG=en_US.UTF-8
#######################################
# Extract a zip archive into the current directory with "ditto" and record
# the sorted list of paths that ditto reported copying.
# Arguments:
#   $1 - path to the zip archive; the "<archive>.logs/" directory must
#        already exist, since this function writes into it without creating it.
# Outputs:
#   Progress messages on stdout.
#   "<archive>.logs/ditto.txt"    - ditto's combined stdout and stderr.
#   "<archive>.logs/unzipped.txt" - sorted list of extracted paths.
#######################################
unpack_with_ditto() {
  local archive="$1"
  local log_dir="${archive}.logs"
  local ditto_log="${log_dir}/ditto.txt"
  local extracted_list="${log_dir}/unzipped.txt"

  echo "Unzipping ${archive}, logging to ${ditto_log}"
  ditto -V -x -k --sequesterRsrc --rsrc "${archive}" . > "${ditto_log}" 2>&1

  echo "Computing ${extracted_list}"
  # Keep only the "copying file <path> ..." lines of the verbose log, then
  # strip the fixed prefix and the five-character suffix (a space followed by
  # any four characters) so that only the path remains.
  grep 'copying file' "${ditto_log}" \
    | sed -e 's/^copying file //' -e 's/ ....$//' \
    | sort > "${extracted_list}"
}
#######################################
# Fallback extractor: unpack a zip archive into the current directory with
# "unzip" (overwriting existing files) and record the sorted list of paths
# that unzip reported inflating.
# Arguments:
#   $1 - path to the zip archive; the "<archive>.logs/" directory must
#        already exist, since this function writes into it without creating it.
# Outputs:
#   Progress messages on stdout.
#   "<archive>.logs/unzip.txt"    - unzip's combined stdout and stderr.
#   "<archive>.logs/unzipped.txt" - sorted list of inflated paths.
#######################################
unpack_with_unzip() {
  local zipfile="$1"
  echo "Unzipping ${zipfile}, logging to ${zipfile}.logs/unzip.txt"
  unzip -o "${zipfile}" &> "${zipfile}.logs/unzip.txt"
  echo "Computing ${zipfile}.logs/unzipped.txt"
  # The sorted list must be redirected INTO unzipped.txt; previously the path
  # was passed to sort as an input file, so the list was never written.
  # The sed pattern tolerates any amount of leading padding before
  # "inflating:". The awk step re-joins fields with single spaces, which
  # trims surrounding whitespace (and collapses internal runs of spaces).
  # NOTE(review): only lines containing "inflating" are captured; entries that
  # unzip reports with a different verb are not listed -- confirm intent.
  grep inflating "${zipfile}.logs/unzip.txt" \
    | sed 's/^ *inflating: //' \
    | awk '{$1=$1};1' \
    | sort > "${zipfile}.logs/unzipped.txt"
}
#######################################
# Extract a zip archive into the current directory and verify the result.
# Steps: list the archive's entries, run unzip's integrity test, extract
# with ditto, then diff the extracted-file list against the entry list.
# Arguments:
#   $1 - path to the zip archive.
# Outputs:
#   Progress messages and any file-list diff on stdout; usage and error
#   messages on stderr. All logs go to "<archive>.logs/".
# Returns:
#   2 if no archive was given, 1 if the archive does not exist; otherwise
#   the status of the final diff pipeline (non-zero on a mismatch when the
#   shell runs with "pipefail").
#######################################
main() {
  if (( $# < 1 )); then
    printf 'Usage: %s <zip archive>\n' "${0##*/}" >&2
    return 2
  fi
  local zipfile="$1"
  # Fail before creating the logs directory so that a mistyped path does not
  # leave a stray "<name>.logs" directory behind.
  if [[ ! -f "${zipfile}" ]]; then
    printf 'error: zip archive not found: %s\n' "${zipfile}" >&2
    return 1
  fi

  # Write files to a <name>.logs directory.
  local logdir="${zipfile}.logs"
  mkdir -p "${logdir}"

  # Dump a list of file names from the zip archive. We can use this to
  # compare against after unpacking to make sure we got everything.
  echo "Writing ${logdir}/file_list.txt"
  zipinfo -1 "${zipfile}" | sort > "${logdir}/file_list.txt"
  local file_count
  file_count=$(awk 'END {print NR}' "${logdir}/file_list.txt")
  echo "${file_count} files in ${zipfile}"

  # Run unzip's "test" utility, which runs an in-memory extract and compares
  # the checksums.
  echo "Testing zipfile contents, logging to ${logdir}/ziptest.txt"
  unzip -t "${zipfile}" &> "${logdir}/ziptest.txt"

  # ================================================
  # Run the actual extraction.
  # If ditto fails, try "unpack_with_unzip"
  # ================================================
  unpack_with_ditto "${zipfile}"

  # Compare the list of extracted files with the list of archive contents
  # that we computed earlier. Any output here shows that something went
  # wrong, and the script will exit with a non-zero return code (this relies
  # on "pipefail" so that git diff's status is not masked by tee).
  echo "Diffing unzip file list with ditto extraction list"
  git diff --no-index "${logdir}/file_list.txt" "${logdir}/unzipped.txt" 2>&1 \
    | tee "${logdir}/file_list_unzipped_diff.txt"
}
# Forward the command-line arguments verbatim. Quoting "$@" keeps an archive
# path that contains spaces or glob characters as a single argument.
main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment