Last active
March 22, 2022 10:38
-
-
Save ChrisCummins/371a6a1c549de73e047bf543ebc83071 to your computer and use it in GitHub Desktop.
A (paranoid) script for extracting Backblaze's zip restore archives.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# Unpacking Backblaze archives
# ----------------------------
#
# A paranoid script for extracting a zip file into the current directory. It
# attempts to validate against common gotchas: it checks the list of extracted
# files against the list of archive contents, it validates the checksums of
# the zipfile's contents, and it uses "ditto" over "unzip" because it handles
# non-ASCII filenames properly.
#
# Usage: unpack_bacbklaze_restore_zip_archive.sh <zip archive>
# Strict mode: abort on any failing command (-e), on use of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# LC_ALL=C gives byte-wise, locale-independent ordering for the "sort" calls
# whose outputs are later diffed against each other.
# NOTE(review): LC_ALL takes precedence over LANG, so the LANG export below
# has no effect on tools that honour LC_ALL -- confirm whether a UTF-8 locale
# was actually intended here.
export LC_ALL=C
export LANG=en_US.UTF-8
#######################################
# Extract a zip archive into the current directory with "ditto" and record
# the names of the files it reports copying.
# Arguments:
#   $1 - path to the zip archive. The directory "<archive>.logs" must
#        already exist.
# Outputs:
#   Progress messages on stdout.
#   <archive>.logs/ditto.txt    - ditto's combined stdout and stderr.
#   <archive>.logs/unzipped.txt - sorted list of extracted paths.
#######################################
unpack_with_ditto() {
  local zipfile="$1"
  local logdir="${zipfile}.logs"

  echo "Unzipping ${zipfile}, logging to ${logdir}/ditto.txt"
  ditto -V -x -k --sequesterRsrc --rsrc "${zipfile}" . &> "${logdir}/ditto.txt"

  echo "Computing ${logdir}/unzipped.txt"
  # Keep only the "copying file <path> ..." lines, then drop the fixed prefix
  # and the trailing space-plus-four-characters suffix so that only the path
  # remains.
  grep 'copying file' "${logdir}/ditto.txt" \
    | sed 's/^copying file // ; s/ ....$//' \
    | sort > "${logdir}/unzipped.txt"
}
#######################################
# Fallback extractor: unpack a zip archive into the current directory with
# "unzip" and record the names of the files it reports inflating.
# Arguments:
#   $1 - path to the zip archive. The directory "<archive>.logs" must
#        already exist.
# Outputs:
#   Progress messages on stdout.
#   <archive>.logs/unzip.txt    - unzip's combined stdout and stderr.
#   <archive>.logs/unzipped.txt - sorted list of extracted paths.
#######################################
unpack_with_unzip() {
  local zipfile="$1"
  local logdir="${zipfile}.logs"

  echo "Unzipping ${zipfile}, logging to ${logdir}/unzip.txt"
  # -o: overwrite existing files without prompting.
  unzip -o "${zipfile}" &> "${logdir}/unzip.txt"

  echo "Computing ${logdir}/unzipped.txt"
  # Strip the "inflating:" prefix regardless of how much whitespace surrounds
  # it, then let awk trim the leading/trailing blanks.
  # NOTE(review): the awk field rebuild also collapses runs of whitespace
  # inside a file name, and only "inflating" lines are considered -- confirm
  # that is acceptable for the archives this is used on.
  # The sorted result must be redirected INTO unzipped.txt; passing the path
  # to sort as an operand would make sort try to read a file that does not
  # exist yet.
  grep inflating "${logdir}/unzip.txt" \
    | sed 's/^[[:space:]]*inflating:[[:space:]]*//' \
    | awk '{$1=$1};1' \
    | sort > "${logdir}/unzipped.txt"
}
#######################################
# Verify a zip archive, extract it into the current directory, and check
# that every archive entry was extracted.
# Arguments:
#   $1 - path to the zip archive.
# Outputs:
#   Progress messages on stdout, diagnostics on stderr, and the following
#   files under "<archive>.logs/":
#     file_list.txt               - sorted archive entry names (zipinfo -1)
#     ziptest.txt                 - output of "unzip -t"
#     unzipped.txt                - written by the extraction helper
#     file_list_unzipped_diff.txt - diff of file_list.txt vs unzipped.txt
# Returns:
#   2 if no archive path was given, 1 if "unzip -t" fails, otherwise the
#   status of the final diff pipeline (which reflects a non-empty diff only
#   when "pipefail" is enabled by the caller).
#######################################
main() {
  if [[ $# -lt 1 ]]; then
    echo "Usage: ${0##*/} <zip archive>" >&2
    return 2
  fi

  local zipfile="$1"
  local logdir="${zipfile}.logs"

  # Write files to a <name>.logs directory.
  mkdir -p "${logdir}"

  # Dump a list of file names from the zip archive. We can use this to
  # compare against after unpacking to make sure we got everything.
  echo "Writing ${logdir}/file_list.txt"
  zipinfo -1 "${zipfile}" | sort > "${logdir}/file_list.txt"

  local file_count
  file_count="$(awk 'END { print NR }' "${logdir}/file_list.txt")"
  echo "${file_count} files in ${zipfile}"

  # Run unzip's "test" utility, which runs an in-memory extract and compares
  # the checksums.
  echo "Testing zipfile contents, logging to ${logdir}/ziptest.txt"
  if ! unzip -t "${zipfile}" &> "${logdir}/ziptest.txt"; then
    echo "Zipfile test failed, see ${logdir}/ziptest.txt" >&2
    return 1
  fi

  # ================================================
  # Run the actual extraction.
  # If ditto fails, try "unpack_with_unzip"
  # ================================================
  unpack_with_ditto "${zipfile}"

  # Compare the list of extracted files with the list of archive contents
  # that we computed earlier. Any output here shows that something went
  # wrong, and the script will exit with a non-zero return code.
  echo "Diffing unzip file list with ditto extraction list"
  git diff --no-index "${logdir}/file_list.txt" "${logdir}/unzipped.txt" 2>&1 \
    | tee "${logdir}/file_list_unzipped_diff.txt"
}
# Forward the script's arguments unchanged; quoting "$@" keeps an archive
# path that contains spaces or glob characters as a single argument.
main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment