Last active
August 9, 2017 19:46
-
-
Save kaczmarj/48d91468c967cc263d261dca7e6e88af to your computer and use it in GitHub Desktop.
Merge [ReproZip](https://www.reprozip.org/) pack files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# This script merges multiple ReproZip version 2 pack files.
#
# Example:
#
#   bash merge_packfiles.sh -o merged.rpz packA.rpz packB.rpz packC.rpz
#
#
# Requires reprozip, rsync and tar.
#
# Implementation:
#   - Accept paths to multiple ReproZip pack files and an output filepath.
#   - Create a directory for the final, merged pack file.
#   - For each pack file:
#       - Extract the pack file (POSIX tar archive, uncompressed).
#       - Extract and decompress the DATA.tar.gz file.
#       - Move the version file to the merged pack directory, if one does not
#         already exist.
#   - Use rsync to merge all of the ReproZip DATA directories.
#   - Tar the merged data directory.
#   - Run `reprozip combine` on all of the trace.sqlite3 files.
#   - Tar the merged directory to create the final pack file.

# Strict mode: stop at the first failing command (-e), treat expansion of an
# unset variable as an error (-u), and make a pipeline fail when any of its
# stages fails (pipefail).
set -euo pipefail
# Trace every command to stderr as it is executed.
set -x
#######################################
# Check whether a program can be located by the shell.
# Arguments: $1 - name of the program to look up
# Outputs:   nothing (the lookup's error message is discarded)
# Returns:   0 if the program was found, non-zero otherwise
#######################################
program_exists() {
  # Only the exit status of the lookup matters, so stderr is silenced.
  hash "$1" 2>/dev/null
}
# Fail fast, before any work is done, if a required external tool is missing.
# tar is checked as well because the merge steps below invoke it directly.
for DEPENDENCY in reprozip rsync tar; do
  if ! program_exists "$DEPENDENCY"; then
    # Diagnostics go to stderr so they are never mistaken for regular output.
    echo "Dependency not found: $DEPENDENCY" >&2
    exit 1
  fi
done
# https://stackoverflow.com/a/16496491/5666087
#######################################
# Print a one-line usage summary and abort the script.
# Outputs: the usage line, on stderr
# Returns: never returns; exits with status 1
#######################################
usage() {
  # -o is shown without brackets because the script refuses to run without it.
  echo "Usage: $0 -o <output> <pack file>..." 1>&2
  exit 1
}
# Parse the command line. The only option is -o <output>, which is mandatory.
# After this section the positional parameters hold just the pack files.
#
# `o` must be initialized first: with `set -u` active, testing an unset `o`
# below would abort with "unbound variable" instead of printing the usage text.
o=""
while getopts ":o:" opts; do
  case "${opts}" in
    o)
      o=${OPTARG}
      ;;
    *)
      # Unknown option, or -o given without its argument.
      usage
      ;;
  esac
done
# Drop the options that getopts consumed.
shift $((OPTIND-1))
if [[ -z "${o}" ]]; then
  usage
fi
# Pack files to merge. Kept as an array so that paths containing whitespace or
# glob characters reach tar unchanged.
PACK_FILES=("$@")
PACK_FILE_EXT=".rpz"
TMP_DEST="_tmp_reprozip_merge"
# New directory to store the merged pack files.
MERGED_DEST="${TMP_DEST}/merged"
MERGED_METADATA_DIR="${MERGED_DEST}/METADATA"
# The trailing slash matters to rsync: contents are merged *into* this directory.
MERGED_DATA_DIR="${MERGED_DEST}/DATA/"

# Refuse to reuse (and later delete) a directory this run did not create.
if [[ -d "$TMP_DEST" ]]; then
  echo "Temporary directory $TMP_DEST already exists." >&2
  exit 2
fi

if (( ${#PACK_FILES[@]} == 0 )); then
  usage
fi

if [[ $o == *"$PACK_FILE_EXT" ]]; then
  OUTFILE="$o"
else
  echo "Appending '.rpz' to the output filename."
  OUTFILE="${o}.rpz"
fi

# Remove the scratch directory on every exit path, including failures caught
# by `set -e`, so that a failed run does not block the next one. The trap is
# installed only after the existence check above, so a pre-existing directory
# is never deleted.
cleanup_tmp_dest() {
  rm -rf -- "${TMP_DEST:?}"
}
trap cleanup_tmp_dest EXIT

mkdir -p "$MERGED_METADATA_DIR"

# Extracted DATA directories and trace databases, in command-line order.
DATA_DIRS=()
DATABASES=()
PACK_INDEX=0
for THIS_PACK_FILE in "${PACK_FILES[@]}"; do
  THIS_BASENAME=$(basename -- "$THIS_PACK_FILE" "$PACK_FILE_EXT")
  # The running index keeps extraction directories distinct even when two
  # pack files share a basename or one of them is called "merged.rpz".
  THIS_TMP_PACK_DIR="${TMP_DEST}/${PACK_INDEX}_${THIS_BASENAME}"
  PACK_INDEX=$((PACK_INDEX + 1))
  mkdir -p "$THIS_TMP_PACK_DIR"

  # Extract pack file.
  tar -xf "$THIS_PACK_FILE" -C "$THIS_TMP_PACK_DIR"

  # Move a version file over into the merged pack file. This assumes that
  # every input pack file uses the same pack format version.
  if [[ ! -f "$MERGED_METADATA_DIR/version" ]]; then
    mv "$THIS_TMP_PACK_DIR/METADATA/version" "$MERGED_METADATA_DIR/version"
  fi

  # Extract the data inside the extracted pack file.
  tar -xzf "$THIS_TMP_PACK_DIR/DATA.tar.gz" -C "$THIS_TMP_PACK_DIR"

  # Record what this pack contributed; packs lacking a piece are skipped.
  if [[ -d "$THIS_TMP_PACK_DIR/DATA" ]]; then
    DATA_DIRS+=("$THIS_TMP_PACK_DIR/DATA/")
  fi
  if [[ -f "$THIS_TMP_PACK_DIR/METADATA/trace.sqlite3" ]]; then
    DATABASES+=("$THIS_TMP_PACK_DIR/METADATA/trace.sqlite3")
  fi
done

if (( ${#DATA_DIRS[@]} == 0 )); then
  echo "No DATA directory found in any of the pack files." >&2
  exit 1
fi
if (( ${#DATABASES[@]} == 0 )); then
  echo "No trace.sqlite3 found in any of the pack files." >&2
  exit 1
fi

# Merge all DATA files and directories.
rsync -rqabuP "${DATA_DIRS[@]}" "$MERGED_DATA_DIR"
tar -czf "${MERGED_DEST}/DATA.tar.gz" -C "$MERGED_DEST" DATA
# Only the compressed archive goes into the final pack file.
rm -rf -- "${MERGED_DEST:?}/DATA"

# Merge all trace databases into one. This also creates a merged config.yml.
reprozip combine "${DATABASES[@]}" --dir="$MERGED_METADATA_DIR"

# Create the merged pack file. The two members are listed explicitly rather
# than archiving the whole directory as `.`.
tar -cf "$OUTFILE" -C "$MERGED_DEST" METADATA DATA.tar.gz
# The scratch directory is removed by the EXIT trap installed above.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment