Skip to content

Instantly share code, notes, and snippets.

@kaczmarj
Last active August 9, 2017 19:46
Show Gist options
  • Save kaczmarj/48d91468c967cc263d261dca7e6e88af to your computer and use it in GitHub Desktop.
Save kaczmarj/48d91468c967cc263d261dca7e6e88af to your computer and use it in GitHub Desktop.
Merge [ReproZip](https://www.reprozip.org/) pack files
#!/usr/bin/env bash
# This script merges multiple ReproZip version 2 pack files.
#
# Example:
#
# bash merge_packfiles.sh -o merged.rpz packA.rpz packB.rpz packC.rpz
#
#
# Requires reprozip and rsync.
#
# Implementation:
# - Accept paths to multiple ReproZip pack files and an output filepath.
# - Create a directory for the final, merged pack file.
# - For each pack file:
#   - Extract the pack file (POSIX tar archive, uncompressed).
#   - Extract and decompress the DATA.tar.gz file.
#   - Move the version file to the merged pack directory, if one does not
#     already exist there.
# - Use rsync to merge all of the ReproZip DATA directories.
# - Tar the merged data directory.
# - Run `reprozip combine` on all of the trace.sqlite3 files.
# - Tar the merged directory to create the final pack file.
# Shell options for the whole script:
#   errexit - stop as soon as a command fails,
#   nounset - treat expansion of an unset variable as an error,
#   xtrace  - print each command before it is executed.
set -o errexit -o nounset -o xtrace
# Report whether a command is available.
# Arguments: $1 - name of the command to look up.
# Returns:   the status of the `hash` builtin: 0 when the command was found,
#            non-zero otherwise. Any message from `hash` is discarded.
program_exists() {
  hash "$1" 2> /dev/null
}
# Stop with exit status 1 as soon as one of the required external tools
# cannot be found.
for DEPENDENCY in reprozip rsync; do
  program_exists "$DEPENDENCY" && continue
  echo "Dependency not found: $DEPENDENCY"
  exit 1
done
# https://stackoverflow.com/a/16496491/5666087
# Print the command-line synopsis on stderr and terminate with exit status 1.
usage() {
  echo "Usage: $0 [-o <output>] <pack files>" >&2
  exit 1
}
# Parse the command-line options.
#   -o <output>  path of the merged pack file to create (required).
# Any other option, or -o without its argument, prints the usage text.
#
# `o` starts out as the empty string so that the "missing -o" check below
# reaches usage() instead of aborting with an "unbound variable" error when
# the script runs with `set -u`.
o=""
while getopts ":o:" opts; do
  case "${opts}" in
    o)
      o=${OPTARG}
      ;;
    *)
      usage
      ;;
  esac
done
# Drop the parsed options so that "$@" holds only the pack file paths.
shift $((OPTIND - 1))
if [ -z "${o}" ]; then
  usage
fi
# Remaining positional parameters (the pack files) joined into one
# space-separated string.
PACK_FILES="$*"
PACK_FILE_EXT=".rpz"
# Scratch directory, relative to the current working directory.
TMP_DEST="_tmp_reprozip_merge"
# New directory to store the merged pack files.
MERGED_DEST="${TMP_DEST}/merged"
MERGED_METADATA_DIR="${MERGED_DEST}/METADATA"
MERGED_DATA_DIR="${MERGED_DEST}/DATA/"

# Refuse to run when the scratch directory is already present.
if [[ -d "${TMP_DEST}" ]]; then
  echo "Temporary directory $TMP_DEST already exists."
  exit 2
fi

# At least one pack file must have been given.
[[ -n "${PACK_FILES}" ]] || usage

# Make sure the output filename ends in '.rpz'.
case "${o}" in
  *.rpz)
    OUTFILE="${o}"
    ;;
  *)
    echo "Appending '.rpz' to the output filename."
    OUTFILE="${o}.rpz"
    ;;
esac
# Extract every pack file given on the command line into its own scratch
# directory and unpack the DATA.tar.gz found inside it.
#
# The loop walks "$@" (the positional parameters left after option parsing)
# and quotes every expansion, so pack file paths that contain spaces or glob
# characters are passed through intact.
mkdir -p "$MERGED_METADATA_DIR"
for THIS_PACK_FILE in "$@"; do
  THIS_BASENAME=$(basename -- "$THIS_PACK_FILE" "$PACK_FILE_EXT")
  THIS_TMP_PACK_DIR="${TMP_DEST}/${THIS_BASENAME}"
  # Two pack files with the same basename, or one whose basename maps onto the
  # merged output directory, would be extracted on top of existing content.
  # Fail loudly instead of overwriting it.
  if [[ -e "$THIS_TMP_PACK_DIR" ]]; then
    echo "Pack file name collides with an existing scratch directory: $THIS_PACK_FILE" >&2
    exit 1
  fi
  mkdir -p "$THIS_TMP_PACK_DIR"
  # Extract pack file.
  tar -xf "$THIS_PACK_FILE" -C "$THIS_TMP_PACK_DIR"
  # Move a version file over into the merged pack file. This assumes that the
  # version of ReproZip pack file (1 or 2) was the same for all traces.
  if [[ ! -f "${MERGED_METADATA_DIR}/version" ]]; then
    mv "${THIS_TMP_PACK_DIR}/METADATA/version" "${MERGED_METADATA_DIR}/version"
  fi
  # Extract the data inside the extracted pack file.
  tar -xzf "${THIS_TMP_PACK_DIR}/DATA.tar.gz" -C "$THIS_TMP_PACK_DIR"
done
# Merge all DATA files and directories.
# The glob is expanded straight into an array so that paths containing spaces
# survive as single arguments. A single `*` is used because only the
# first-level scratch directories are wanted.
DATA_DIRS=("${TMP_DEST}"/*/DATA/)
if [[ ! -d "${DATA_DIRS[0]}" ]]; then
  # An unmatched glob stays as the literal pattern; report that clearly.
  echo "No extracted DATA directories found under ${TMP_DEST}." >&2
  exit 1
fi
rsync -rqabuP "${DATA_DIRS[@]}" "$MERGED_DATA_DIR"
tar -czf "${MERGED_DEST}/DATA.tar.gz" -C "$MERGED_DEST" DATA
# ${VAR:?} aborts instead of expanding to an empty path in front of `rm -rf`.
rm -rf -- "${MERGED_DEST:?}/DATA"
# Merge all trace databases into one. This also creates a merged config.yml.
DATABASES=("${TMP_DEST}"/*/METADATA/trace.sqlite3)
if [[ ! -f "${DATABASES[0]}" ]]; then
  echo "No trace.sqlite3 databases found under ${TMP_DEST}." >&2
  exit 1
fi
reprozip combine "${DATABASES[@]}" --dir="$MERGED_METADATA_DIR"
# Create the merged pack file.
#tar -cf $OUTFILE -C $MERGED_DEST .
# tar cf out.tar -C merged METADATA DATA.tar.gz
tar cf "$OUTFILE" -C "$MERGED_DEST" METADATA DATA.tar.gz
rm -rf -- "${TMP_DEST:?}"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment