Skip to content

Instantly share code, notes, and snippets.

@mttjohnson
Last active February 25, 2020 12:01
Show Gist options
  • Save mttjohnson/ae8bb57ca12cbcf971df79e647c4936c to your computer and use it in GitHub Desktop.
Save mttjohnson/ae8bb57ca12cbcf971df79e647c4936c to your computer and use it in GitHub Desktop.
MySQL DB snapshot separating tables into files for comparison purposes
#!/usr/bin/env bash
##
# Copyright © 2017 by Matt Johnson. All rights reserved
#
# Licensed under the Open Software License 3.0 (OSL-3.0)
# See included LICENSE file for full text of OSL-3.0
#
# https://github.com/mttjohnson
##
# stop on errors
set -e
trap '>&2 echo Error: Command \`$BASH_COMMAND\` on line $LINENO failed with exit code $?' ERR
# identify current script directory
DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
# default variable values
DESTINATION_PATH="${DIR}/dbsnapshots"
## command line argument parsing
for arg in "$@"; do
case $arg in
--db-name=*)
DB_NAME="${arg#*=}"
if [[ ! "$DB_NAME" =~ ^[a-zA-Z0-9_-]+$ ]]; then
>&2 echo "Error: Invalid value given --db-name=$DB_NAME"
exit -1
fi
;;
--destination-path=*)
DESTINATION_PATH="${arg#*=}"
if [[ ! "$DESTINATION_PATH" =~ ^(2\.0|develop)$ ]]; then
>&2 echo "Error: Invalid value given --destination-path=$DESTINATION_PATH"
exit -1
fi
;;
--comment=*)
SNAPSHOT_COMMENT="${arg#*=}"
if [[ ! "$SNAPSHOT_COMMENT" =~ ^[a-z0-9][a-z0-9_/-]*[a-z0-9_-]+$ ]]; then
>&2 echo "Error: Invalid value given --comment=$SNAPSHOT_COMMENT"
exit -1
fi
;;
--help)
echo ""
echo "Usage: $(basename $0) --db-name=<db-name> [--destination-path=<path>] [--comment=<comment>]"
echo ""
echo " --db-name=<db-name> database name to dump and take a snapshot of"
echo " --destination-path=<path> destnation directory path for snapshots"
echo " --comment=<comment> comment reference describing the snapshot"
echo ""
echo "The script will create a dbsnapshots directory and store the contents into a"
echo "separate timestamped directory on each execution of the script. A full backup of"
echo "the database will exist in the directory in addition to a tables directory. The"
echo "tables directory has a list of files broken out by table separate data and"
echo "structure files. When comparing database snapshots you will want to compare the"
echo "tables directories from each snapshot."
echo ""
echo "If you include a comment when executing the command there will be a"
echo "snapshot_comment.txt file in the main directory for each snapshot and can serve"
echo "as a reference for the state of the snapshot like 'before xyz' and 'after xzy'"
echo ""
echo "This script is intended to be run from a linux system, and attemting to use it"
echo "on MacOS will likely produce erros due to the use of csplit."
echo ""
exit -1
;;
*)
>&2 echo "Error: Unrecognized argument $arg"
>&2 echo "Tip: If using an argument starting with \"--\", make sure to add \"=\" before the value"
exit -1
;;
esac
done
## verify required command line arguments
if [[ ! "$DB_NAME" ]]; then
>&2 echo "Error: Required input --db-name missing. Please use --help for proper usage"
exit -1
fi
# Get the current timestamp for the snapshot name
TIMESTAMP=`date +%Y-%m-%d-%H-%M-%S`
# load local variables
SNAPSHOT_PATH="${DESTINATION_PATH}/${DB_NAME}-${TIMESTAMP}"
DB_TABLE_PATH="${SNAPSHOT_PATH}/tables"
SQL_FILE="${SNAPSHOT_PATH}/${DB_NAME}-${TIMESTAMP}.sql"
# create the directories
mkdir -p ${DB_TABLE_PATH}
# create snapshot comment file if defined
[ -n "${SNAPSHOT_COMMENT}" ] && echo "${SNAPSHOT_COMMENT}" > ${SNAPSHOT_PATH}/snapshot_comment.txt
# excludes comments and uses extended inserts to make comparing diffs easier
# does not require maintaining a separate structure.sql (redundant info), but maintains each table as seperate file
mysqldump \
--single-transaction \
--triggers \
--routines \
--events \
--compact \
--add-drop-table \
--extended-insert=FALSE \
--default-character-set=utf8 \
${DB_NAME} \
| pv > ${SQL_FILE}
# change to table path
cd $DB_TABLE_PATH
# split dump into tables
csplit -s -n 4 -f table_ ${SQL_FILE} "/^DROP TABLE IF EXISTS/" '{*}'
# remove the 1st table file if it's empty
[ ! -s table_0000 ] && rm table_0000
for i in table_*; do
# split table into structure and the data, triggers, etc. (_01 files)
csplit -s -f ${i}_ $i "/^INSERT INTO/" {0} || :
mv $i ${i}.structure
[ -e ${i}_00 ] && mv ${i}_00 ${i}.structure
done;
for i in table_*_01; do
# split the data, triggers, etc. into data and the etc files
csplit -s -f ${i}_ $i "/^\/\*/" {0} || :
mv $i ${i}.data
if [ -e ${i}_00 ]; then
mv ${i}_00 ${i}.data
mv ${i}_01 ${i}.etc
fi
done
for i in table_*.data; do
# sort the data
awk '{print $5 " " $0}' $i | cut -c 2- | sort -n | awk '{$1=""; print substr($0,2)}' >> $i.sorted
rm $i
done
for i in *.structure; do
# rename files based on their table name
table=$(sed -n '1 s/.*`\(.*\)`.*/\1/p' $i)
mv $i $table.structure || :
prefix=${i%.*}
if [ -e ${prefix}_01.data.sorted ]; then
mv ${prefix}_01.data.sorted $table.data.sorted
fi
if [ -e ${prefix}_01.etc ]; then
mv ${prefix}_01.etc $table.etc
fi
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment