# regilero/serialization_sql_dump_cleaner.sh

## serialization_sql_dump_cleaner.sh
#!/bin/bash
##################################################################
# Licensed under GNU GPL v3                                      #
# regis.leroy@gmail.com                                          #
#                                                                #
# DNS replacement script in SQL dumps containing (also) PHP      #
# serialized strings.                                            #
# This script uses bash and perl; perl is used to increment      #
# serialized string length while performing DNS replacement.     #
# It also uses sed for other basic DNS replacements.             #
##################################################################

###### CONFIG ####################################################
# Domain name to look for in the dump.
OLD_DNS='to.be.replaced.com'
# Domain name written in its place.
NEW_DNS='foobar.titi.example.com'
# Source file to clean up.
DUMP_FILE='exemple.txt'
###### END CONFIG ################################################

# Regex-safe variants of both names: each "." is turned into "\." so that
# perl matches a literal dot instead of "any character".
ESCAPED_OLD_DNS=${OLD_DNS//./\\.}
ESCAPED_NEW_DNS=${NEW_DNS//./\\.}

# Refuse to run when the new name contains the old one (for instance when it
# is a subdomain of it): the perl pass further down repeats its substitution
# until the old name no longer matches, so it would never terminate.
# OLD_DNS is quoted inside the pattern so it is compared as a literal
# substring rather than being interpreted as a glob.
if [[ "${NEW_DNS}" == *"${OLD_DNS}"* ]]; then
  # Diagnostics go to stderr so they are not mixed with normal output.
  echo "Error: This script cannot handle subdomains replacements, risk of infinite loops, sorry!" >&2
  exit 1
fi

SED=`which sed`;
if [ ! ${SED} ]; then
    echo "Error: 'sed' command not found."
    exit 1;
fi

PERL=`which perl`;
if [ ! ${PERL} ]; then
    echo "Error: 'perl' command not found."
    exit 1;
fi

LEN1=${#OLD_DNS}
LEN2=${#NEW_DNS}
DIRECTION=$((LEN2>LEN1))
COUNT=$((LEN2-LEN1))
if [[ $COUNT -eq 0 ]]; then
    echo "Old and new domain name have the same size, no special serialization hack needed before classical sed replacement"
else

    NB=`grep -c "${OLD_DNS}" ${DUMP_FILE}`;
    if [ "0" != "${NB}" ]; then
        echo "Found ${NB} lines matching at least once ${OLD_DNS} in this file"
        echo "Starting serialized content inline replacement in dump with string lenght increment..."
        # $1 : «([;|{]s:)» : detect start of serialized string with «;s:» or «{s:» or «s:»
        # $2 : «([0-9]+)» : the serialized string length numbers
        # «:\\"» : start of the string with «\";»
        # $3 and $4 : «(((?!\\";).)*?)» :   (?!\\";) means not the substring «\";», ((XX.)*?) so here we match everything (.*) which does not contain this substring and the last ? means
        # here a small bug $4 contains the last matched char. unused.
        # non greedy, se we take the shortest match
        # $5 : «('${OLD_DNS2}')» :  finally it is (foo\.example\.com)' matching the DNS to replace
        # $6: «(.*?)» : match anything until the next pattern, the ? makes it a non-greedy match (shortest)
        # it is OK as next pattern is closing the serialized string.
        # non greedy: i.e. regular mode is match as much as you can contain in backward mode,
        # in non greedy is match the smallest way still working
        # it will make the match as small as possible, and next pattern will match the 1st end of serialized string available
        # «\\";» last pattern is end of serialized string
        # problem is that $6 contains the rest of string after 1st old DNS match.
        # This string may contain other occurrences of old DNS
        # and replacement should be done several times until nothing more happens, nb of replacement is
        # returned by the s// pattern, so we loop until nothing more happens with the «l while»
        # TEST with: perl -n -pe '$C+=s#([;|{]?s:)([0-9]+):\\"(((?!\\";).)*?)('${OLD_DNS2}')(.*?)\\";#"$1".($2+'${COUNT}').":\\\"$3'${NEW_DNS}'$6\\\";"#ge; END{print"$C\n"}' < exemple.txt
        ${PERL} -n -p -i -e '$rgx=qr/([;|{]?s:)([0-9]+):\\"(((?!\\";).)*?)('${ESCAPED_OLD_DNS}')(.*?)\\";/; 1 while s#$rgx#"$1".($2+'${COUNT}').":\\\"$3'${NEW_DNS}'$6\\\";"#ge;' ${DUMP_FILE}
        echo "Done with serialized strings"
    else
        echo "${OLD_DNS} not found in file, quite certainly nothing to be done."
    fi
fi
# Final pass: replace the occurrences of the old name that are still present
# in the dump (those outside serialized strings, or all of them when both
# names have the same length).
if [[ ! -f "${DUMP_FILE}" ]]; then
    # Without this check a missing file would still end with "Everything Done".
    echo "Error: dump file '${DUMP_FILE}' not found." >&2
    exit 1
fi
# -F: count lines containing the name as a literal string.
NB=$(grep -c -F -- "${OLD_DNS}" "${DUMP_FILE}")
if [ "0" != "${NB}" ]; then
    echo "Replacing ${NB} remaining lines matching the old domain outside serialized data in dump: ";
    # Escape the dots so sed only rewrites the exact name, not look-alikes
    # such as "toXbe.replaced.com".
    OLD_DNS_PATTERN=${OLD_DNS//./\\.}
    # sed is run directly: wrapping it in a command substitution would try to
    # execute whatever it printed.
    if ! "${SED:-sed}" -i "s#${OLD_DNS_PATTERN}#${NEW_DNS}#g" "${DUMP_FILE}"; then
        echo "Error: sed replacement failed on '${DUMP_FILE}'." >&2
        exit 1
    fi
fi
echo "Everything Done";
exit 0;
	# NOTE(review): everything from this line to the end of the file comes after
	# an unconditional `exit 0;`, so it is never executed. It looks like a second,
	# tab-indented copy of the script — confirm and consider removing it.
	#!/bin/bash
	##################################################################
	# Licensed under GNU GPL v3 #
	# regis.leroy@gmail.com #
	# #
	# DNS replacement script in SQL dumps containing (also) PHP #
	# serialized strings. #
	# This script uses bash and perl; perl is used to increment #
	# serialized string length while performing DNS replacement. #
	# It also uses sed for other basic DNS replacements. #
	##################################################################

	###### CONFIG ####################################################
	OLD_DNS="to.be.replaced.com"
	NEW_DNS="foobar.titi.example.com"
	# source file to clean up
	DUMP_FILE="exemple.txt"
	###### END CONFIG ################################################

	# Replace . by \. so perl will not interpret dots
	ESCAPED_NEW_DNS=${NEW_DNS//\./\\.}
	ESCAPED_OLD_DNS=${OLD_DNS//\./\\.}

	# Test new DNS is not a subdomain, infinite loops in perl replacements
	# NOTE(review): as written this only matches when NEW_DNS equals OLD_DNS
	# (OLD_DNS being used as a glob pattern); there are no surrounding "*"
	# wildcards, so a subdomain is NOT detected despite the message below.
	if [[ "${NEW_DNS}" == ${OLD_DNS} ]]
	then
	echo "Error: This script cannot handle subdomains replacements, risk of infinite loops, sorry!";
	exit 1;
	fi

	# Locate sed; abort when `which` prints nothing.
	SED=`which sed`;
	if [ ! ${SED} ]; then
	echo "Error: 'sed' command not found."
	exit 1;
	fi

	# Locate perl; abort when `which` prints nothing.
	PERL=`which perl`;
	if [ ! ${PERL} ]; then
	echo "Error: 'perl' command not found."
	exit 1;
	fi

	# Length difference between both names; DIRECTION is assigned but not read
	# anywhere in this copy.
	LEN1=${#OLD_DNS}
	LEN2=${#NEW_DNS}
	DIRECTION=$((LEN2>LEN1))
	COUNT=$((LEN2-LEN1))
	if [[ $COUNT -eq 0 ]]; then
	echo "Old and new domain name have the same size, no special serialization hack needed before classical sed replacement"
	else

	# Number of lines matching OLD_DNS (used as a regex by grep).
	NB=`grep -c "${OLD_DNS}" ${DUMP_FILE}`;
	if [ "0" != "${NB}" ]; then
	echo "Found ${NB} lines matching at least once ${OLD_DNS} in this file"
	echo "Starting serialized content inline replacement in dump with string lenght increment..."
	# $1 : «([;\|{]s:)» : detect start of serialized string with «;s:» or «{s:» or «s:»
	# $2 : «([0-9]+)» : the serialized string length numbers
	# «:\\"» : start of the string with «\";»
	# $3 and $4 : «(((?!\\";).)?)» : (?!\\";) means not the substring «\";», ((XX.)?) so here we match everything (.*) which does not contain this substring and the last ? means
	# here a small bug $4 contains the last matched char. unused.
	# non greedy, se we take the shortest match
	# $5 : «('${OLD_DNS2}')» : finally it is (foo\.example\.com)' matching the DNS to replace
	# $6: «(.*?)» : match anything until the next pattern, the ? makes it a non-greedy match (shortest)
	# it is OK as next pattern is closing the serialized string.
	# non greedy: i.e. regular mode is match as much as you can contain in backward mode,
	# in non greedy is match the smallest way still working
	# it will make the match as small as possible, and next pattern will match the 1st end of serialized string available
	# «\\";» last pattern is end of serialized string
	# problem is that $6 contains the rest of string after 1st old DNS match.
	# This string may contain other occurrences of old DNS
	# and replacement should be done several times until nothing more happens, nb of replacement is
	# returned by the s// pattern, so we loop until nothing more happens with the «1 while»
	# TEST with: perl -n -pe '$C+=s#([;\|{]?s:)([0-9]+):\\"(((?!\\";).)?)('${OLD_DNS2}')(.?)\\";#"$1".($2+'${COUNT}').":\\\"$3'${NEW_DNS}'$6\\\";"#ge; END{print"$C\n"}' < exemple.txt
	# NOTE(review): in the command below the groups «(((?!\\";).)?)» and «(.?)»
	# each match at most ONE character, which contradicts the description above
	# («(.*?)», "match anything"). The "*" quantifiers appear to have been lost
	# from this copy — confirm before ever reusing it.
	${PERL} -n -p -i -e '$rgx=qr/([;\|{]?s:)([0-9]+):\\"(((?!\\";).)?)('${ESCAPED_OLD_DNS}')(.?)\\";/; 1 while s#$rgx#"$1".($2+'${COUNT}').":\\\"$3'${NEW_DNS}'$6\\\";"#ge;' ${DUMP_FILE}
	echo "Done with serialized strings"
	else
	echo "${OLD_DNS} not found in file, quite certainly nothing to be done."
	fi
	fi
	# Second count, then a plain sed replacement of what is left.
	NB=`grep -c "${OLD_DNS}" ${DUMP_FILE}`;
	if [ "0" != "${NB}" ]; then
	echo "Replacing ${NB} remaining lines matching the old domain outside serialized data in dump: ";
	# NOTE(review): the backticks run sed inside a command substitution, so any
	# output it produced would itself be executed as a command; OLD_DNS is also
	# used unescaped here, so its dots match any character.
	`${SED} -i "s#${OLD_DNS}#${NEW_DNS}#g" ${DUMP_FILE}`
	fi
	echo "Everything Done";
	exit 0;