Postgres log file compressor which compresses and uploads files to s3 before deleting them.
#!/usr/bin/env bash
##
# @author Jay Taylor [@jtaylor]
#
# @date 2013-10-02
#
# Postgres log file compressor which compresses and uploads files to s3 before deleting them.
# This script must be located in the same directory as the log files.
#
# Make sure postgresql.conf contains the following log filename configuration:
#
# log_filename = 'postgresql-%Y-%m-%d.log'
#
# ==Installation==
#
# Install to root user's cron like so:
#
# 0 0 * * * /data/pg_log/pgLogArchiver.sh | /usr/bin/logger -t pgLogArchiver
#
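# For reference, a fuller postgresql.conf logging block that pairs with the
# log_filename above might look like the following (the logging_collector and
# log_directory values here are assumptions; adjust them to your installation):
#
#   logging_collector = on
#   log_directory = '/data/pg_log'
#   log_filename = 'postgresql-%Y-%m-%d.log'
#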
# Destination S3 bucket.
s3Bucket='my-postgres-logs'
# Process log files which are older than this many minutes.
maxAgeInMinutes=1440
# NB: There must be exactly one database whose name begins with $dbNamePrefix.
dbNamePrefix='db'
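# Refuse to run unless invoked as root (the script relies on sudo to run commands as the postgres user).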
test $EUID -ne 0 && echo "error: this script must be run as root" 1>&2 && exit 1
cd "$(dirname "$0")"
# Automatically find the database name.
# NB: pipefail makes the exit-status check below catch a failure in any stage of the pipeline, not just the final `cut`.
set -o pipefail
dbName=$(sudo -u postgres psql --list | tail --lines=+4 | sed 's/^ //' | grep "^${dbNamePrefix}" | cut -d' ' -f1)
rc=$?
test $rc -ne 0 && echo "error: failed to find the database name, attempt resulted in exit status ${rc}" 1>&2 && exit $rc
test -z "${dbName}" && echo "error: failed to find the database name, attempt resulted in an empty dbName" 1>&2 && exit 1
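# Ensure the postgres user has $PGDATA set; it is used below to locate recovery.conf.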
test -z "$(sudo -u postgres bash -c 'echo $PGDATA')" && echo 'error: $PGDATA environment variable missing for "postgres" user, cannot continue' 1>&2 && exit 1
# Determine if this is a primary or read-slave/standby database.
if test -f "$(sudo -u postgres bash -c 'echo $PGDATA')/recovery.conf"; then
    primaryOrReadSlave='standby'
else
    primaryOrReadSlave='primary'
fi
# Iterate over expired logs, compress and upload each to s3 before deleting.
for logFile in $(find . -mmin "+${maxAgeInMinutes}" -name "postgresql-*.log" | sort); do
    echo "info: [$(date)] compressing log file '${logFile}'"
    # Generate a dated path from the date portion of the log filename,
    # e.g. 'postgresql-2013-10-02.log' -> '2013-10-02' -> '2013/10/02'.
    logFileDate="$(echo "${logFile}" | grep --only-matching '\(-[0-9]\+\)\{3\}' | sed 's/^-//')"
    remoteDatedPath=$(echo "${logFileDate}" | tr '-' '/')
    startTs=$(date +%s)
    # NB: use nice to give the compression process the lowest possible priority to minimize impact on production.
    nice --adjustment=19 xz --compress --keep --force "${logFile}"
    rc=$?
    test $rc -ne 0 && echo "error: compression of log file '${logFile}' failed with exit status ${rc}" 1>&2 && exit $rc
    finishTs=$(date +%s)
    echo "info: [$(date)] compressed log file '${logFile}' in $(($finishTs - $startTs)) seconds"
    s3Path="s3://${s3Bucket}/${remoteDatedPath}/${dbName}_${primaryOrReadSlave}_$(hostname)_${logFileDate}.xz"
    echo "info: [$(date)] uploading '${logFile}.xz' to s3, path will be '${s3Path}'"
    s3cmd --config=/etc/s3cmd --preserve --acl-private --no-progress put "${logFile}.xz" "${s3Path}"
    rc=$?
    test $rc -ne 0 && echo "error: uploading log file '${logFile}.xz' to s3 failed with exit status ${rc}" 1>&2 && exit $rc
    # Remove the original log file and the local compressed copy (it has already been uploaded to s3).
    rm -f "${logFile}" "${logFile}.xz"
done
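To retrieve an archived log later, the upload step can simply be reversed with s3cmd and xz. A minimal sketch, assuming the bucket layout produced by the script above and a hypothetical database name, hostname, and date:

    s3cmd --config=/etc/s3cmd get "s3://my-postgres-logs/2013/10/02/db_primary_myhost_2013-10-02.xz"
    xz --decompress db_primary_myhost_2013-10-02.xz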