Postgres log file compressor which compresses and uploads files to s3 before deleting them.
#!/usr/bin/env bash

##
# @author Jay Taylor [@jtaylor]
#
# @date 2013-10-02
#
# Postgres log file compressor which compresses and uploads files to s3 before deleting them.
# This script must be located in the same directory as the log files.
#
# Make sure postgresql.conf contains the following log filename configuration:
#
# log_filename = 'postgresql-%Y-%m-%d.log'
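#
# With that pattern, each day's log lands in a file named for its date, e.g. postgresql-2013-10-02.log, which is the shape the find and grep logic below expects.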
#
# ==Installation==
#
# Install to root user's cron like so:
#
# 0 0 * * * /data/pg_log/pgLogArchiver.sh | /usr/bin/logger -t pgLogArchiver
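#
# Because output is piped to logger(1), each run's messages land in syslog tagged
# 'pgLogArchiver' (viewable with e.g. `grep pgLogArchiver /var/log/syslog` on Debian-style systems).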
#

# Destination S3 bucket.
s3Bucket='my-postgres-logs'

# Process log files older than this many minutes.
maxAgeInMinutes=1440
# NB: There must be a unique database name which begins with ${dbNamePrefix}.
dbNamePrefix='db'

test $EUID -ne 0 && echo "error: this script must be run as root" 1>&2 && exit 1

cd "$(dirname "$0")"

# Automatically find the database name.
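# psql --list prints three header lines (title, column names, separator), so
# tail --lines=+4 starts reading at the first data row.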
dbName=$(sudo -u postgres psql --list | tail --lines=+4 | sed 's/^ //' | grep "^${dbNamePrefix}" | cut -d' ' -f1)
rc=$?
test $rc -ne 0 && echo "error: failed to find the database name, attempt resulted in exit status ${rc}" 1>&2 && exit $rc
test -z "${dbName}" && echo "error: failed to find the database name, attempt resulted in an empty dbName" 1>&2 && exit 1 | |
test -z "$(sudo -u postgres bash -c 'echo $PGDATA')" && echo 'error: $PGDATA environment variable missing for "postgres" user, cannot continue' 1>&2 && exit 1 | |
# Determine if this is a primary or read-slave/standby database.
if test -f "$(sudo -u postgres bash -c 'echo $PGDATA')/recovery.conf"; then
    primaryOrReadSlave='standby'
else
    primaryOrReadSlave='primary'
fi
# Iterate over expired logs, compress and upload each to s3 before deleting.
for logFile in $(find . -mmin "+${maxAgeInMinutes}" -name "postgresql-*.log" | sort); do
    echo "info: [$(date)] compressing log file '${logFile}'"

    # Generate a dated path from the date portion of the log filename.
    logFileDate="$(echo "${logFile}" | grep --only-matching '\(-[0-9]\+\)\{3\}' | sed 's/^-//')"
    remoteDatedPath=$(echo "${logFileDate}" | tr '-' '/')
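    # e.g. './postgresql-2013-10-02.log' -> logFileDate '2013-10-02' -> remoteDatedPath '2013/10/02'.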
    startTs=$(date +%s)

    # NB: use nice to give the compression process the lowest possible priority to minimize impact on production.
    nice --adjustment=19 xz --compress --keep --force "${logFile}"
    rc=$?
    test $rc -ne 0 && echo "error: compression of log file '${logFile}' failed with exit status ${rc}" 1>&2 && exit $rc

    finishTs=$(date +%s)
    echo "info: [$(date)] compressed log file '${logFile}' in $(($finishTs - $startTs)) seconds"

    s3Path="s3://${s3Bucket}/${remoteDatedPath}/${dbName}_${primaryOrReadSlave}_$(hostname)_${logFileDate}.xz"
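    # e.g. s3://my-postgres-logs/2013/10/02/db1_primary_pg-host-1_2013-10-02.xz
    # (the database name and hostname here are illustrative placeholders).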
echo "info: [$(date)] uploading '${logFile}.xz' to s3, path will be '${s3Path}'" | |
s3cmd --config=/etc/s3cmd --preserve --acl-private --no-progress put "${logFile}.xz" "${s3Path}" | |
rc=$? | |
test $rc -ne 0 && echo "error: uploading log file '${logFile}.xz' to s3 failed with exit status ${rc}" 1>&2 && exit $rc | |
# Remove archived log file. | |
rm -f "${logFile}" "${logFile}.xz" | |
done |
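To pull an archived log back for inspection, a minimal sketch assuming the same bucket and dated-path layout (the date, database name, and hostname below are illustrative placeholders):

#!/usr/bin/env bash
# Fetch one archived log from s3 and decompress it locally.
s3cmd --config=/etc/s3cmd get "s3://my-postgres-logs/2013/10/02/db1_primary_pg-host-1_2013-10-02.xz"
# xz --decompress replaces the .xz file with the original uncompressed log.
xz --decompress "db1_primary_pg-host-1_2013-10-02.xz"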