Skip to content

Instantly share code, notes, and snippets.

@BenWhitehead
Last active November 1, 2016 23:55
Show Gist options
  • Save BenWhitehead/3fd5ee409c4e67d274e71362fe77b956 to your computer and use it in GitHub Desktop.
Save BenWhitehead/3fd5ee409c4e67d274e71362fe77b956 to your computer and use it in GitHub Desktop.
Import CloudFront access logs into Redshift

CloudFront access log Import

SQL Template and Script that can be used to import access logs generated by CloudFront and stored in an s3 bucket.

The script uses psql to run the import script against Redshift, so the script should be ran in a location where psql is on the PATH such as postgres:9.4

Expected Environment Variables

var description
PGHOST Redshift hostname/ip
PGPORT Redshift port
PGDATABASE Redshift database name
PGUSER Redshift user
PGPASSWORD Redshift user password
PGSCHEMA Redshift Schema
PGTABLE Redshift table
AWS_ACCESS_KEY_ID AWS API Access Key
AWS_SECRET_ACCESS_KEY AWS API Secret Key
AWS_BUCKET_URI_BASE AWS S3 bucket that contains the CloudFront access log zip files
AWS_BUCKET_REGION AWS Region the bucket is in
#!/bin/bash
set -o errexit -o nounset -o pipefail
function globals {
export LC_ALL=en_US.UTF-8
export LANG="$LC_ALL"
}; globals
PROJECT_ROOT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
function yesterday {
date -u --date yesterday "+%Y-%m-%d"
}
function clean {
rm -rf ${PROJECT_ROOT_DIR}/target
}
function validate {
## https://www.postgresql.org/docs/9.4/static/libpq-envars.html
if [ -z ${PGHOST} ]; then
err "PGHOST undefined"
fi
if [ -z ${PGPORT} ]; then
err "PGPORT undefined"
fi
if [ -z ${PGDATABASE} ]; then
err "PGUSER undefined"
fi
if [ -z ${PGUSER} ]; then
err "PGUSER undefined"
fi
if [ -z ${PGPASSWORD} ]; then
err "PGPASSWORD undefined"
fi
if [ -z ${PGSCHEMA} ]; then
err "PGSCHEMA undefined"
fi
if [ -z ${PGTABLE} ]; then
err "PGTABLE undefined"
fi
if [ -z ${AWS_ACCESS_KEY_ID} ]; then
err "AWS_ACCESS_KEY_ID undefined"
fi
if [ -z ${AWS_SECRET_ACCESS_KEY} ]; then
err "AWS_SECRET_ACCESS_KEY undefined"
fi
if [ -z ${AWS_BUCKET_URI_BASE} ]; then
err "AWS_BUCKET_URI_BASE undefined"
fi
if [ -z ${AWS_BUCKET_REGION} ]; then
err "AWS_BUCKET_REGION undefined"
fi
}
function generateSql {(
cd ${PROJECT_ROOT_DIR}
mkdir -p target
export YESTERDAY=$(yesterday)
msg "Generating sql conf for ${YESTERDAY}..."
envsubst \
< refresh.sql.template \
> target/refresh.sql
msg "Generated sql conf at target/refresh.sql"
)}
function runRefresh {(
msg "Running refresh"
psql -f target/refresh.sql
msg "Running refresh complete"
rm target/refresh.sql
)}
function main {(
validate
cd ${PROJECT_ROOT_DIR}
generateSql
runRefresh
)}
function now { date +"%Y-%m-%d %H:%M:%S" | tr -d '\n' ;}
function msg { println "$*" >&2 ;}
function err { local x=$? ; msg "$*" ; return $(( $x == 0 ? 1 : $x )) ;}
function println { printf '%s\n' "$(now) $*" ;}
function print { printf '%s ' "$(now) $*" ;}
######################### Delegates to subcommands or runs main, as appropriate
if [[ ${1:-} ]] && declare -F | cut -d' ' -f3 | fgrep -qx -- "${1:-}"
then "$@"
else main
fi
COPY $PGSCHEMA.$PGTABLE
FROM '$AWS_BUCKET_URI_BASE.$YESTERDAY'
CREDENTIALS 'aws_access_key_id=$AWS_ACCESS_KEY_ID;aws_secret_access_key=$AWS_SECRET_ACCESS_KEY'
DELIMITER '\t' MAXERROR 10 FILLRECORD IGNOREHEADER 2 gzip region '$AWS_BUCKET_REGION';
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment