@nickumia-reisys
Last active December 10, 2021 18:26
Data.gov catalog migration script
#!/bin/bash
# Run this script on FCS catalog-harvester-xyz
set -o errexit
set -o pipefail
set -o nounset
AWS_DEFAULT_REGION=us-gov-west-1
service_name=catalog-db
cat <<EOF
This script exports the $service_name datastore to s3 for re-import into the cloud.gov environment. We'll prompt you for the credentials necessary to access the backup-manager-s3 bucket. To get the credentials, use the service key.
$ cf service-key datastore-backups fcs-migration
EOF
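# The service key output is JSON; the exact field names are an assumption
# (check the real key output), but fetching the credentials typically looks like:
#   cf service-key datastore-backups fcs-migration
#   # then copy access_key_id / secret_access_key / bucket into the prompts below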
# prompt for secrets
read -p 'AWS_ACCESS_KEY_ID> ' AWS_ACCESS_KEY_ID
read -p 'AWS_SECRET_ACCESS_KEY> ' AWS_SECRET_ACCESS_KEY
read -p 'BUCKET_NAME> ' BUCKET_NAME
read -p 'environment (staging/prod)> ' space_name
export AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY AWS_DEFAULT_REGION
# Source credentials for FCS environment
source ~/.env
backup_path="/fcs-migration/$space_name/${service_name}-$(date +%Y%m%d-%H%M%S)-migration.sql.gz"
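# Example of the resulting key (hypothetical timestamp):
#   /fcs-migration/prod/catalog-db-20211210-182600-migration.sql.gz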
time pg_dump --format=custom -h $DB_HOST -U $DB_USER -p $DB_PORT -T spatial_ref_sys ckan | gzip | aws s3 cp - s3://${BUCKET_NAME}${backup_path}
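# Optional sanity check (sketch): confirm the object landed in S3 before
# switching to the cloud.gov side.
#   aws s3 ls s3://${BUCKET_NAME}${backup_path}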
cat <<EOF
$service_name ($space_name) backup complete.
Run these commands to complete the migration.
$ cf target -s $space_name
$ PGPASSWORD=$DB_PASS psql -h $DB_HOST -p $DB_PORT -U $DB_USER -d "$DB_NAME" -c "create database ckan_temp;"
$ PGPASSWORD=$DB_PASS psql -h $DB_HOST -p $DB_PORT -U $DB_USER -d "ckan_temp" -c "drop extension IF EXISTS postgis cascade;"
$ PGPASSWORD=$DB_PASS psql -h $DB_HOST -p $DB_PORT -U $DB_USER -d "ckan_temp" -c "select pg_terminate_backend(pid) from pg_stat_activity where datname='$DB_NAME';"
$ PGPASSWORD=$DB_PASS psql -h $DB_HOST -p $DB_PORT -U $DB_USER -d "ckan_temp" -c "drop database $DB_NAME;"
$ PGPASSWORD=$DB_PASS psql -h $DB_HOST -p $DB_PORT -U $DB_USER -d "ckan_temp" -c "create database $DB_NAME;"
$ PGPASSWORD=$DB_PASS psql -h $DB_HOST -p $DB_PORT -U $DB_USER -d "$DB_NAME" -c "create extension if not exists postgis;"
$ PGPASSWORD=$DB_PASS psql -h $DB_HOST -p $DB_PORT -U $DB_USER -d "$DB_NAME" -c "create extension if not exists fuzzystrmatch;"
$ PGPASSWORD=$DB_PASS psql -h $DB_HOST -p $DB_PORT -U $DB_USER -d "$DB_NAME" -c "create extension if not exists postgis_tiger_geocoder;"
$ PGPASSWORD=$DB_PASS psql -h $DB_HOST -p $DB_PORT -U $DB_USER -d "$DB_NAME" -c "create extension if not exists postgis_topology;"
$ PGPASSWORD=$DB_PASS psql -h $DB_HOST -p $DB_PORT -U $DB_USER -d "$DB_NAME" -c "drop database ckan_temp;"
$ time cf run-task backup-manager --wait --name "$service_name restore" --command "restore psql $service_name $backup_path"
$ cf run-task catalog -c "ckan db upgrade"
$ cf run-task catalog -c "ckan search-index rebuild"
EOF
# CKAN DB MIGRATION SCRIPT - Catalog-variant
# Steps to prepare catalog db for ckan migrate restore
cat << EOF
Initial Conditions/Assumptions:
- Catalog app has a catalog-db service bound to it
- Leftover services from any previous migration attempt have been removed:
    cf delete-service catalog-db-migrate
    cf delete-service catalog-db-venerable
- backup-manager has been pointed at the backup bucket service:
    cf set-env backup-manager DATASTORE_S3_SERVICE_NAME backup-manager-s3
EOF
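# Optional pre-flight check (sketch): confirm the expected services exist and
# that the migrate/venerable services from a previous run are gone.
#   cf services | grep catalog-db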
set -o errexit
set -o pipefail
set -o nounset
# Get input params
# Service name: the name of the service that is hosting the S3 Backup
# Backup path: the path in S3 that is the backup location
read -p "Space name> " space_name
read -p "S3 Backup path> " backup_path
# Go to the correct space
cf target -s $space_name
# Create Migration Database
# time cf create-service aws-rds micro-psql catalog-db-migrate --wait
time cf create-service aws-rds medium-psql-redundant catalog-db-migrate -c '{"storage": 350}' --wait
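# Optional: confirm the new RDS instance finished provisioning before binding
# (--wait should block until it does, but this shows the last operation).
#   cf service catalog-db-migrate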
cf bind-service backup-manager catalog-db-migrate
# Connect to the database and get credentials in env
# Ensure the service-connect plugin is installed for cf cli,
# https://github.com/cloud-gov/cf-service-connect#local-installation
# The following extensions are created by catalog postgis install,
# CREATE EXTENSION IF NOT EXISTS fuzzystrmatch;
# CREATE EXTENSION IF NOT EXISTS postgis_tiger_geocoder;
# CREATE EXTENSION IF NOT EXISTS postgis_topology;
# However, the test data loads without them (verified with a small data restore
# from fuhu's local ckan).
cf connect-to-service catalog catalog-db-migrate << EOF
CREATE EXTENSION IF NOT EXISTS postgis;
EOF
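# Optional verification (sketch): list installed extensions to confirm postgis
# is present before restoring.
# cf connect-to-service catalog catalog-db-migrate << EOF
# \dx
# EOF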
# Restore backup
time cf run-task backup-manager --name "catalog-restore" --command "PG_RESTORE_OPTIONS='--no-acl' restore psql catalog-db-migrate $backup_path"
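# The restore runs as a task on backup-manager; its progress can be followed with:
#   cf tasks backup-manager
#   cf logs backup-manager --recent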
cf connect-to-service catalog catalog-db-migrate << EOF
drop index idx_package_resource_package_id;
drop index idx_package_resource_revision_period;
EOF
# Bind to new database
cf rename-service catalog-db catalog-db-venerable
cf rename-service catalog-db-migrate catalog-db
cf unbind-service catalog catalog-db-venerable
cf bind-service catalog catalog-db
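# Optional: confirm catalog is now bound to the renamed catalog-db service.
#   cf services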
# Upgrade DB and reindex SOLR
cf stop catalog
cf run-task catalog -c "ckan db upgrade"
cf restart catalog
cf run-task catalog -c "ckan search-index rebuild"
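# Optional: the reindex also runs as a task; check its status with:
#   cf tasks catalog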