Skip to content

Instantly share code, notes, and snippets.

@djm
Forked from bennylope/migrate.py
Created July 4, 2022 17:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save djm/247926a536faa8b609b3fc773e669550 to your computer and use it in GitHub Desktop.
Save djm/247926a536faa8b609b3fc773e669550 to your computer and use it in GitHub Desktop.
PostgreSQL migration script, Heroku -> Crunchy
#!/usr/bin/env python
import argparse
import os
import subprocess
import sys
import time
# Required so we don't generate tons of logs during restore
disable_logging_sql = "ALTER USER postgres RESET pgaudit.log;"
# Re-enable audit logging after restoring
enable_logging_sql = "ALTER USER postgres SET pgaudit.log='all';"
set_table_owners_sql = """
DO $$
DECLARE
row record;
BEGIN
FOR row IN SELECT schemaname, tablename
FROM pg_tables WHERE NOT schemaname IN ('pg_catalog', 'information_schema') LOOP
EXECUTE FORMAT('ALTER TABLE %I.%I OWNER TO {pg_user};', row.schemaname, row.tablename);
END LOOP;
END;
$$;
"""
set_sequence_owners_sql = """
DO $$
DECLARE
row record;
BEGIN
FOR row IN SELECT sequence_schema, sequence_name
FROM information_schema.sequences WHERE NOT sequence_schema IN ('pg_catalog', 'information_schema') LOOP
EXECUTE FORMAT('ALTER SEQUENCE %I.%I OWNER TO {pg_user};', row.sequence_schema, row.sequence_name);
END LOOP;
END;
$$;
"""
set_view_owners_sql = """
DO $$
DECLARE
row record;
BEGIN
FOR row IN SELECT table_schema, table_name
FROM information_schema.views WHERE NOT table_schema IN ('pg_catalog', 'information_schema')
AND NOT table_name IN ('pg_stat_statements', 'pg_stat_statements_info')
LOOP
EXECUTE FORMAT('ALTER VIEW %I.%I OWNER TO {pg_user};', row.table_schema, row.table_name);
END LOOP;
END;
$$;
"""
set_pg_class_owners_sql = """
DO $$
DECLARE
row record;
BEGIN
FOR row IN SELECT oid::regclass::text as s_oid
FROM pg_class WHERE relkind = 'm'
LOOP
EXECUTE FORMAT('ALTER TABLE %I OWNER TO {pg_user};', row.s_oid);
END LOOP;
END;
$$;
"""
backup_and_restore = "pg_dump -Fc ${heroku_pg_url_name}| pg_restore --no-acl --no-owner -d {pg_url} && exit"
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Backup and restore from Heroku to new PostgreSQL instance."
)
parser.add_argument(
"--app", dest="heroku_app", help="Heroku app name.", required=True
)
parser.add_argument(
"--db",
dest="heroku_db",
default="DATABASE_URL",
help="Heroku database connection string name (e.g. DATABASE_URL, HEROKU_POSTGRESQL_CRIMSON_URL).",
)
parser.add_argument(
"--size",
dest="dyno_size",
default="standard",
choices=[
"standard",
"standard-2x",
"performance",
"performance-l",
],
help="Heroku dyno size for running the backup and restore.",
)
parser.add_argument(
"--user",
dest="pg_user",
default="application",
help="New PostgreSQL user that should own the DB resources (default is 'application').",
)
parser.add_argument(
"--target",
dest="target_pg_url",
help="PostgreSQL connection string for the restoration target (defaults to PG_URL in environment).",
)
args = parser.parse_args()
start_time = time.time()
target_pg_url = args.target_pg_url or os.environ.get("PG_URL")
if not target_pg_url:
sys.stderr.write(
"You must provide a connection string in either the --target argument or PG_URL environment variable."
)
sys.exit(1)
bash_command = subprocess.Popen(
[
"echo",
backup_and_restore.format(
heroku_pg_url_name=args.heroku_db,
pg_url=target_pg_url,
),
],
stdout=subprocess.PIPE,
)
subprocess.run(["psql", "-c", disable_logging_sql, "-d", target_pg_url])
heroku_start = time.time()
subprocess.run(
["heroku", "run", "bash", "--app", args.heroku_app, "--size", args.dyno_size],
stdin=bash_command.stdout,
)
heroku_end = time.time()
cleanup_sql = (
"".join(
[
set_owner_sql.format(pg_user=args.pg_user)
for set_owner_sql in [
set_table_owners_sql,
set_sequence_owners_sql,
set_view_owners_sql,
set_pg_class_owners_sql,
]
]
)
+ enable_logging_sql
)
subprocess.run(["psql", "-c", cleanup_sql, "-d", os.environ["target_pg_url"]])
end_time = time.time()
print("Total completion time", end_time - start_time)
print("Backup and restore time", heroku_end - heroku_start)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment