Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Python script to take care of postgres backup and restore of data
#!/usr/bin/python3
import argparse
import logging
import subprocess
import os
import tempfile
from tempfile import mkstemp
import configparser
import gzip
import boto3
import psycopg2
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT
# Amazon S3 settings.
# AWS_ACCESS_KEY_ID in ~/.aws/credentials
# AWS_SECRET_ACCESS_KEY in ~/.aws/credentials
import datetime
from shutil import move
AWS_BUCKET_NAME = 'backup.mydomain.com'
AWS_BUCKET_PATH = 'postgres/'
BACKUP_PATH = '/tmp/'
def upload_to_s3(file_full_path, dest_file):
"""
Upload a file to an AWS S3 bucket.
"""
s3_client = boto3.client('s3')
try:
s3_client.upload_file(file_full_path, AWS_BUCKET_NAME, AWS_BUCKET_PATH + dest_file)
os.remove(file_full_path)
except boto3.exceptions.S3UploadFailedError as exc:
print(exc)
exit(1)
def download_from_s3(backup_s3_key, dest_file):
"""
Upload a file to an AWS S3 bucket.
"""
s3_client = boto3.resource('s3')
try:
s3_client.meta.client.download_file(AWS_BUCKET_NAME, backup_s3_key, dest_file)
except Exception as e:
print(e)
exit(1)
def list_available_backup():
key_list = []
s3_client = boto3.client('s3')
s3_objects = s3_client.list_objects_v2(Bucket=AWS_BUCKET_NAME, Prefix=AWS_BUCKET_PATH)
for key in s3_objects['Contents']:
key_list.append(key['Key'])
return key_list
def list_postgres_databases(host, database_name, port, user, password):
try:
process = subprocess.Popen(
['psql',
'--dbname=postgresql://{}:{}@{}:{}/{}'.format(user, password, host, port, database_name),
'--list'],
stdout=subprocess.PIPE
)
output = process.communicate()[0]
if int(process.returncode) != 0:
print('Command failed. Return code : {}'.format(process.returncode))
exit(1)
return output
except Exception as e:
print(e)
exit(1)
def backup_postgres_db(host, database_name, port, user, password, dest_file, verbose):
"""
Backup postgres db to a file.
"""
if verbose:
try:
process = subprocess.Popen(
['pg_dump',
'--dbname=postgresql://{}:{}@{}:{}/{}'.format(user, password, host, port, database_name),
'-Fc',
'-f', dest_file,
'-v'],
stdout=subprocess.PIPE
)
output = process.communicate()[0]
if int(process.returncode) != 0:
print('Command failed. Return code : {}'.format(process.returncode))
exit(1)
return output
except Exception as e:
print(e)
exit(1)
else:
try:
process = subprocess.Popen(
['pg_dump',
'--dbname=postgresql://{}:{}@{}:{}/{}'.format(user, password, host, port, database_name),
'-f', dest_file],
stdout=subprocess.PIPE
)
output = process.communicate()[0]
if process.returncode != 0:
print('Command failed. Return code : {}'.format(process.returncode))
exit(1)
return output
except Exception as e:
print(e)
exit(1)
def compress_file(src_file):
compressed_file = "{}.gz".format(str(src_file))
with open(src_file, 'rb') as f_in:
with gzip.open(compressed_file, 'wb') as f_out:
for line in f_in:
f_out.write(line)
return compressed_file
def extract_file(src_file):
extracted_file, extension = os.path.splitext(src_file)
print(extracted_file)
with gzip.open(src_file, 'rb') as f_in:
with open(extracted_file, 'wb') as f_out:
for line in f_in:
f_out.write(line)
return extracted_file
def remove_faulty_statement_from_dump(src_file):
temp_file, _ = tempfile.mkstemp()
try:
with open(temp_file, 'w+') as dump_temp:
process = subprocess.Popen(
['pg_restore',
'-l'
'-v',
src_file],
stdout=subprocess.PIPE
)
output = subprocess.check_output(('grep','-v','"EXTENSION - plpgsql"'), stdin=process.stdout)
process.wait()
if int(process.returncode) != 0:
print('Command failed. Return code : {}'.format(process.returncode))
exit(1)
os.remove(src_file)
with open(src_file, 'w+') as cleaned_dump:
subprocess.call(
['pg_restore',
'-L'],
stdin=output,
stdout=cleaned_dump
)
except Exception as e:
print("Issue when modifying dump : {}".format(e))
def change_user_from_dump(source_dump_path, old_user, new_user):
fh, abs_path = mkstemp()
with os.fdopen(fh, 'w') as new_file:
with open(source_dump_path) as old_file:
for line in old_file:
new_file.write(line.replace(old_user, new_user))
# Remove original file
os.remove(source_dump_path)
# Move new file
move(abs_path, source_dump_path)
def restore_postgres_db(db_host, db, port, user, password, backup_file, verbose):
"""
Restore postgres db from a file.
"""
if verbose:
try:
print(user,password,db_host,port, db)
process = subprocess.Popen(
['pg_restore',
'--no-owner',
'--dbname=postgresql://{}:{}@{}:{}/{}'.format(user,
password,
db_host,
port, db),
'-v',
backup_file],
stdout=subprocess.PIPE
)
output = process.communicate()[0]
if int(process.returncode) != 0:
print('Command failed. Return code : {}'.format(process.returncode))
return output
except Exception as e:
print("Issue with the db restore : {}".format(e))
else:
try:
process = subprocess.Popen(
['pg_restore',
'--no-owner',
'--dbname=postgresql://{}:{}@{}:{}/{}'.format(user,
password,
db_host,
port, db),
backup_file],
stdout=subprocess.PIPE
)
output = process.communicate()[0]
if int(process.returncode) != 0:
print('Command failed. Return code : {}'.format(process.returncode))
return output
except Exception as e:
print("Issue with the db restore : {}".format(e))
def create_db(db_host, database, db_port, user_name, user_password):
try:
con = psycopg2.connect(dbname='postgres', port=db_port,
user=user_name, host=db_host,
password=user_password)
except Exception as e:
print(e)
exit(1)
con.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
cur = con.cursor()
try:
cur.execute("DROP DATABASE {} ;".format(database))
except Exception as e:
print('DB does not exist, nothing to drop')
cur.execute("CREATE DATABASE {} ;".format(database))
cur.execute("GRANT ALL PRIVILEGES ON DATABASE {} TO {} ;".format(database, user_name))
return database
def swap_restore_active(db_host, restore_database, active_database, db_port, user_name, user_password):
try:
con = psycopg2.connect(dbname='postgres', port=db_port,
user=user_name, host=db_host,
password=user_password)
con.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
cur = con.cursor()
cur.execute("SELECT pg_terminate_backend( pid ) "
"FROM pg_stat_activity "
"WHERE pid <> pg_backend_pid( ) "
"AND datname = '{}'".format(active_database))
cur.execute("DROP DATABASE {}".format(active_database))
cur.execute('ALTER DATABASE "{}" RENAME TO "{}";'.format(restore_database, active_database))
except Exception as e:
print(e)
exit(1)
def swap_restore_new(db_host, restore_database, new_database, db_port, user_name, user_password):
try:
con = psycopg2.connect(dbname='postgres', port=db_port,
user=user_name, host=db_host,
password=user_password)
con.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
cur = con.cursor()
cur.execute('ALTER DATABASE "{}" RENAME TO "{}";'.format(restore_database, new_database))
except Exception as e:
print(e)
exit(1)
def main():
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
handler = logging.StreamHandler()
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)
args_parser = argparse.ArgumentParser(description='Postgres database management')
args_parser.add_argument("--action",
metavar="action",
choices=['list', 'list_dbs', 'restore', 'backup'],
required=True)
args_parser.add_argument("--date",
metavar="YYYY-MM-dd",
help="Date to use for restore (show with --action list)")
args_parser.add_argument("--dest-db",
metavar="dest_db",
default=None,
help="Name of the new restored database")
args_parser.add_argument("--verbose",
default=True,
help="verbose output")
args_parser.add_argument("--configfile",
required=True,
help="Database configuration file")
args = args_parser.parse_args()
config = configparser.ConfigParser()
config.read(args.configfile)
postgres_host = config.get('postgresql', 'host')
postgres_port = config.get('postgresql', 'port')
postgres_db = config.get('postgresql', 'db')
postgres_restore = "{}_restore".format(postgres_db)
postgres_user = config.get('postgresql', 'user')
postgres_password = config.get('postgresql', 'password')
timestr = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
filename = 'backup-{}-{}.dump'.format(timestr, postgres_db)
filename_compressed = '{}.gz'.format(filename)
restore_filename = '/tmp/restore.dump.gz'
restore_uncompressed = '/tmp/restore.dump'
local_file_path = '{}{}'.format(BACKUP_PATH, filename)
# list task
if args.action == "list":
logger.info('Listing S3 bucket s3://{}/{} content :'.format(AWS_BUCKET_NAME,
AWS_BUCKET_PATH))
s3_backup_objects = list_available_backup()
for key in s3_backup_objects:
logger.info("Key : {}".format(key))
# list databases task
elif args.action == "list_dbs":
result = list_postgres_databases(postgres_host,
postgres_db,
postgres_port,
postgres_user,
postgres_password)
for line in result.splitlines():
logger.info(line)
# backup task
elif args.action == "backup":
logger.info('Backing up {} database to {}'.format(postgres_db, local_file_path))
result = backup_postgres_db(postgres_host,
postgres_db,
postgres_port,
postgres_user,
postgres_password,
local_file_path, args.verbose)
for line in result.splitlines():
logger.info(line)
logger.info("Backup complete")
logger.info("Compressing {}".format(local_file_path))
comp_file = compress_file(local_file_path)
logger.info('Uploading {} to Amazon S3...'.format(comp_file))
upload_to_s3(comp_file, filename_compressed)
logger.info("Uploaded to {}".format(filename_compressed))
# restore task
elif args.action == "restore":
if not args.date:
logger.warn('No date was chosen for restore. Run again with the "list" '
'action to see available restore dates')
else:
try:
os.remove(restore_filename)
except Exception as e:
logger.info(e)
all_backup_keys = list_available_backup()
backup_match = [s for s in all_backup_keys if args.date in s]
if backup_match:
logger.info("Found the following backup : {}".format(backup_match))
else:
logger.error("No match found for backups with date : {}".format(args.date))
logger.info("Available keys : {}".format([s for s in all_backup_keys]))
exit(1)
logger.info("Downloading {} from S3 into : {}".format(backup_match[0], restore_filename))
download_from_s3(backup_match[0], restore_filename)
logger.info("Download complete")
logger.info("Extracting {}".format(restore_filename))
ext_file = extract_file(restore_filename)
# cleaned_ext_file = remove_faulty_statement_from_dump(ext_file)
logger.info("Extracted to : {}".format(ext_file))
logger.info("Creating temp database for restore : {}".format(postgres_restore))
tmp_database = create_db(postgres_host,
postgres_restore,
postgres_port,
postgres_user,
postgres_password)
logger.info("Created temp database for restore : {}".format(tmp_database))
logger.info("Restore starting")
result = restore_postgres_db(postgres_host,
postgres_restore,
postgres_port,
postgres_user,
postgres_password,
restore_uncompressed,
args.verbose)
for line in result.splitlines():
logger.info(line)
logger.info("Restore complete")
if args.dest_db is not None:
restored_db_name = args.dest_db
logger.info("Switching restored database with new one : {} > {}".format(
postgres_restore, restored_db_name
))
swap_restore_new(postgres_host,
postgres_restore,
restored_db_name,
postgres_port,
postgres_user,
postgres_password)
else:
restored_db_name = postgres_db
logger.info("Switching restored database with active one : {} > {}".format(
postgres_restore, restored_db_name
))
swap_restore_active(postgres_host,
postgres_restore,
restored_db_name,
postgres_port,
postgres_user,
postgres_password)
logger.info("Database restored and active.")
else:
logger.warn("No valid argument was given.")
logger.warn(args)
if __name__ == '__main__':
main()
@minderov
Copy link

minderov commented May 8, 2019

Thanks for sharing this script!

I am not the author, but for those of you who are wondering how to use it:

  1. Copy the script to a file, say backup_psql.py
  2. Install boto3 & psycopg (pip3 install boto3 psycopg2-binary)
  3. Create a psql.config (you can choose your own name) file with this content:
[postgresql]
host=<your_psql_addr(probably 127.0.0.1)>
port=<your_psql_port(probably 5432)>
db=<your_db_name>
user=<your_username>
password=<your_password>
  1. Finally, to back up, using the script name from step 1 (here backup_psql.py) and config name from step 3 (here psql.config) run:
python3 backup_psql.py --configfile psql.config --action backup --verbose VERBOSE

@gyecli
Copy link

gyecli commented Jul 8, 2019

Hello to the author(s),

Thanks for making this wonderful script. Are there any licenses attached to it? What are modification/redistribution rules of your script, if any?

Thanks!

@valferon
Copy link
Author

valferon commented Jul 8, 2019

Standard MIT License, free to use and modify.

Since there is interest in this one, I'll drop it in a repo with a Readme and instructions

@valferon
Copy link
Author

valferon commented Jul 9, 2019

There you go, https://github.com/valferon/postgres-manage-python
Feel free to contribute to it ;)

@gyecli
Copy link

gyecli commented Jul 9, 2019

@valferon Thank you!

@fgiraldi
Copy link

fgiraldi commented Sep 23, 2019

Wow! Thank you! Great job!
I will give it a try.

@alsedev
Copy link

alsedev commented Oct 11, 2019

Excellent! What I was looking for. Thanks a lot!

@mudx
Copy link

mudx commented Feb 10, 2020

hey .. great work.. :) it all i been looking for... but i got a question... does it work with Digital Ocean Spaces.. ?? Should i add a handler for the Secret key on the config file ?? Or it there an other way to handle with it..

thx

@valferon
Copy link
Author

valferon commented Feb 11, 2020

hey .. great work.. :) it all i been looking for... but i got a question... does it work with Digital Ocean Spaces.. ?? Should i add a handler for the Secret key on the config file ?? Or it there an other way to handle with it..

thx

I've never used Digital Ocean but I'd assume that it's fairly similar.
You can probably get it to work with minor changes.

@vijsanjeev3
Copy link

vijsanjeev3 commented Mar 26, 2021

DO you have any scripts for Azuer Blob storage as well.

@andybosleythomas
Copy link

andybosleythomas commented Jul 2, 2021

this is great. thanks for this.

@Labyo
Copy link

Labyo commented Aug 15, 2021

thanks.

@nibbl
Copy link

nibbl commented Oct 25, 2021

what is the sense of using subprocess for backup? why you don't use some postgresql modules?

@valferon
Copy link
Author

valferon commented Oct 25, 2021

Which module to you have in mind ?

@nibbl
Copy link

nibbl commented Oct 25, 2021

psycopg2 for example

@valferon
Copy link
Author

valferon commented Oct 25, 2021

Correct me if I'm wrong but I don't think psycopg2 has backup / restore functionalities.
At the time I originally made this, I couldn't find a working solution but maybe that has changed now.

@saramonteiro
Copy link

saramonteiro commented May 4, 2022

psycopg2 for example

Very interesting module. Nice to know. But I took a look and it seems it doesn't provide dump/restore features.

@saramonteiro
Copy link

saramonteiro commented May 4, 2022

Actually, it seems there's a way to use psycopg2 to backup, but seems lot of manual (I don't know if it really worths the change):
https://stackoverflow.com/questions/23732900/postgresql-database-backup-using-python

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment