Skip to content

Instantly share code, notes, and snippets.

@lorentzlasson
Last active March 26, 2020 09:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lorentzlasson/f0c685e96921f5375975d1f92a668068 to your computer and use it in GitHub Desktop.
Save lorentzlasson/f0c685e96921f5375975d1f92a668068 to your computer and use it in GitHub Desktop.
Split schema-only PostgreSQL dumps for source control
#! /usr/bin/python
import os, re, sys
# Split pg_dump files to schema/type file hierarchy
# Use with files produced by pg_dump -s
#TODO: Identify dropped objects and delete from tree
# Detect object header lines
re_obj = re.compile(r'-- Name: ([-\w\s\.]+)(?:\([-\w\s\[\],]*\))?; Type: ([-\w\s]+); Schema: ([-\w]+); Owner: ([-\w]*)(?:; Tablespace: )?([-\w]*)\n', flags=re.IGNORECASE)
if len(sys.argv) < 2:
print 'Usage: pg_schema_split infile [outdir]'
sys.exit()
infile = sys.argv[1]
outdir = os.path.split(sys.argv[0][0])
if len(sys.argv) > 2 : outdir = sys.argv[2]
name, type, schema, owner, tablespace = ['']*5
sql = ''
for line in open(infile,'r').readlines():
obj = re_obj.match(line)
if obj:
ot = re_obj.search(line).group(2)
if ot not in ('COMMENT', 'SEQUENCE OWNED BY'): # Keep these in parent script
if schema != 'pg_catalog' and name > '':
if schema == '-': schema = 'schemaless'
#print 'Name: %s; Type: %s; Schema: %s; Owner: %s; Tablespace: %s' % (name, type, schema, owner, tablespace or '(default)')
print 'Schema: %s; Type: %s; Name: %s' % (schema, type, name)
sqlpath = os.path.join(outdir, schema, type)
if not os.path.exists(sqlpath):
print '*** mkdir %s' % sqlpath
os.makedirs(sqlpath)
sqlf = os.path.join(sqlpath, '%s.sql' % name)
sql = sql.replace('\n\n','\n') # Cleanup line spacing
open(sqlf,'w').write(sql.strip())
groups = re_obj.search(line).groups()
name, type, schema, owner, tablespace = map(lambda x: x.replace(' ','_').lower(), groups)
sql = ''
elif line.strip() != '--': # Skip empty comment lines
sql += line
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment