Skip to content

Instantly share code, notes, and snippets.

@mlt
Last active December 10, 2020 23:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mlt/69cf82e5c8a34d1015648e31821376f0 to your computer and use it in GitHub Desktop.
Slightly modified from http://furius.ca/pubcode/pub/conf/bin/pg-split-dump to produce a type/schema/name.sql directory structure, with all overloaded function definitions kept in the same file.
#!/usr/bin/env python
"""
Split a PostgreSQL plain dump file into files for each of its objects.
This is used to compare two SQL database contents: both contents are dumped to a
plain text file, and they are then processed through this script which generates
a file for each database object, which can then be compared using a directory
diff program.
"""
# stdlib imports
import os, re
from os.path import join, exists
from string import maketrans
# Note: this code is pulled straight out of the xxdiff library.
# Copied here to avoid the dependency.
# Matches a pg_dump plain-text section header, e.g.:
#   --
#   -- Name: foo(integer); Type: FUNCTION; Schema: public; Owner: bob
# Groups: 1 = object name (up to the first '(' or ';'),
#         2 = object type, 3 = schema name.
sec_re = re.compile(
    r'^--\n-- (?:Data for )?Name:\s+'
    r'(?:DOMAIN|OPERATOR|SCHEMA|VIEW|COLUMN|EXTENSION|TABLE|FUNCTION|SEQUENCE)?'
    r'\s*([^(;]+).*;\s*Type:\s+([^;]+);\s*Schema:\s+([^;]+);.*$',
    re.M)

def parse_dump(dbdump, sort_columns=False):
    """
    Parse a PostgreSQL plain-text dump, extracting all its sections into a
    list of (name, type, schema, contents) tuples built from the Name, Type
    and Schema fields of the header comments.

    Consecutive sections sharing the same name (e.g. overloaded functions)
    are merged: the first section of the run owns all the text up to the
    next distinctly-named section, and the following duplicates are returned
    with contents None so callers can skip them.  Contents is also None for
    sections with no body text.  Contents of data chunks for tables come
    back with type 'data', so if you're going to turn this into a map, be
    careful to filter out or concatenate those entries as appropriate.

    :param dbdump: full text of the dump file.
    :param sort_columns: accepted for backward compatibility; unused.
    :return: list of (name, type, schema, contents) tuples where 'type' is
        lowercased with spaces replaced by underscores.
    """
    # Materialize the matches: finditer() is lazy and we index into it.
    matches = list(sec_re.finditer(dbdump))
    nmatches = len(matches)
    results = []
    for i, mo in enumerate(matches):
        name, typ, nspname = mo.group(1, 2, 3)
        if i > 0 and matches[i - 1].group(1) == name:
            # Duplicate of the previous section (overload): its text already
            # belongs to the first section of the run.
            contents = None
        else:
            # Skip past any same-named duplicates to find where this
            # section's text really ends (next distinct header, or EOF).
            j = i + 1
            while j < nmatches and matches[j].group(1) == name:
                j += 1
            end = matches[j].start() if j < nmatches else len(dbdump)
            contents = dbdump[mo.end():end]
        descline = mo.group(0)  # the header comment itself
        results.append((name, typ.replace(' ', '_').lower(), nspname,
                        descline + contents if contents else None))
    return results
# Note: this code is pulled straight out of the xxdiff library.
# Copied here to avoid the dependency.
def consepairs(seq):
    """
    Iterate over consecutive pairs of chunks with distinct names.

    Chunks whose name repeats the previous yielded chunk's name are skipped
    (their ``contents`` attribute is set to None as a side effect), so that
    e.g. all overloads of a function get merged into one section span.

    :param seq: iterable of objects with ``name`` and ``contents`` attrs.
    :yield: (prev, current) pairs of consecutive distinctly-named chunks.
    """
    it = iter(seq)
    try:
        # py2's it.next() was removed in Python 3; next() works on both.
        prev = next(it)
    except StopIteration:
        # Empty input: nothing to pair.  Returning (rather than letting
        # StopIteration escape) avoids a RuntimeError under PEP 479.
        return
    for el in it:
        if prev.name == el.name:
            # Duplicate (overload): mark it content-less and keep extending
            # the previous chunk's span.
            el.contents = None
            continue
        yield prev, el
        prev = el
def main():
    """
    Command-line entry point.

    Usage: script DUMPFILE [OUTDIR] — splits DUMPFILE into one
    <outdir>/<type>/<schema>/<name>.sql file per database object.
    """
    import optparse
    parser = optparse.OptionParser(__doc__.strip())
    opts, args = parser.parse_args()
    if len(args) not in (1, 2):
        parser.error("You must specify a single dumpfile, "
                     "and optionally an output directory.")
    dumpfile = args[0]
    rootdir = args[1] if len(args) == 2 else os.getcwd()
    if not exists(rootdir):
        os.makedirs(rootdir)
    # Close the dump file promptly instead of leaking the handle.
    with open(dumpfile) as f:
        sections = parse_dump(f.read())
    # 'objtype' instead of 'type': don't shadow the builtin.
    for name, objtype, nspname, contents in sections:
        if not contents:
            continue  # merged overload duplicates / empty sections
        outdir = join(rootdir, objtype, nspname)
        if not exists(outdir):
            os.makedirs(outdir)
        # Sanitize the object name for use as a filename: '[' and ']'
        # become '_', double quotes are dropped (replaces the py2-only
        # string.maketrans / two-argument str.translate idiom).
        safe = name.replace('[', '_').replace(']', '_').replace('"', '')
        outname = '{name}.sql'.format(name=safe)
        print(outname)
        with open(join(outdir, outname), 'w') as out:
            out.write(contents.rstrip() + '\n')

if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment