Skip to content

Instantly share code, notes, and snippets.

@tmc
Created April 27, 2010 03:15
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save tmc/380278 to your computer and use it in GitHub Desktop.
Save tmc/380278 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import os
import sys
import subprocess
def simplify_pg_index_line(line):
"""
Given a postgres index file line returns it's simplified form.
Returns None for comments
"""
TYPES_WITHOUT_OWNER = ['CAST']
parts = line.split()
# bail on empty lines and comments
if not parts or parts[0] == ';':
return ''
meta = {} # we won't actually use this but maybe this'll be handly in the future
meta['size_location_info'] = (parts.pop(0), parts.pop(0))
meta['type'] = parts.pop(0)
if meta['type'] not in TYPES_WITHOUT_OWNER:
meta['owner'] = parts.pop()
return ' '.join(parts)
def get_index(dump_filename):
"""
Returns the text for the index of a postgres dump file
"""
command = os.environ.get('PG_RESTORE', 'pg_restore')
p = subprocess.Popen([command, '-l', dump_filename], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout, stderr = p.communicate()
return stdout.split('\n')
def return_fuzzy_matched_lines(good_lines, text):
result = []
for line in text:
for gl in good_lines:
if gl in line:
result.append(line)
return result
def generate_uncommon_index_contents(indexA, indexB):
"""
Returns a postgres index file that has lines that are unique to indexB using
fuzzy matching.
"""
simplified_indexA = [simplify_pg_index_line(line) for line in indexA]
simplified_indexB = [simplify_pg_index_line(line) for line in indexB]
unique_to_B = set(simplified_indexB).difference(simplified_indexA)
return '\n'.join(return_fuzzy_matched_lines(unique_to_B, indexB))
def main():
if len(sys.argv) != 3:
print """Usage: %s template_dump actual_dump
Generates the needed schema file to restore only the differences between two
postgres schemas. I made this to deal with postgis dumps moving between
machines. The template_dump should come from the source machine not the
destination machine.
Expects two postgres dump files created with 'pg_dump -Fc (databasename) > filename'.
Example workflow:
First generate dump files:
pg_dump -Fc template_postgis > template_postgis.db
pg_dump -Fc some_database > some_database.db
Get them to the local machine (via scp or whatever) then on the local machine:
python pgsql_schema_diff.py template_postgis.db some_database.db > dbcontents
createdb -T template_postgis some_database
pg_restore -L dbcontents -vxO1d some_database some_database.db
""" % sys.argv[0]
sys.exit(1)
print generate_uncommon_index_contents(get_index(sys.argv[1]), get_index(sys.argv[2]))
if __name__ == '__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment