Skip to content

Instantly share code, notes, and snippets.

@zhester
Created June 25, 2013 18:48
Show Gist options
  • Save zhester/5861185 to your computer and use it in GitHub Desktop.
Save zhester/5861185 to your computer and use it in GitHub Desktop.
Properties configuration file scrubber. Can be used to help synchronize configuration files between computers. I use this to update my jEdit configuration files between all my computers. There are some settings that I can't share (e.g. the proxy settings on my work PC), and some settings I'd like to preserve per PC. This handles all that, and al…
#!/usr/bin/env python
##############################################################################
#
# propscrub.py
#
# usage:
# propscrub.py -h
#
# Properties configuration file scrubber. Sorts and removes entries from a
# configuration file. It can remove top-level entries, as well as specific
# entries.
#
# Conversion rules are specified in a JSON file consisting of the following
# basic structure:
# { "delete" : [ "glob1", "glob2" ], "keep" : [ "glob3", "glob4" ] }
#
# Globs look like standard file name globs using * and ? to indicate wildcards
# for property key matching.
#
# Delete globs specify keys which are not permitted in the source file.
#
# Keep globs specify keys whose values must be preserved from a second source
# of properties, even if they are specified in the original source file.
#
# Delete globs will override any functionally equivalent keep globs.
#
# Note: The rule set, merging source, and original source must be loaded in
# an order such that all insertions into the property list can be filtered.
# Thus, it is probably most common to use the class using this basic pattern:
#
# 1. pdict = property_dict()
# 2. pdict.set_merge( open( 'merge.properties', 'rb' ) )
# 3. pdict.set_rules( json.load( open( 'rules.json', 'rb' ) ) )
# 4. pdict.load( open( 'original.properties', 'rb' ) )
# 5. pdict.dump( open( 'new.properties', 'wb' ) )
#
# Using a different order is fine, but be aware that loading properties
# before the rules are set will load everything. Also, loading the merging
# properties after the rules are set may delete things you want to keep.
# The convenience function propscrub() uses this technique, and hides the
# order from the user.
#
# Note: This probably also works fine with .ini and most Unix-style .conf
# files. You'll just need to reassign the parse symbols which are static
# to the property_dict class.
#
##############################################################################
import argparse
import collections
import fnmatch
import json
import sys
#=============================================================================
# sample property list that main() loads when no input file is given
# note: the backslashes in the "wonky" key are doubled so the literal holds a
# single backslash without relying on an unrecognized escape sequence
_example_props = """
#A simple comment
#A.somewhat=terrible=comment.
simple_option_a=1
simple_option_b=2
wonky\\ key\\ a=wonky value a
parent.child=nested value
grandparent.parent.child=nested nested value
greatgrandparent.grandparent.parent.child=nested nested nested value
good.a=1
good.b=2
good.c=3
bad.a=4
bad.b=5
bad.c=6
throwaway=sadface
keep=yay!
keepers.a=7
keepers.b=8
keepers.c=9
a.a.a=aaa
a.b.b=abb
a.c.b=acb
well=this=sucks
"""
#=============================================================================
# sample merging property list that main() loads (before the rules and the
# sample properties) when no input file is given
_example_merge = """
#Another comment
simple_option_a=3
existing_option=42
keep=haha!
keepers.a=10
keepers.b=11
keepers.c=12
keepers.d=13
"""
#=============================================================================
# sample rule set, as JSON text, that main() parses with json.loads() when no
# input file is given; it holds a "delete" and a "keep" list of key globs
_example_rules = """
{
"delete" : [
"bad.*",
"throwaway",
"a.*.b"
],
"keep" : [
"keepers.*",
"keep"
]
}
"""
#=============================================================================
def match_glist( subject, globlist ):
    """
    checks a string for a match in a list of glob-style patterns

    subject  -- string to test (e.g. a property key)
    globlist -- iterable of glob patterns

    returns the zero-based position of the first pattern that matches, or
    None when no pattern matches; position 0 is a valid (falsy) result, so
    callers must compare the result against None rather than test its truth
    """
    for index, pattern in enumerate( globlist ):
        # fnmatchcase() is used rather than fnmatch() because property keys
        # are not file names: fnmatch() passes both arguments through
        # os.path.normcase(), so the same rule list could match differently
        # from one operating system to another
        if fnmatch.fnmatchcase( subject, pattern ):
            return index
    return None
#=============================================================================
def propscrub( source, rules = None, merge = None, target = None ):
    """
    convenience function that can deal with file handles and names

    The merging properties are loaded first, then the rules are set, then
    the source properties are loaded, so callers do not need to know the
    required order.

    source -- file name (str) or readable handle of the property list
    rules  -- file name (str) of a JSON rule file, or an already-parsed
              rule dictionary; None (the default) means no rules
    merge  -- file name (str) or readable handle of the merging property
              list; None (the default) skips merging
    target -- file name (str) or writable handle for the result; None
              (the default) returns the result as a string instead

    returns whatever property_dict.dump() returns when a target is given,
    otherwise the string form of the scrubbed property list

    Files that are opened here by name are closed before returning;
    handles passed in by the caller are left open.
    """
    # create the property dictionary object
    pdict = property_dict()
    # check for a specified merge file
    if isinstance( merge, str ):
        with open( merge, 'rb' ) as handle:
            pdict.set_merge( handle )
    elif merge is not None:
        pdict.set_merge( merge )
    # check for a specified rule file or dictionary
    if rules is None:
        # a fresh dictionary per call (no shared mutable default argument)
        rules = {}
    if isinstance( rules, str ):
        with open( rules, 'rb' ) as handle:
            pdict.set_rules( json.load( handle ) )
    else:
        pdict.set_rules( rules )
    # load the source property list
    if isinstance( source, str ):
        with open( source, 'rb' ) as handle:
            pdict.load( handle )
    else:
        pdict.load( source )
    # check for a specified output file
    if isinstance( target, str ):
        # closing the file here guarantees the output is flushed to disk
        with open( target, 'wb' ) as handle:
            return pdict.dump( handle )
    if target is not None:
        return pdict.dump( target )
    return str( pdict )
#=============================================================================
class ordered_dict( collections.OrderedDict ):
    """ an OrderedDict that can create missing sub-dictionaries """
    #=========================================================================
    def __missing__( self, key ):
        """
        used to create sub-dictionaries without user checks

        Called by item lookup when key is absent. A new, empty ordered_dict
        is assigned to the key and returned, so d[ 'a' ][ 'b' ] = 1 works
        without creating d[ 'a' ] first.
        """
        child = ordered_dict()
        self[ key ] = child
        # return the new object directly instead of looking the key up
        # again: a subclass __setitem__ may decline to store the key, and a
        # second lookup would then call __missing__ again without end
        return child
#=============================================================================
class guarded_dict( ordered_dict ):
    """
    an ordered_dict that protects itself using insertion rules

    Any key that matches one of the glob-style patterns in filter_globs is
    silently refused on assignment. Keys that are already stored are not
    removed when the pattern list changes.
    """
    #=========================================================================
    def __init__( self, filter_globs = None, *args, **kwargs ):
        """
        initialize a new guarded_dict instance

        filter_globs -- list of glob patterns for keys to refuse; None (the
                        default) starts with a new, empty list
        remaining arguments are passed to the parent initializer
        """
        # the filter list must exist before the parent initializer runs,
        # because initial items are inserted through __setitem__
        self.filter_globs = [] if filter_globs is None else filter_globs
        super( guarded_dict, self ).__init__( *args, **kwargs )
    #=========================================================================
    def __setitem__( self, key, value ):
        """ override default item setting to check if this key is allowed """
        # match_glist returns an index (possibly 0) on a match, so the
        # result must be compared against None rather than tested for truth
        if match_glist( key, self.filter_globs ) is None:
            super( guarded_dict, self ).__setitem__( key, value )
    #=========================================================================
    def set_filter_globs( self, filter_globs ):
        """ set the list of glob-style patterns that prevent assignment """
        self.filter_globs = filter_globs
#=============================================================================
class property_dict( guarded_dict ):
    """
    dictionary smart enough to deal with a complex property list

    Each property line "key=value" is stored as key -> value. Each comment
    line is stored whole as a key whose value is None. Assignments pass
    through two filters: keys matching a "keep" glob in the rules are never
    (re)assigned, and keys matching a "delete" glob are refused by the
    parent class once set_rules() has installed them.
    """
    # static variables
    _comm_sym = '#' # comment lines begin with this
    _name_sym = '.' # names are separated by this
    _set_sym = '=' # key/value pairs are separated by this
    _encoding = 'latin-1' # used only for bytes input / binary output
    #=========================================================================
    def __init__( self, *args, **kwargs ):
        """ initialize a new property_dict instance """
        # these must exist before the parent initializer runs, because
        # initial items are inserted through __setitem__, which reads rules
        self.rules = {}
        self.eol = '\n'
        super( property_dict, self ).__init__( *args, **kwargs )
    #=========================================================================
    def __setitem__( self, key, value ):
        """ override item setting to check for preserved values """
        # see if there are rules for preserving values
        # and this key matches a preservation rule
        if ( 'keep' in self.rules ) \
            and ( match_glist( key, self.rules[ 'keep' ] ) is not None ):
            # do not reassign this value
            return
        # let the parent do the assignment
        super( property_dict, self ).__setitem__( key, value )
    #=========================================================================
    def __str__( self ):
        """
        build the string representation of the property list

        Entries are emitted in sorted key order, one per line, each ended
        with self.eol. Comment entries are written as-is; all other
        entries are written as key, separator, value.
        """
        parts = []
        # sorted() works on both key lists and key views
        for key in sorted( self.keys() ):
            if key[ : 1 ] == property_dict._comm_sym:
                parts.append( '%s%s' % ( key, self.eol ) )
            else:
                parts.append(
                    '%s%s%s%s' % (
                        key,
                        property_dict._set_sym,
                        self[ key ],
                        self.eol
                    )
                )
        return ''.join( parts )
    #=========================================================================
    def dump( self, handle ):
        """
        dump the property list to a file handle

        handle -- writable handle, opened in either text or binary mode

        returns the result of handle.write()
        """
        text = str( self )
        try:
            return handle.write( text )
        except TypeError:
            # a binary-mode handle rejects text; retry with encoded bytes
            return handle.write( text.encode( property_dict._encoding ) )
    #=========================================================================
    def dumps( self ):
        """ dump the property list to a string """
        return str( self )
    #=========================================================================
    def load( self, handle ):
        """ load the property list from a file handle """
        self._load( handle.readlines() )
    #=========================================================================
    def loads( self, source ):
        """ load the property list from a string """
        self._load( source.strip().splitlines() )
    #=========================================================================
    def set_merge( self, handle ):
        """ set the merging property list from a file handle """
        self._load( handle.readlines() )
    #=========================================================================
    def set_merges( self, source ):
        """ set the merging property list from a string """
        self._load( source.strip().splitlines() )
    #=========================================================================
    def set_rules( self, rules ):
        """
        set the property list conversion rules

        rules -- dictionary that may hold a "delete" and/or a "keep" list
                 of key globs; the "delete" list, when present, becomes
                 the parent's list of refused keys
        """
        self.rules = rules
        if 'delete' in self.rules:
            self.set_filter_globs( self.rules[ 'delete' ] )
    #=========================================================================
    def _load( self, lines ):
        """
        load list of properties into object

        lines -- iterable of lines, with or without their line endings;
                 bytes lines (from a binary-mode handle) are decoded

        The end-of-line style used for output is switched to CRLF when the
        first line ends with CRLF. Blank lines are skipped. A line without
        a separator is stored as a key with an empty value.
        """
        # first line flag
        first = True
        # attempt to load a property from each line
        for line in lines:
            # binary-mode handles yield bytes where bytes and str differ
            if isinstance( line, bytes ) and not isinstance( line, str ):
                line = line.decode( property_dict._encoding )
            # check first line for end-of-line style
            if first:
                if line[ -2 : ] == '\r\n':
                    self.eol = '\r\n'
                first = False
            # strip exterior whitespace
            line = line.strip()
            # nothing to store for a blank line
            if not line:
                continue
            # comment lines are preserved as-is
            if line[ : 1 ] == property_dict._comm_sym:
                self[ line ] = None
            # attempt to assign all property values
            else:
                # partition() splits on the first separator only and
                # yields an empty value when the separator is absent
                ( key, _, value ) = line.partition( property_dict._set_sym )
                self[ key ] = value
#=============================================================================
def main( argv ):
    """
    script execution entry point

    argv -- full argument list, including the program name at index 0

    With no input file, the built-in example merge list, rules, and
    properties are processed instead. The result goes to the output file
    when one is named, otherwise to standard output.

    returns 0 (the process exit status)
    """
    # create and configure an argument parser
    parser = argparse.ArgumentParser(
        description = 'Properties configuration file scrubber.'
    )
    parser.add_argument(
        '-i', '--input', default = None,
        help = 'Specify property file to read'
    )
    parser.add_argument(
        '-o', '--output', default = None,
        help = 'Specify property file to write'
    )
    parser.add_argument(
        '-r', '--rules', default = None,
        help = 'Specify rule file'
    )
    parser.add_argument(
        '-m', '--merge', default = None,
        help = 'Specify property file to merge'
    )
    # the parser only wants the arguments (not the program "argument")
    args = parser.parse_args( argv[ 1 : ] )
    # create a property dictionary
    pdict = property_dict()
    # see if we are testing the module
    if args.input is None:
        pdict.set_merges( _example_merge )
        pdict.set_rules( json.loads( _example_rules ) )
        pdict.loads( _example_props )
    # normal operation: merge list first, then rules, then the input
    else:
        if args.merge is not None:
            with open( args.merge, 'rb' ) as handle:
                pdict.set_merge( handle )
        if args.rules is not None:
            with open( args.rules, 'rb' ) as handle:
                pdict.set_rules( json.load( handle ) )
        with open( args.input, 'rb' ) as handle:
            pdict.load( handle )
    # check for stdout-style usage
    if args.output is None:
        pdict.dump( sys.stdout )
    # dump properties to file
    else:
        # closing the file here guarantees the output is flushed to disk
        with open( args.output, 'wb' ) as handle:
            pdict.dump( handle )
    # return success.
    return 0
#=============================================================================
if __name__ == "__main__":
sys.exit( main( sys.argv ) )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment