zhester/propscrub.py

## propscrub.py
#!/usr/bin/env python
##############################################################################
#
# propscrub.py
#
# usage:
#   propscrub.py -h
#
# Properties configuration file scrubber.  Sorts and removes entries from a
# configuration file.  It can remove top-level entries, as well as specific
# entries.
#
# Conversion rules are specified in a JSON file consisting of the following
# basic structure:
#   { "delete" : [ "glob1", "glob2" ], "keep" : [ "glob3", "glob4" ] }
#
# Globs look like standard file name globs using * and ? to indicate wildcards
# for property key matching.
#
# Delete globs specify keys which are not permitted in the source file.
#
# Keep globs specify keys whose values must be preserved from a second source
# of properties, even if they are specified in the original source file.
#
# Delete globs will override any functionally equivalent keep globs.
#
# Note: The order in which the rule set, merging source, and original source
# are loaded must be such that all insertions into the property list can be
# filtered.  Thus, it is probably most common to use the class using this
# basic pattern:
#
# 1. pdict = property_dict()
# 2. pdict.set_merge( open( 'merge.properties', 'rb' ) )
# 3. pdict.set_rules( json.load( open( 'rules.json', 'rb' ) ) )
# 4. pdict.load( open( 'original.properties', 'rb' ) )
# 5. pdict.dump( open( 'new.properties', 'wb' ) )
#
# Using a different order is fine, but be aware that loading properties
# before the rules are set will load everything.  Also, loading the merging
# properties after the rules are set may delete things you want to keep.
# The convenience function propscrub() uses this technique, and hides the
# order from the user.
#
# Note: This probably also works fine with .ini and most Unix-style .conf
# files.  You'll just need to reassign the parse symbols which are static
# to the property_dict class.
#
##############################################################################


import argparse
import collections
import fnmatch
import json
import sys


#=============================================================================
_example_props = """
#A simple comment
#A.somewhat=terrible=comment.
simple_option_a=1
simple_option_b=2
wonky\ key\ a=wonky value a
parent.child=nested value
grandparent.parent.child=nested nested value
greatgrandparent.grandparent.parent.child=nested nested nested value
good.a=1
good.b=2
good.c=3
bad.a=4
bad.b=5
bad.c=6
throwaway=sadface
keep=yay!
keepers.a=7
keepers.b=8
keepers.c=9
a.a.a=aaa
a.b.b=abb
a.c.b=acb
well=this=sucks
"""

#=============================================================================
# Sample merging property list; main() loads it (before the rules and before
# _example_props) when no input file is given.
_example_merge = """
#Another comment
simple_option_a=3
existing_option=42
keep=haha!
keepers.a=10
keepers.b=11
keepers.c=12
keepers.d=13
"""

#=============================================================================
# Sample rule set as JSON text; main() parses it with json.loads() and hands
# the result to set_rules() when no input file is given.
_example_rules = """
{
    "delete" : [
        "bad.*",
        "throwaway",
        "a.*.b"
    ],
    "keep" : [
        "keepers.*",
        "keep"
    ]
}
"""


#=============================================================================
def match_glist( subject, globlist ):
    """
    Find the first glob-style pattern in a list that matches a string.

    subject:  the string to test
    globlist: list of fnmatch-style patterns (using * and ? wildcards)
    returns:  index of the first matching pattern, or None if none match
    """
    for position, pattern in enumerate( globlist ):
        if fnmatch.fnmatch( subject, pattern ):
            return position
    return None


#=============================================================================
def propscrub( source, rules = None, merge = None, target = None ):
    """
    Convenience function that can deal with file handles and file names.

    The inputs are applied in the order merge, rules, source so that the
    caller does not have to care about it.

    source:  file name or readable handle of the property list to scrub
    rules:   file name of a JSON rule file, or an already-parsed rule
             dictionary (None means "no rules")
    merge:   optional file name or readable handle of the merging property
             list
    target:  optional file name or writable handle for the result
    returns: the scrubbed property list as a string when no target is
             given, otherwise whatever the property list's dump() returns

    Files opened here (arguments given as names) are closed before
    returning; handles passed in by the caller are left open.
    """

    # avoid sharing one default dictionary between calls
    if rules is None:
        rules = {}

    # create the property dictionary object
    pdict = property_dict()

    # check for a specified merge file
    if isinstance( merge, str ):
        with open( merge, 'rb' ) as handle:
            pdict.set_merge( handle )
    elif merge is not None:
        pdict.set_merge( merge )

    # check for a specified rule file or dictionary
    if isinstance( rules, str ):
        with open( rules, 'rb' ) as handle:
            pdict.set_rules( json.load( handle ) )
    else:
        pdict.set_rules( rules )

    # load the source property list
    if isinstance( source, str ):
        with open( source, 'rb' ) as handle:
            pdict.load( handle )
    else:
        pdict.load( source )

    # no output requested: hand the text back to the caller
    if target is None:
        return str( pdict )

    # output to a named file (closed, and therefore flushed, on exit)
    if isinstance( target, str ):
        with open( target, 'wb' ) as handle:
            return pdict.dump( handle )

    # output to a caller-supplied handle
    return pdict.dump( target )


#=============================================================================
class ordered_dict( collections.OrderedDict ):
    """ an OrderedDict that can create missing sub-dictionaries """

    #=========================================================================
    def __missing__( self, key ):
        """
        Create a sub-dictionary for a key that is looked up but not present.

        key:     the key that was not found
        returns: the newly created (empty) ordered_dict
        """
        child = ordered_dict()
        self[ key ] = child

        # Return the new object directly rather than re-reading self[ key ]:
        # if a subclass's __setitem__ declines to store the value, reading it
        # back would call __missing__ again and recurse without end.
        return child


#=============================================================================
class guarded_dict( ordered_dict ):
    """ an ordered_dict that protects itself using insertion rules """

    #=========================================================================
    def __init__( self, filter_globs = None, *args, **kwargs ):
        """
        Initialize a new guarded_dict instance.

        filter_globs: list of glob-style patterns; keys matching any of them
                      cannot be assigned (None means "no patterns")
        *args, **kwargs: passed through to the parent constructor
        """

        # Each instance gets its own list when none is supplied.  The
        # attribute is set before the parent constructor runs because
        # __setitem__ reads it for every item being stored.
        self.filter_globs = [] if filter_globs is None else filter_globs
        super( guarded_dict, self ).__init__( *args, **kwargs )

    #=========================================================================
    def __setitem__( self, key, value ):
        """
        Store an item unless its key matches one of the filter patterns.

        Rejected assignments are silently ignored.
        """
        if match_glist( key, self.filter_globs ) is None:
            super( guarded_dict, self ).__setitem__( key, value )

    #=========================================================================
    def set_filter_globs( self, filter_globs ):
        """
        Set the list of glob-style patterns that prevent assignment.

        Only later assignments are affected; items already stored are not
        re-checked.
        """
        self.filter_globs = filter_globs


#=============================================================================
class property_dict( guarded_dict ):
    """
    Dictionary smart enough to deal with a complex property list.

    Keys are property names; a comment line is stored whole as a key with a
    value of None.  Assignments to keys matching a 'keep' rule are ignored
    here, and the remaining assignments are passed to the parent class.
    """

    # static variables
    _comm_sym = '#'                 # comment lines begin with this
    _name_sym = '.'                 # names are separated by this
    _set_sym  = '='                 # key/value pairs are separated by this
    _encoding = 'latin-1'           # maps every byte to one character, so
                                    #   binary handles round-trip unchanged

    #=========================================================================
    def __init__( self, *args, **kwargs ):
        """ initialize a new property_dict instance """

        # set before delegating: __setitem__ reads self.rules for every
        # assignment, including any made by the parent constructor
        self.rules = {}
        self.eol   = '\n'
        super( property_dict, self ).__init__( *args, **kwargs )

    #=========================================================================
    def __setitem__( self, key, value ):
        """
        Store an item unless its key matches a 'keep' rule.

        Once 'keep' rules are set, every assignment to a matching key is
        silently dropped, so only a value stored before the rules were set
        survives.
        """

        # see if there are rules for preserving values
        # and this key matches a preservation rule
        if ( 'keep' in self.rules ) \
          and ( match_glist( key, self.rules[ 'keep' ] ) is not None ):

            # do not reassign this value
            return

        # let the parent do the assignment
        super( property_dict, self ).__setitem__( key, value )

    #=========================================================================
    def __str__( self ):
        """
        Build the string representation of the property list.

        Entries are emitted in sorted key order, one per line, each ending
        with self.eol.  Comment keys are written as-is; all other keys are
        written as key, separator, value.
        """
        chunks = []

        # sorted() works on both a key list and a key view
        for key in sorted( self.keys() ):
            if key[ : 1 ] == property_dict._comm_sym:
                chunks.append( '%s%s' % ( key, self.eol ) )
            else:
                chunks.append(
                    '%s%s%s%s' % (
                        key,
                        property_dict._set_sym,
                        self[ key ],
                        self.eol
                    )
                )
        return ''.join( chunks )

    #=========================================================================
    def dump( self, handle ):
        """
        Dump the property list to a file handle.

        handle:  writable handle, opened in either text or binary mode
        returns: whatever handle.write() returns

        Text is encoded with property_dict._encoding when the handle is a
        binary stream from the io module that requires bytes; encoding
        raises UnicodeEncodeError if the text contains characters that
        codec cannot represent.
        """
        import io

        text = str( self )
        binary = isinstance( handle, ( io.RawIOBase, io.BufferedIOBase ) )

        # where str is already a byte string there is nothing to encode
        if binary and not isinstance( text, bytes ):
            text = text.encode( property_dict._encoding )
        return handle.write( text )

    #=========================================================================
    def dumps( self ):
        """ dump the property list to a string """
        return str( self )

    #=========================================================================
    def load( self, handle ):
        """ load the property list from a file handle """
        self._load( handle.readlines() )

    #=========================================================================
    def loads( self, source ):
        """ load the property list from a string """
        self._load( source.strip().splitlines() )

    #=========================================================================
    def set_merge( self, handle ):
        """ set the merging property list from a file handle """
        self._load( handle.readlines() )

    #=========================================================================
    def set_merges( self, source ):
        """ set the merging property list from a string """
        self._load( source.strip().splitlines() )

    #=========================================================================
    def set_rules( self, rules ):
        """
        Set the property list conversion rules.

        rules: dictionary that may contain a 'delete' and/or a 'keep' list
               of glob-style patterns; a 'delete' list is installed as the
               assignment filter
        """
        self.rules = rules
        if 'delete' in self.rules:
            self.set_filter_globs( self.rules[ 'delete' ] )

    #=========================================================================
    def _load( self, lines ):
        """
        Load a list of property lines into the object.

        lines: iterable of lines (text, or bytes as read from a binary
               handle), with or without their line endings

        The first line decides the end-of-line style used for output.
        Blank lines are skipped.  A line without a separator is stored as
        a key with an empty value.
        """

        # first line flag
        first = True

        # attempt to load a property from each line
        for line in lines:

            # lines from a binary handle arrive as bytes where bytes and
            # str are distinct types; convert them to text
            if isinstance( line, bytes ) and not isinstance( line, str ):
                line = line.decode( property_dict._encoding )

            # check first line for end-of-line style
            if first:
                if line[ -2 : ] == '\r\n':
                    self.eol = '\r\n'
                first = False

            # strip exterior whitespace
            line = line.strip()

            # nothing to store for an empty line
            if not line:
                continue

            # comment lines are preserved as-is
            if line[ : 1 ] == property_dict._comm_sym:
                self[ line ] = None

            # attempt to assign all property values; partition() yields an
            # empty value when the separator is absent
            else:
                ( key, _, value ) = line.partition( property_dict._set_sym )
                self[ key ] = value


#=============================================================================
def main( argv ):
    """
    Script execution entry point.

    argv:    full argument list, with the program name at index 0
    returns: 0 on success

    When no input file is given, the built-in example property list, merge
    list, and rules are processed instead.  Output goes to the file named
    by --output, or to stdout when that option is absent.
    """

    # create and configure an argument parser
    parser = argparse.ArgumentParser(
        description = 'Properties configuration file scrubber.'
    )
    parser.add_argument(
        '-i', '--input', default = None,
        help = 'Specify property file to read'
    )
    parser.add_argument(
        '-o', '--output', default = None,
        help = 'Specify property file to write'
    )
    parser.add_argument(
        '-r', '--rules', default = None,
        help = 'Specify rule file'
    )
    parser.add_argument(
        '-m', '--merge', default = None,
        help = 'Specify property file to merge'
    )

    # the parser only wants the arguments (not the program "argument")
    args = parser.parse_args( argv[ 1 : ] )

    # create a property dictionary
    pdict = property_dict()

    # see if we are testing the module
    if args.input is None:
        pdict.set_merges( _example_merge )
        pdict.set_rules( json.loads( _example_rules ) )
        pdict.loads( _example_props )

    # normal operation: merge source first, then rules, then the input;
    # every file is closed as soon as it has been read
    else:
        if args.merge is not None:
            with open( args.merge, 'rb' ) as handle:
                pdict.set_merge( handle )
        if args.rules is not None:
            with open( args.rules, 'rb' ) as handle:
                pdict.set_rules( json.load( handle ) )
        with open( args.input, 'rb' ) as handle:
            pdict.load( handle )

    # check for stdout-style usage
    if args.output is None:
        pdict.dump( sys.stdout )

    # dump properties to file (closing it flushes the output)
    else:
        with open( args.output, 'wb' ) as handle:
            pdict.dump( handle )

    # return success.
    return 0

#=============================================================================
if __name__ == '__main__':
    # run the script and use main()'s return value as the exit status
    exit_status = main( sys.argv )
    sys.exit( exit_status )
	#!/usr/bin/env python
	##############################################################################
	#
	# propscrub.py
	#
	# usage:
	# propscrub.py -h
	#
	# Properties configuration file scrubber. Sorts and removes entries from a
	# configuration file. It can remove top-level entries, as well as specific
	# entries.
	#
	# Conversion rules are specified in a JSON file consisting of the following
	# basic structure:
	# { "delete" : [ "glob1", "glob2" ], "keep" : [ "glob3", "glob4" ] }
	#
	# Globs look like standard file name globs using * and ? to indicate wildcards
	# for property key matching.
	#
	# Delete globs specify keys which are not permitted in the source file.
	#
	# Keep globs specify keys whose values must be preserved from a second source
	# of properties, even if they are specified in the original source file.
	#
	# Delete globs will override any functionally equivalent keep globs.
	#
	# Note: The order in which the rule set, merging source, and original source
	# are loaded must be such that all insertions into the property list can be
	# filtered. Thus, it is probably most common to use the class using this
	# basic pattern:
	#
	# 1. pdict = property_dict()
	# 2. pdict.set_merge( open( 'merge.properties', 'rb' ) )
	# 3. pdict.set_rules( json.load( open( 'rules.json', 'rb' ) ) )
	# 4. pdict.load( open( 'original.properties', 'rb' ) )
	# 5. pdict.dump( open( 'new.properties', 'wb' ) )
	#
	# Using a different order is fine, but be aware that loading properties
	# before the rules are set will load everything. Also, loading the merging
	# properties after the rules are set may delete things you want to keep.
	# The convenience function propscrub() uses this technique, and hides the
	# order from the user.
	#
	# Note: This probably also works fine with .ini and most Unix-style .conf
	# files. You'll just need to reassign the parse symbols which are static
	# to the property_dict class.
	#
	##############################################################################


	import argparse
	import collections
	import fnmatch
	import json
	import sys


	#=============================================================================
	_example_props = """
	#A simple comment
	#A.somewhat=terrible=comment.
	simple_option_a=1
	simple_option_b=2
	wonky\ key\ a=wonky value a
	parent.child=nested value
	grandparent.parent.child=nested nested value
	greatgrandparent.grandparent.parent.child=nested nested nested value
	good.a=1
	good.b=2
	good.c=3
	bad.a=4
	bad.b=5
	bad.c=6
	throwaway=sadface
	keep=yay!
	keepers.a=7
	keepers.b=8
	keepers.c=9
	a.a.a=aaa
	a.b.b=abb
	a.c.b=acb
	well=this=sucks
	"""

	#=============================================================================
	# Sample merging property list; main() loads it (before the rules and before
	# _example_props) when no input file is given.
	_example_merge = """
	#Another comment
	simple_option_a=3
	existing_option=42
	keep=haha!
	keepers.a=10
	keepers.b=11
	keepers.c=12
	keepers.d=13
	"""

	#=============================================================================
	# Sample rule set as JSON text; main() parses it with json.loads() and hands
	# the result to set_rules() when no input file is given.
	_example_rules = """
	{
	"delete" : [
	"bad.*",
	"throwaway",
	"a.*.b"
	],
	"keep" : [
	"keepers.*",
	"keep"
	]
	}
	"""


	#=============================================================================
	def match_glist( subject, globlist ):
	    """
	    Find the first glob-style pattern in a list that matches a string.

	    subject:  the string to test
	    globlist: list of fnmatch-style patterns (using * and ? wildcards)
	    returns:  index of the first matching pattern, or None if none match
	    """
	    for position, pattern in enumerate( globlist ):
	        if fnmatch.fnmatch( subject, pattern ):
	            return position
	    return None


	#=============================================================================
	def propscrub( source, rules = None, merge = None, target = None ):
	    """
	    Convenience function that can deal with file handles and file names.

	    The inputs are applied in the order merge, rules, source so that the
	    caller does not have to care about it.

	    source:  file name or readable handle of the property list to scrub
	    rules:   file name of a JSON rule file, or an already-parsed rule
	             dictionary (None means "no rules")
	    merge:   optional file name or readable handle of the merging property
	             list
	    target:  optional file name or writable handle for the result
	    returns: the scrubbed property list as a string when no target is
	             given, otherwise whatever the property list's dump() returns

	    Files opened here (arguments given as names) are closed before
	    returning; handles passed in by the caller are left open.
	    """

	    # avoid sharing one default dictionary between calls
	    if rules is None:
	        rules = {}

	    # create the property dictionary object
	    pdict = property_dict()

	    # check for a specified merge file
	    if isinstance( merge, str ):
	        with open( merge, 'rb' ) as handle:
	            pdict.set_merge( handle )
	    elif merge is not None:
	        pdict.set_merge( merge )

	    # check for a specified rule file or dictionary
	    if isinstance( rules, str ):
	        with open( rules, 'rb' ) as handle:
	            pdict.set_rules( json.load( handle ) )
	    else:
	        pdict.set_rules( rules )

	    # load the source property list
	    if isinstance( source, str ):
	        with open( source, 'rb' ) as handle:
	            pdict.load( handle )
	    else:
	        pdict.load( source )

	    # no output requested: hand the text back to the caller
	    if target is None:
	        return str( pdict )

	    # output to a named file (closed, and therefore flushed, on exit)
	    if isinstance( target, str ):
	        with open( target, 'wb' ) as handle:
	            return pdict.dump( handle )

	    # output to a caller-supplied handle
	    return pdict.dump( target )


	#=============================================================================
	class ordered_dict( collections.OrderedDict ):
	    """ an OrderedDict that can create missing sub-dictionaries """

	    #=========================================================================
	    def __missing__( self, key ):
	        """
	        Create a sub-dictionary for a key that is looked up but not present.

	        key:     the key that was not found
	        returns: the newly created (empty) ordered_dict
	        """
	        child = ordered_dict()
	        self[ key ] = child

	        # Return the new object directly rather than re-reading self[ key ]:
	        # if a subclass's __setitem__ declines to store the value, reading it
	        # back would call __missing__ again and recurse without end.
	        return child


	#=============================================================================
	class guarded_dict( ordered_dict ):
	    """ an ordered_dict that protects itself using insertion rules """

	    #=========================================================================
	    def __init__( self, filter_globs = None, *args, **kwargs ):
	        """
	        Initialize a new guarded_dict instance.

	        filter_globs: list of glob-style patterns; keys matching any of them
	                      cannot be assigned (None means "no patterns")
	        *args, **kwargs: passed through to the parent constructor
	        """

	        # Each instance gets its own list when none is supplied.  The
	        # attribute is set before the parent constructor runs because
	        # __setitem__ reads it for every item being stored.
	        self.filter_globs = [] if filter_globs is None else filter_globs
	        super( guarded_dict, self ).__init__( *args, **kwargs )

	    #=========================================================================
	    def __setitem__( self, key, value ):
	        """
	        Store an item unless its key matches one of the filter patterns.

	        Rejected assignments are silently ignored.
	        """
	        if match_glist( key, self.filter_globs ) is None:
	            super( guarded_dict, self ).__setitem__( key, value )

	    #=========================================================================
	    def set_filter_globs( self, filter_globs ):
	        """
	        Set the list of glob-style patterns that prevent assignment.

	        Only later assignments are affected; items already stored are not
	        re-checked.
	        """
	        self.filter_globs = filter_globs


	#=============================================================================
	class property_dict( guarded_dict ):
	    """
	    Dictionary smart enough to deal with a complex property list.

	    Keys are property names; a comment line is stored whole as a key with a
	    value of None.  Assignments to keys matching a 'keep' rule are ignored
	    here, and the remaining assignments are passed to the parent class.
	    """

	    # static variables
	    _comm_sym = '#'                 # comment lines begin with this
	    _name_sym = '.'                 # names are separated by this
	    _set_sym  = '='                 # key/value pairs are separated by this
	    _encoding = 'latin-1'           # maps every byte to one character, so
	                                    #   binary handles round-trip unchanged

	    #=========================================================================
	    def __init__( self, *args, **kwargs ):
	        """ initialize a new property_dict instance """

	        # set before delegating: __setitem__ reads self.rules for every
	        # assignment, including any made by the parent constructor
	        self.rules = {}
	        self.eol   = '\n'
	        super( property_dict, self ).__init__( *args, **kwargs )

	    #=========================================================================
	    def __setitem__( self, key, value ):
	        """
	        Store an item unless its key matches a 'keep' rule.

	        Once 'keep' rules are set, every assignment to a matching key is
	        silently dropped, so only a value stored before the rules were set
	        survives.
	        """

	        # see if there are rules for preserving values
	        # and this key matches a preservation rule
	        if ( 'keep' in self.rules ) \
	          and ( match_glist( key, self.rules[ 'keep' ] ) is not None ):

	            # do not reassign this value
	            return

	        # let the parent do the assignment
	        super( property_dict, self ).__setitem__( key, value )

	    #=========================================================================
	    def __str__( self ):
	        """
	        Build the string representation of the property list.

	        Entries are emitted in sorted key order, one per line, each ending
	        with self.eol.  Comment keys are written as-is; all other keys are
	        written as key, separator, value.
	        """
	        chunks = []

	        # sorted() works on both a key list and a key view
	        for key in sorted( self.keys() ):
	            if key[ : 1 ] == property_dict._comm_sym:
	                chunks.append( '%s%s' % ( key, self.eol ) )
	            else:
	                chunks.append(
	                    '%s%s%s%s' % (
	                        key,
	                        property_dict._set_sym,
	                        self[ key ],
	                        self.eol
	                    )
	                )
	        return ''.join( chunks )

	    #=========================================================================
	    def dump( self, handle ):
	        """
	        Dump the property list to a file handle.

	        handle:  writable handle, opened in either text or binary mode
	        returns: whatever handle.write() returns

	        Text is encoded with property_dict._encoding when the handle is a
	        binary stream from the io module that requires bytes; encoding
	        raises UnicodeEncodeError if the text contains characters that
	        codec cannot represent.
	        """
	        import io

	        text = str( self )
	        binary = isinstance( handle, ( io.RawIOBase, io.BufferedIOBase ) )

	        # where str is already a byte string there is nothing to encode
	        if binary and not isinstance( text, bytes ):
	            text = text.encode( property_dict._encoding )
	        return handle.write( text )

	    #=========================================================================
	    def dumps( self ):
	        """ dump the property list to a string """
	        return str( self )

	    #=========================================================================
	    def load( self, handle ):
	        """ load the property list from a file handle """
	        self._load( handle.readlines() )

	    #=========================================================================
	    def loads( self, source ):
	        """ load the property list from a string """
	        self._load( source.strip().splitlines() )

	    #=========================================================================
	    def set_merge( self, handle ):
	        """ set the merging property list from a file handle """
	        self._load( handle.readlines() )

	    #=========================================================================
	    def set_merges( self, source ):
	        """ set the merging property list from a string """
	        self._load( source.strip().splitlines() )

	    #=========================================================================
	    def set_rules( self, rules ):
	        """
	        Set the property list conversion rules.

	        rules: dictionary that may contain a 'delete' and/or a 'keep' list
	               of glob-style patterns; a 'delete' list is installed as the
	               assignment filter
	        """
	        self.rules = rules
	        if 'delete' in self.rules:
	            self.set_filter_globs( self.rules[ 'delete' ] )

	    #=========================================================================
	    def _load( self, lines ):
	        """
	        Load a list of property lines into the object.

	        lines: iterable of lines (text, or bytes as read from a binary
	               handle), with or without their line endings

	        The first line decides the end-of-line style used for output.
	        Blank lines are skipped.  A line without a separator is stored as
	        a key with an empty value.
	        """

	        # first line flag
	        first = True

	        # attempt to load a property from each line
	        for line in lines:

	            # lines from a binary handle arrive as bytes where bytes and
	            # str are distinct types; convert them to text
	            if isinstance( line, bytes ) and not isinstance( line, str ):
	                line = line.decode( property_dict._encoding )

	            # check first line for end-of-line style
	            if first:
	                if line[ -2 : ] == '\r\n':
	                    self.eol = '\r\n'
	                first = False

	            # strip exterior whitespace
	            line = line.strip()

	            # nothing to store for an empty line
	            if not line:
	                continue

	            # comment lines are preserved as-is
	            if line[ : 1 ] == property_dict._comm_sym:
	                self[ line ] = None

	            # attempt to assign all property values; partition() yields an
	            # empty value when the separator is absent
	            else:
	                ( key, _, value ) = line.partition( property_dict._set_sym )
	                self[ key ] = value


	#=============================================================================
	def main( argv ):
	    """
	    Script execution entry point.

	    argv:    full argument list, with the program name at index 0
	    returns: 0 on success

	    When no input file is given, the built-in example property list, merge
	    list, and rules are processed instead.  Output goes to the file named
	    by --output, or to stdout when that option is absent.
	    """

	    # create and configure an argument parser
	    parser = argparse.ArgumentParser(
	        description = 'Properties configuration file scrubber.'
	    )
	    parser.add_argument(
	        '-i', '--input', default = None,
	        help = 'Specify property file to read'
	    )
	    parser.add_argument(
	        '-o', '--output', default = None,
	        help = 'Specify property file to write'
	    )
	    parser.add_argument(
	        '-r', '--rules', default = None,
	        help = 'Specify rule file'
	    )
	    parser.add_argument(
	        '-m', '--merge', default = None,
	        help = 'Specify property file to merge'
	    )

	    # the parser only wants the arguments (not the program "argument")
	    args = parser.parse_args( argv[ 1 : ] )

	    # create a property dictionary
	    pdict = property_dict()

	    # see if we are testing the module
	    if args.input is None:
	        pdict.set_merges( _example_merge )
	        pdict.set_rules( json.loads( _example_rules ) )
	        pdict.loads( _example_props )

	    # normal operation: merge source first, then rules, then the input;
	    # every file is closed as soon as it has been read
	    else:
	        if args.merge is not None:
	            with open( args.merge, 'rb' ) as handle:
	                pdict.set_merge( handle )
	        if args.rules is not None:
	            with open( args.rules, 'rb' ) as handle:
	                pdict.set_rules( json.load( handle ) )
	        with open( args.input, 'rb' ) as handle:
	            pdict.load( handle )

	    # check for stdout-style usage
	    if args.output is None:
	        pdict.dump( sys.stdout )

	    # dump properties to file (closing it flushes the output)
	    else:
	        with open( args.output, 'wb' ) as handle:
	            pdict.dump( handle )

	    # return success.
	    return 0

	#=============================================================================
	# run the script and use main()'s return value as the exit status
	if __name__ == "__main__":
	    sys.exit( main( sys.argv ) )