cg-soft/PathFilter.py

## PathFilter.py
#!/usr/bin/env python2.7

# Filter a set of file paths using wildcarded inclusion and exclusion rules
#
# Usage: See tests below
#
# Restrictions:
#    File paths are assumed to be relative paths. Leading slashes are ignored.
#    File names may not contain wildcards.
#    Supported wildcards:
#       "..." matches zero or more complete directory entries. As such, it is
#             only valid within slashes, as in .../a/b, a/.../b or such.
#       "*"   matches zero or more characters within a directory entry.
#       "[x]" matches enclosed characters, for example [a-z], [xy], [^x] (not x)...
#    No distinction is made between files and directories. If you want a rule to select
#    only a directory, you need to end it with "/*".
#    Trailing "/" or "/..." in a rule have no effect.
#    Inclusions and exclusions are merged, in the sense that whichever rule matches
#    the entry closest to the end of the path wins. For example, if you have three rules:
#       addInclude('a/b')
#       addExclude('a/b/c')
#       addInclude('a/b/c/d')
#    it means: include everything in a/b, except for things in a/b/c, but do include everything
#    in a/b/c/d.
#    If multiple rules select the same path location, the last entered rule wins
#    The output of getFilteredEntries() is a minimal set of paths listing all included locations.
#    A directory in that list means that all subdirectories and files are also included.

import re

# "Bias" are marks which apply to a specific directory, to possibly be overridden by
# anoither mark in a subdirectory.
UNMARKED = 0
INCLUDED = 1
EXCLUDED = 2

# For rendering and debugging bias marks
BIAS = ('(.)', '(+)', '(-)')

# For transforming a shell glob into a regexp
REGEX_QUOTE = re.compile(r'([\\$+{}()?.])')

def splitPath(path, noDuplicate=()):
    """ Helper function to remove duplications in paths.
        The noDuplicate set contains entries like '...' to be
        removed as duplicates (eg '.../...') or at the end of the path.
        Entries need to be non-blank, to collapse consecutive slashes.
    """
    cleanedPath = []
    for entry in path.split('/'):
        if entry:
            if entry in noDuplicate:
                if cleanedPath and cleanedPath[-1] == entry:
                    continue
            cleanedPath.append(entry)
    while cleanedPath[-1] in noDuplicate:
        cleanedPath.pop()
    return cleanedPath

def joinPath(path):
    """ Helper function to recreate a proper path.
        Since root's name is blank, strip off the first slash.
    """
    return "/".join(path).lstrip('/')

class PathFilterException:
    def __init__(self, error):
        self.error = error

    def __str__(self):
        return str(self.error)

class PathNode:
    def __init__(self, name=''):
        """ Helper class to represent a single directory node """
        self.name = name
        self.mark = UNMARKED
        self.seen = False
        self.unfilteredKids = 0
        self.filteredKids = 0
        self.kids = {}

    def reset(self):
        self.mark = UNMARKED
        self.filteredKids = 0
        if self.seen:
            self.seen = False
            for kid in self.kids:
                self.kids[kid].reset()

    def addEntry(self, path):
        """ Add a permanent directory entry """
        tail = path[1:]
        head = path[0]
        if '*' in head or '[' in head or ']' in head or head == '...':
            raise PathFilterException('Unsupported character sequence in entry')
        if head in self.kids:
            if tail:
                self.kids[head].addEntry(tail)
            else:
                raise PathFilterException('Duplicate entry')
        else:
            self.kids[head] = PathNode(head)
            if tail:
                self.kids[head].addEntry(tail)
        self.unfilteredKids += 1
        return self

    def addFilter(self, path, bias=INCLUDED):
        """ Add a filter condition. This sets the bias marks  """
        # List of marked items, used both to check whether the rule
        # selected something and to keep track so that reset() is cheap.
        self.seen = True

        # if we are leaf, just set the requested bias
        if not path:
            self.mark = bias
            return 1

        # if we are not the leaf, continue processing
        marked = 0
        tail = path[1:]
        head = path[0]

        # "..." matches zero or more directories, so....
        if head == '...':
            # tail must be non-empty, because splitPath() enforces no trailing ...
            head = tail.pop(0)

            # process path as passed in on all kids, thereby skipping the current dir
            for kid in self.kids:
                marked += self.kids[kid].addFilter(path, bias)

        # Deal with wildcards, if present
        if head == '*':
            # simplified case...
            for kid in self.kids:
                marked += self.kids[kid].addFilter(tail, bias)
        elif '*' in head or ('[' in head and ']' in head):
            try:
                headRe = re.compile(REGEX_QUOTE.sub(r'\\\1', head).replace('*','.*'))
            except:
                raise PathFilterException('Unable to process wildcards')
            #print "RE for", head, "is", headRe.pattern
            for kid in self.kids:
                if headRe.match(kid):
                    marked += self.kids[kid].addFilter(tail, bias)

        # Otherwise check if there is an exact match
        elif head in self.kids:
            marked += self.kids[head].addFilter(tail, bias)
        return marked

    def getUnfilteredList(self):
        """ Generator returning a dump of the engine state """
        if self.seen:
            sep = ''
        else:
            sep = '!'
        if self.kids:
            for kid in sorted(self.kids.keys()):
                for entry in self.kids[kid].getUnfilteredList():
                    yield [ self.name + BIAS[self.mark] + '[%d/%d]%s' % (self.unfilteredKids, self.filteredKids, sep) ] + entry
        else:
            yield [ self.name + BIAS[self.mark] + '[%d/%d]%s' % (self.unfilteredKids, self.filteredKids, sep) ]

    def setFilteredCount(self, bias):
        """ Count number of kids selected by current rule set.
            This can be compared with the unfilteredKids to decide whether
            to return just this node, or continue recursing into sub-nodes.
        """
        self.filteredKids = 0
        if self.seen:
            if self.mark != UNMARKED:
                bias = self.mark
            if self.kids:
                for kid in self.kids:
                    self.filteredKids += self.kids[kid].setFilteredCount(bias)
            elif bias == INCLUDED:
                # this is a leaf node, so if selected it will stand out
                self.filteredKids = 1
        elif bias == INCLUDED:
            # If no rule has touched this node, we either return everything or nothing
            if self.kids:
                self.filteredKids = self.unfilteredKids
            else:
                self.filteredKids = 1

        return self.filteredKids

    def getFilteredList(self):
        """ Traverse the tree, dumping the node if either:
            - there are kids, but all kids are included
            - there are no kids, but the leaf node is selected
        """
        if (self.unfilteredKids > 0 and self.filteredKids == self.unfilteredKids) or\
           (self.filteredKids > 0 and self.unfilteredKids == 0):
            yield [ self.name ]
        else:
            for kid in sorted(self.kids.keys()):
                for entry in self.kids[kid].getFilteredList():
                    yield [ self.name ] + entry

class PathFilter:
    def __init__(self):
        """ Filter a set of file paths using wildcarded inclusion and exclusion rules

            Usage: See tests below

            Restrictions:

            File paths are assumed to be relative paths. Leading slashes are ignored.

            File names may not contain wildcards.

            Supported wildcards:
              "..." matches zero or more complete directory entries. As such, it is
                    only valid within slashes, as in .../a/b, a/.../b or such.
              "*"   matches zero or more characters within a directory entry.
              "[x]" matches enclosed characters, for example [a-z], [xy], [^x] (not x)...

            No distinction is made between files and directories. If you want a rule to select
            only a directory, you need to end it with "/*".

            Trailing "/" or "/..." in a rule have no effect.

            Inclusions and exclusions are merged, in the sense that whichever rule matches
            the entry closest to the end of the path wins. For example, if you have three rules:
               addInclude('a/b')
               addExclude('a/b/c')
               addInclude('a/b/c/d')
            it means: include everything in a/b, except for things in a/b/c, but do include everything
            in a/b/c/d.
            If multiple rules select the same path location, the last entered rule wins

            The output of getFilteredEntries() is a minimal set of paths listing all included locations.
            A directory in that list means that all subdirectories and files are also included.
        """
        self.root = PathNode()
        self.marked = []

    def reset(self):
        """ Delete all rules and all traces of processing. """
        self.root.reset()
        return self

    def addEntry(self, path):
        """ Add a selectable path entry. This must be a unique and complete file path. """
        try:
            self.root.addEntry(splitPath(path))
        except PathFilterException, e:
            # re-raise exception, adding the user supplied input for easy reporting
            raise PathFilterException("%s: %s" % (str(e), path))
        return self

    def addEntries(self, paths):
        """ Add a list of paths, returning a collected list of exceptions. """
        exceptions = []
        for path in paths:
            try:
                self.addEntry(path)
            except PathFilterException, e:
                exceptions.append(str(e))
        return exceptions

    def addFilter(self, path, bias=INCLUDED):
        """ Add an inclusion or an exclusion rule. """
        try:
            marked = self.root.addFilter(splitPath(path, ('...',)), bias)
        except PathFilterException, e:
            # re-raise exception, adding the user supplied input for easy reporting
            raise PathFilterException("%s: %s" % (str(e), path))
        if not marked:
            raise PathFilterException("Filter did not select any items: %s" % path)
        return self

    def addIncluded(self, path):
        """ Add an inclusion rule """
        return self.addFilter(path, INCLUDED)

    def addExcluded(self, path):
        """ Add an exclusion rule """
        return self.addFilter(path, EXCLUDED)

    def addFilters(self, paths):
        """ Add multiple filters. Filters with a "!" at the beginning are exclusions,
            everything else is an inclusion. Order matters, the last rule wins over
            previous rules matching the same entry.
        """
        # we return the list of exceptions instead of throwing one simply because
        # this is a batch operation, and my general rule on batch operations is to
        # try as many as you can in order to collect a large number of errors, instead
        # of just bailing at the first one.
        exceptions = []

        for path in paths:
            path = path.strip()
            try:
                if path.startswith('!'):
                    self.addExcluded(path[1:])
                else:
                    self.addIncluded(path)
            except PathFilterException, e:
                exceptions.append(str(e))
        return exceptions

    def getFilteredList(self):
        """ Generator returning the list of included paths """
        # By default, nothing is selected, so we start our traversal
        # with bias=EXCLUDED. We must run into a mark=INCLUDED for
        # things to start being counted as selected.
        self.root.setFilteredCount(EXCLUDED)

        # setFilteredCount actually does the hard work, here we
        # simply dump out recursively...
        for entry in self.root.getFilteredList():
            yield joinPath(entry)

    def getUnfilteredList(self):
        """ Dump the current engine state """
        for entry in self.root.getUnfilteredList():
            yield joinPath(entry)

    def dumpFilteredList(self, title):
        """ Test helper function to dump a filtered list """
        empty = True
        print "===[ %s ]===" % title
        for entry in self.getFilteredList():
            print ">%s<" % entry
            empty = False
        if empty:
            print "empty"

    def dumpUnfilteredList(self, title):
        """ Test helper function to dump the current engine state """
        empty = True
        print "===[ %s ]===" % title
        for entry in self.getUnfilteredList():
            print ">%s<" % entry
            empty = False
        if empty:
            print "empty"

if __name__ == '__main__':
    f = PathFilter()
    f.addEntry('my-dir-a/my-dir-b/1')
    f.addEntry('//my-dir-a///my-dir-b/2')
    f.addEntry('my-dir-a/my-dir-b/3')
    f.addEntry('my-dir-a/my-dir-b/my-dir-x/1')
    f.addEntry('my-dir-a/my-dir-b/my-dir-x/2')
    f.addEntry('my-dir-a/my-dir-b/my-dir-x/3')
    f.addEntry('my-dir-a/my-dir-b/my-dir-x/my-dir-y/1')
    f.addEntry('my-dir-a/my-dir-b/my-dir-x/my-dir-y/2')
    f.addEntry('my-dir-a/my-dir-b/my-dir-x/my-dir-y/3')
    f.addEntry('your-dir-a/your-dir-b/1')
    f.addEntry('your-dir-a/your-dir-b/2')
    f.addEntry('your-dir-a/your-dir-b/3')
    f.addEntry('your-dir-a/your-dir-b/3.text')
    f.addEntry('your-dir-a/your-dir-b/3Xtext')
    f.addEntry('your-dir-a/your-dir-b/your-dir-x/1')
    f.addEntry('your-dir-a/your-dir-b/your-dir-x/2')
    f.addEntry('your-dir-a/your-dir-b/your-dir-x/3')
    f.addEntry('your-dir-a/your-dir-b/your-dir-x/your-dir-y/1')
    f.addEntry('your-dir-a/your-dir-b/your-dir-x/your-dir-y/2')
    f.addEntry('your-dir-a/your-dir-b/your-dir-x/your-dir-y/3')
    f.addEntry('your-dir-a/x1/x2/x3/your-dir-b/y1/y2/y3/your-dir-x/z1/z2/z3/your-dir-y/999')
    f.addEntry('your-dir-a/x3/your-dir-b/y1/y2/y3/your-dir-x/z1/z2/z3/your-dir-y/999')
    f.addEntry('your-dir-a/x1/x2/your-dir-b/y1/y2/y3/your-dir-x/z1/z2/your-dir-y/999')
    f.addEntry('your-dir-a/x1/your-dir-b/y1/y3/your-dir-x/z1/z2/z3/your-dir-y/999')
    f.addEntry('your-dir-a/x1/x2/x3/your-dir-b/y1/y2/your-dir-x/z1/z2/your-dir-y/999')
    f.addEntry('your-dir-a/x1/your-dir-b/y1/your-dir-x/z1/your-dir-y/999')
    for entry in ('my-dir-a', 'a/.../b', 'a/x*b/c'):
        try:
            f.addEntry(entry)
        except PathFilterException, e:
            print "Caught exception %s, good" % str(e)
        else:
            print "Expected exception."
    f.dumpFilteredList('Initial load - should be empty)')
    #f.dumpUnfilteredList('Initial load - should be everything)')
    f.addIncluded('my-dir-a/my-dir-b/my-dir-x')
    f.dumpFilteredList('Including my-dir-a/my-dir-b/my-dir-x')
    #f.dumpUnfilteredList('Including my-dir-a/my-dir-b/my-dir-x')
    f.addExcluded('my-dir-a/my-dir-b/my-dir-x/my-dir-y')
    f.dumpFilteredList('Excluding my-dir-a/my-dir-b/my-dir-x/my-dir-y')
    #f.dumpUnfilteredList('Excluding my-dir-a/my-dir-b/my-dir-x/my-dir-y')
    f.addIncluded('my-dir-a/my-dir-b/my-dir-x/my-dir-y/1')
    f.dumpFilteredList('Including my-dir-a/my-dir-b/my-dir-x/my-dir-y/1')
    #f.dumpUnfilteredList('Including my-dir-a/my-dir-b/my-dir-x/my-dir-y/1')
    f.reset()
    f.dumpFilteredList('After reset')
    #f.dumpUnfilteredList('After reset')
    f.addIncluded('my-dir-a/my-dir-b/my-dir-x')
    #f.dumpUnfilteredList('Including my-dir-a/my-dir-b/my-dir-x/my-dir-y/1')
    f.dumpFilteredList('Including my-dir-a/my-dir-b/my-dir-x')
    f.addIncluded('my-dir-a/my-dir-b/my-dir-x/*')
    f.dumpFilteredList('Including my-dir-a/my-dir-b/my-dir-x (should be the same)')
    try:
        f.addIncluded('.../1/*')
    except PathFilterException, e:
        print "Caught exception %s, good" % str(e)
    else:
        print "Expected exception."
    f.dumpFilteredList('Including .../1/* (should be the same)')
    f.reset()
    f.dumpFilteredList('After reset')
    f.addIncluded('.../1')
    f.dumpFilteredList('Including .../1')
    f.addExcluded('.../my-dir-x/1')
    f.dumpFilteredList('Excluding .../my-dir-x/1')
    f.addIncluded('your-dir-a/...')
    f.dumpFilteredList('Including your-dir-a/...')
    f.addExcluded('.../your-dir-*')
    f.dumpFilteredList('Excluding .../your-dir-*')
    f.reset()
    f.dumpFilteredList('After reset')
    f.addIncluded('.../.../1')
    f.dumpFilteredList('Including .../.../1')
    try:
        f.addIncluded('.../.../1/*')
    except PathFilterException, e:
        print "Caught exception %s, good" % str(e)
    else:
        print "Expected exception."
    f.dumpFilteredList('Including .../.../1/*')
    f.addExcluded('.../.../my-dir-x/1')
    f.dumpFilteredList('Excluding .../.../my-dir-x/1')
    f.addIncluded('your-dir-a/.../...')
    f.dumpFilteredList('Including your-dir-a/.../...')
    f.addExcluded('.../.../your-dir-*')
    f.dumpFilteredList('Excluding .../.../your-dir-*')
    f.reset()
    f.dumpFilteredList('After reset')
    f.addIncluded('your-dir-a/.../your-dir-b/.../your-dir-x/.../your-dir-y')
    f.dumpFilteredList('Include your-dir-a/.../your-dir-b/.../your-dir-x/.../your-dir-y')
    f.addExcluded('.../[1-3]')
    f.dumpFilteredList('Exclude .../[1-3]')
    f.addExcluded('.../y2')
    f.dumpFilteredList('Exclude .../y2')
    f.addExcluded('.../y2/.../999')
    f.dumpFilteredList('Exclude .../y2/.../999')
    f.reset()
    f.dumpFilteredList('After reset')
    f.addIncluded('your-dir-a/*/your-dir-b/*/your-dir-x/*/your-dir-y')
    f.dumpFilteredList('Include your-dir-a/*/your-dir-b/*/your-dir-x/*/your-dir-y')
    f.reset()
    f.dumpFilteredList('After reset')
    f.addIncluded('.../*.text')
    f.dumpFilteredList('Include .../*.text (should not match 3Xtext)')

    print "try again with batch oriented functions"
    f = PathFilter()
    errors = f.addEntries(['my-dir-a/my-dir-b/1',
                           '//my-dir-a///my-dir-b/2',
                           'my-dir-a/my-dir-b/3',
                           'my-dir-a/my-dir-b/my-dir-x/1',
                           'my-dir-a/my-dir-b/my-dir-x/2',
                           'my-dir-a/my-dir-b/my-dir-x/3',
                           'my-dir-a/my-dir-b/my-dir-x/my-dir-y/1',
                           'my-dir-a/my-dir-b/my-dir-x/my-dir-y/2',
                           'my-dir-a/my-dir-b/my-dir-x/my-dir-y/3',
                           'your-dir-a/your-dir-b/1',
                           'your-dir-a/your-dir-b/2',
                           'your-dir-a/your-dir-b/3',
                           'your-dir-a/your-dir-b/3.text',
                           'your-dir-a/your-dir-b/3Xtext',
                           'your-dir-a/your-dir-b/your-dir-x/1',
                           'your-dir-a/your-dir-b/your-dir-x/2',
                           'your-dir-a/your-dir-b/your-dir-x/3',
                           'your-dir-a/your-dir-b/your-dir-x/your-dir-y/1',
                           'your-dir-a/your-dir-b/your-dir-x/your-dir-y/2',
                           'your-dir-a/your-dir-b/your-dir-x/your-dir-y/3',
                           'your-dir-a/x1/x2/x3/your-dir-b/y1/y2/y3/your-dir-x/z1/z2/z3/your-dir-y/999',
                           'your-dir-a/x3/your-dir-b/y1/y2/y3/your-dir-x/z1/z2/z3/your-dir-y/999',
                           'your-dir-a/x1/x2/your-dir-b/y1/y2/y3/your-dir-x/z1/z2/your-dir-y/999',
                           'your-dir-a/x1/your-dir-b/y1/y3/your-dir-x/z1/z2/z3/your-dir-y/999',
                           'your-dir-a/x1/x2/x3/your-dir-b/y1/y2/your-dir-x/z1/z2/your-dir-y/999',
                           'your-dir-a/x1/your-dir-b/y1/your-dir-x/z1/your-dir-y/999',
                           'my-dir-a',
                           'a/.../b',
                           'a/x*b/c'])
    print "Exceptions in initial load"
    print "\n".join(errors)
    f.dumpFilteredList('Initial load - should be empty)')
    f.dumpUnfilteredList('Initial load - should be everything)')
    f.dumpFilteredList('After reset')
    errors = f.addFilters(['.../.../1',
                           '.../.../1/*',
                           '!.../.../my-dir-x/1'])
    print "Exceptions in adding filters"
    print "\n".join(errors)
    f.dumpFilteredList('Including .../.../1, Excluding .../.../my-dir-x/1')
	#!/usr/bin/env python2.7

	# Filter a set of file paths using wildcarded inclusion and exclusion rules
	#
	# Usage: See tests below
	#
	# Restrictions:
	# File paths are assumed to be relative paths. Leading slashes are ignored.
	# File names may not contain wildcards.
	# Supported wildcards:
	# "..." matches zero or more complete directory entries. As such, it is
	# only valid within slashes, as in .../a/b, a/.../b or such.
	# "*" matches zero or more characters within a directory entry.
	# "[x]" matches enclosed characters, for example [a-z], [xy], [^x] (not x)...
	# No distinction is made between files and directories. If you want a rule to select
	# only a directory, you need to end it with "/*".
	# Trailing "/" or "/..." in a rule have no effect.
	# Inclusions and exclusions are merged, in the sense that whichever rule matches
	# the entry closest to the end of the path wins. For example, if you have three rules:
	# addInclude('a/b')
	# addExclude('a/b/c')
	# addInclude('a/b/c/d')
	# it means: include everything in a/b, except for things in a/b/c, but do include everything
	# in a/b/c/d.
	# If multiple rules select the same path location, the last entered rule wins
	# The output of getFilteredEntries() is a minimal set of paths listing all included locations.
	# A directory in that list means that all subdirectories and files are also included.

	import re

	# "Bias" are marks which apply to a specific directory, to possibly be overridden by
	# anoither mark in a subdirectory.
	UNMARKED = 0
	INCLUDED = 1
	EXCLUDED = 2

	# For rendering and debugging bias marks
	BIAS = ('(.)', '(+)', '(-)')

	# For transforming a shell glob into a regexp
	REGEX_QUOTE = re.compile(r'([\\$+{}()?.])')

	def splitPath(path, noDuplicate=()):
	""" Helper function to remove duplications in paths.
	The noDuplicate set contains entries like '...' to be
	removed as duplicates (eg '.../...') or at the end of the path.
	Entries need to be non-blank, to collapse consecutive slashes.
	"""
	cleanedPath = []
	for entry in path.split('/'):
	if entry:
	if entry in noDuplicate:
	if cleanedPath and cleanedPath[-1] == entry:
	continue
	cleanedPath.append(entry)
	while cleanedPath[-1] in noDuplicate:
	cleanedPath.pop()
	return cleanedPath

	def joinPath(path):
	""" Helper function to recreate a proper path.
	Since root's name is blank, strip off the first slash.
	"""
	return "/".join(path).lstrip('/')

	class PathFilterException:
	def __init__(self, error):
	self.error = error

	def __str__(self):
	return str(self.error)

	class PathNode:
	def __init__(self, name=''):
	""" Helper class to represent a single directory node """
	self.name = name
	self.mark = UNMARKED
	self.seen = False
	self.unfilteredKids = 0
	self.filteredKids = 0
	self.kids = {}

	def reset(self):
	self.mark = UNMARKED
	self.filteredKids = 0
	if self.seen:
	self.seen = False
	for kid in self.kids:
	self.kids[kid].reset()

	def addEntry(self, path):
	""" Add a permanent directory entry """
	tail = path[1:]
	head = path[0]
	if '*' in head or '[' in head or ']' in head or head == '...':
	raise PathFilterException('Unsupported character sequence in entry')
	if head in self.kids:
	if tail:
	self.kids[head].addEntry(tail)
	else:
	raise PathFilterException('Duplicate entry')
	else:
	self.kids[head] = PathNode(head)
	if tail:
	self.kids[head].addEntry(tail)
	self.unfilteredKids += 1
	return self

	def addFilter(self, path, bias=INCLUDED):
	""" Add a filter condition. This sets the bias marks """
	# List of marked items, used both to check whether the rule
	# selected something and to keep track so that reset() is cheap.
	self.seen = True

	# if we are leaf, just set the requested bias
	if not path:
	self.mark = bias
	return 1

	# if we are not the leaf, continue processing
	marked = 0
	tail = path[1:]
	head = path[0]

	# "..." matches zero or more directories, so....
	if head == '...':
	# tail must be non-empty, because splitPath() enforces no trailing ...
	head = tail.pop(0)

	# process path as passed in on all kids, thereby skipping the current dir
	for kid in self.kids:
	marked += self.kids[kid].addFilter(path, bias)

	# Deal with wildcards, if present
	if head == '*':
	# simplified case...
	for kid in self.kids:
	marked += self.kids[kid].addFilter(tail, bias)
	elif '*' in head or ('[' in head and ']' in head):
	try:
	headRe = re.compile(REGEX_QUOTE.sub(r'\\\1', head).replace('','.'))
	except:
	raise PathFilterException('Unable to process wildcards')
	#print "RE for", head, "is", headRe.pattern
	for kid in self.kids:
	if headRe.match(kid):
	marked += self.kids[kid].addFilter(tail, bias)

	# Otherwise check if there is an exact match
	elif head in self.kids:
	marked += self.kids[head].addFilter(tail, bias)
	return marked

	def getUnfilteredList(self):
	""" Generator returning a dump of the engine state """
	if self.seen:
	sep = ''
	else:
	sep = '!'
	if self.kids:
	for kid in sorted(self.kids.keys()):
	for entry in self.kids[kid].getUnfilteredList():
	yield [ self.name + BIAS[self.mark] + '[%d/%d]%s' % (self.unfilteredKids, self.filteredKids, sep) ] + entry
	else:
	yield [ self.name + BIAS[self.mark] + '[%d/%d]%s' % (self.unfilteredKids, self.filteredKids, sep) ]

	def setFilteredCount(self, bias):
	""" Count number of kids selected by current rule set.
	This can be compared with the unfilteredKids to decide whether
	to return just this node, or continue recursing into sub-nodes.
	"""
	self.filteredKids = 0
	if self.seen:
	if self.mark != UNMARKED:
	bias = self.mark
	if self.kids:
	for kid in self.kids:
	self.filteredKids += self.kids[kid].setFilteredCount(bias)
	elif bias == INCLUDED:
	# this is a leaf node, so if selected it will stand out
	self.filteredKids = 1
	elif bias == INCLUDED:
	# If no rule has touched this node, we either return everything or nothing
	if self.kids:
	self.filteredKids = self.unfilteredKids
	else:
	self.filteredKids = 1

	return self.filteredKids

	def getFilteredList(self):
	""" Traverse the tree, dumping the node if either:
	- there are kids, but all kids are included
	- there are no kids, but the leaf node is selected
	"""
	if (self.unfilteredKids > 0 and self.filteredKids == self.unfilteredKids) or\
	(self.filteredKids > 0 and self.unfilteredKids == 0):
	yield [ self.name ]
	else:
	for kid in sorted(self.kids.keys()):
	for entry in self.kids[kid].getFilteredList():
	yield [ self.name ] + entry

	class PathFilter:
	def __init__(self):
	""" Filter a set of file paths using wildcarded inclusion and exclusion rules

	Usage: See tests below

	Restrictions:

	File paths are assumed to be relative paths. Leading slashes are ignored.

	File names may not contain wildcards.

	Supported wildcards:
	"..." matches zero or more complete directory entries. As such, it is
	only valid within slashes, as in .../a/b, a/.../b or such.
	"*" matches zero or more characters within a directory entry.
	"[x]" matches enclosed characters, for example [a-z], [xy], [^x] (not x)...

	No distinction is made between files and directories. If you want a rule to select
	only a directory, you need to end it with "/*".

	Trailing "/" or "/..." in a rule have no effect.

	Inclusions and exclusions are merged, in the sense that whichever rule matches
	the entry closest to the end of the path wins. For example, if you have three rules:
	addInclude('a/b')
	addExclude('a/b/c')
	addInclude('a/b/c/d')
	it means: include everything in a/b, except for things in a/b/c, but do include everything
	in a/b/c/d.
	If multiple rules select the same path location, the last entered rule wins

	The output of getFilteredEntries() is a minimal set of paths listing all included locations.
	A directory in that list means that all subdirectories and files are also included.
	"""
	self.root = PathNode()
	self.marked = []

	def reset(self):
	""" Delete all rules and all traces of processing. """
	self.root.reset()
	return self

	def addEntry(self, path):
	""" Add a selectable path entry. This must be a unique and complete file path. """
	try:
	self.root.addEntry(splitPath(path))
	except PathFilterException, e:
	# re-raise exception, adding the user supplied input for easy reporting
	raise PathFilterException("%s: %s" % (str(e), path))
	return self

	def addEntries(self, paths):
	""" Add a list of paths, returning a collected list of exceptions. """
	exceptions = []
	for path in paths:
	try:
	self.addEntry(path)
	except PathFilterException, e:
	exceptions.append(str(e))
	return exceptions

	def addFilter(self, path, bias=INCLUDED):
	""" Add an inclusion or an exclusion rule. """
	try:
	marked = self.root.addFilter(splitPath(path, ('...',)), bias)
	except PathFilterException, e:
	# re-raise exception, adding the user supplied input for easy reporting
	raise PathFilterException("%s: %s" % (str(e), path))
	if not marked:
	raise PathFilterException("Filter did not select any items: %s" % path)
	return self

	def addIncluded(self, path):
	""" Add an inclusion rule """
	return self.addFilter(path, INCLUDED)

	def addExcluded(self, path):
	""" Add an exclusion rule """
	return self.addFilter(path, EXCLUDED)

	def addFilters(self, paths):
	""" Add multiple filters. Filters with a "!" at the beginning are exclusions,
	everything else is an inclusion. Order matters, the last rule wins over
	previous rules matching the same entry.
	"""
	# we return the list of exceptions instead of throwing one simply because
	# this is a batch operation, and my general rule on batch operations is to
	# try as many as you can in order to collect a large number of errors, instead
	# of just bailing at the first one.
	exceptions = []

	for path in paths:
	path = path.strip()
	try:
	if path.startswith('!'):
	self.addExcluded(path[1:])
	else:
	self.addIncluded(path)
	except PathFilterException, e:
	exceptions.append(str(e))
	return exceptions

	def getFilteredList(self):
	""" Generator returning the list of included paths """
	# By default, nothing is selected, so we start our traversal
	# with bias=EXCLUDED. We must run into a mark=INCLUDED for
	# things to start being counted as selected.
	self.root.setFilteredCount(EXCLUDED)

	# setFilteredCount actually does the hard work, here we
	# simply dump out recursively...
	for entry in self.root.getFilteredList():
	yield joinPath(entry)

	def getUnfilteredList(self):
	""" Dump the current engine state """
	for entry in self.root.getUnfilteredList():
	yield joinPath(entry)

	def dumpFilteredList(self, title):
	""" Test helper function to dump a filtered list """
	empty = True
	print "===[ %s ]===" % title
	for entry in self.getFilteredList():
	print ">%s<" % entry
	empty = False
	if empty:
	print "empty"

	def dumpUnfilteredList(self, title):
	""" Test helper function to dump the current engine state """
	empty = True
	print "===[ %s ]===" % title
	for entry in self.getUnfilteredList():
	print ">%s<" % entry
	empty = False
	if empty:
	print "empty"

	if __name__ == '__main__':
	f = PathFilter()
	f.addEntry('my-dir-a/my-dir-b/1')
	f.addEntry('//my-dir-a///my-dir-b/2')
	f.addEntry('my-dir-a/my-dir-b/3')
	f.addEntry('my-dir-a/my-dir-b/my-dir-x/1')
	f.addEntry('my-dir-a/my-dir-b/my-dir-x/2')
	f.addEntry('my-dir-a/my-dir-b/my-dir-x/3')
	f.addEntry('my-dir-a/my-dir-b/my-dir-x/my-dir-y/1')
	f.addEntry('my-dir-a/my-dir-b/my-dir-x/my-dir-y/2')
	f.addEntry('my-dir-a/my-dir-b/my-dir-x/my-dir-y/3')
	f.addEntry('your-dir-a/your-dir-b/1')
	f.addEntry('your-dir-a/your-dir-b/2')
	f.addEntry('your-dir-a/your-dir-b/3')
	f.addEntry('your-dir-a/your-dir-b/3.text')
	f.addEntry('your-dir-a/your-dir-b/3Xtext')
	f.addEntry('your-dir-a/your-dir-b/your-dir-x/1')
	f.addEntry('your-dir-a/your-dir-b/your-dir-x/2')
	f.addEntry('your-dir-a/your-dir-b/your-dir-x/3')
	f.addEntry('your-dir-a/your-dir-b/your-dir-x/your-dir-y/1')
	f.addEntry('your-dir-a/your-dir-b/your-dir-x/your-dir-y/2')
	f.addEntry('your-dir-a/your-dir-b/your-dir-x/your-dir-y/3')
	f.addEntry('your-dir-a/x1/x2/x3/your-dir-b/y1/y2/y3/your-dir-x/z1/z2/z3/your-dir-y/999')
	f.addEntry('your-dir-a/x3/your-dir-b/y1/y2/y3/your-dir-x/z1/z2/z3/your-dir-y/999')
	f.addEntry('your-dir-a/x1/x2/your-dir-b/y1/y2/y3/your-dir-x/z1/z2/your-dir-y/999')
	f.addEntry('your-dir-a/x1/your-dir-b/y1/y3/your-dir-x/z1/z2/z3/your-dir-y/999')
	f.addEntry('your-dir-a/x1/x2/x3/your-dir-b/y1/y2/your-dir-x/z1/z2/your-dir-y/999')
	f.addEntry('your-dir-a/x1/your-dir-b/y1/your-dir-x/z1/your-dir-y/999')
	for entry in ('my-dir-a', 'a/.../b', 'a/x*b/c'):
	try:
	f.addEntry(entry)
	except PathFilterException, e:
	print "Caught exception %s, good" % str(e)
	else:
	print "Expected exception."
	f.dumpFilteredList('Initial load - should be empty)')
	#f.dumpUnfilteredList('Initial load - should be everything)')
	f.addIncluded('my-dir-a/my-dir-b/my-dir-x')
	f.dumpFilteredList('Including my-dir-a/my-dir-b/my-dir-x')
	#f.dumpUnfilteredList('Including my-dir-a/my-dir-b/my-dir-x')
	f.addExcluded('my-dir-a/my-dir-b/my-dir-x/my-dir-y')
	f.dumpFilteredList('Excluding my-dir-a/my-dir-b/my-dir-x/my-dir-y')
	#f.dumpUnfilteredList('Excluding my-dir-a/my-dir-b/my-dir-x/my-dir-y')
	f.addIncluded('my-dir-a/my-dir-b/my-dir-x/my-dir-y/1')
	f.dumpFilteredList('Including my-dir-a/my-dir-b/my-dir-x/my-dir-y/1')
	#f.dumpUnfilteredList('Including my-dir-a/my-dir-b/my-dir-x/my-dir-y/1')
	f.reset()
	f.dumpFilteredList('After reset')
	#f.dumpUnfilteredList('After reset')
	f.addIncluded('my-dir-a/my-dir-b/my-dir-x')
	#f.dumpUnfilteredList('Including my-dir-a/my-dir-b/my-dir-x/my-dir-y/1')
	f.dumpFilteredList('Including my-dir-a/my-dir-b/my-dir-x')
	f.addIncluded('my-dir-a/my-dir-b/my-dir-x/*')
	f.dumpFilteredList('Including my-dir-a/my-dir-b/my-dir-x (should be the same)')
	try:
	f.addIncluded('.../1/*')
	except PathFilterException, e:
	print "Caught exception %s, good" % str(e)
	else:
	print "Expected exception."
	f.dumpFilteredList('Including .../1/* (should be the same)')
	f.reset()
	f.dumpFilteredList('After reset')
	f.addIncluded('.../1')
	f.dumpFilteredList('Including .../1')
	f.addExcluded('.../my-dir-x/1')
	f.dumpFilteredList('Excluding .../my-dir-x/1')
	f.addIncluded('your-dir-a/...')
	f.dumpFilteredList('Including your-dir-a/...')
	f.addExcluded('.../your-dir-*')
	f.dumpFilteredList('Excluding .../your-dir-*')
	f.reset()
	f.dumpFilteredList('After reset')
	f.addIncluded('.../.../1')
	f.dumpFilteredList('Including .../.../1')
	try:
	f.addIncluded('.../.../1/*')
	except PathFilterException, e:
	print "Caught exception %s, good" % str(e)
	else:
	print "Expected exception."
	f.dumpFilteredList('Including .../.../1/*')
	f.addExcluded('.../.../my-dir-x/1')
	f.dumpFilteredList('Excluding .../.../my-dir-x/1')
	f.addIncluded('your-dir-a/.../...')
	f.dumpFilteredList('Including your-dir-a/.../...')
	f.addExcluded('.../.../your-dir-*')
	f.dumpFilteredList('Excluding .../.../your-dir-*')
	f.reset()
	f.dumpFilteredList('After reset')
	f.addIncluded('your-dir-a/.../your-dir-b/.../your-dir-x/.../your-dir-y')
	f.dumpFilteredList('Include your-dir-a/.../your-dir-b/.../your-dir-x/.../your-dir-y')
	f.addExcluded('.../[1-3]')
	f.dumpFilteredList('Exclude .../[1-3]')
	f.addExcluded('.../y2')
	f.dumpFilteredList('Exclude .../y2')
	f.addExcluded('.../y2/.../999')
	f.dumpFilteredList('Exclude .../y2/.../999')
	f.reset()
	f.dumpFilteredList('After reset')
	f.addIncluded('your-dir-a//your-dir-b//your-dir-x/*/your-dir-y')
	f.dumpFilteredList('Include your-dir-a//your-dir-b//your-dir-x/*/your-dir-y')
	f.reset()
	f.dumpFilteredList('After reset')
	f.addIncluded('.../*.text')
	f.dumpFilteredList('Include .../*.text (should not match 3Xtext)')

	print "try again with batch oriented functions"
	f = PathFilter()
	errors = f.addEntries(['my-dir-a/my-dir-b/1',
	'//my-dir-a///my-dir-b/2',
	'my-dir-a/my-dir-b/3',
	'my-dir-a/my-dir-b/my-dir-x/1',
	'my-dir-a/my-dir-b/my-dir-x/2',
	'my-dir-a/my-dir-b/my-dir-x/3',
	'my-dir-a/my-dir-b/my-dir-x/my-dir-y/1',
	'my-dir-a/my-dir-b/my-dir-x/my-dir-y/2',
	'my-dir-a/my-dir-b/my-dir-x/my-dir-y/3',
	'your-dir-a/your-dir-b/1',
	'your-dir-a/your-dir-b/2',
	'your-dir-a/your-dir-b/3',
	'your-dir-a/your-dir-b/3.text',
	'your-dir-a/your-dir-b/3Xtext',
	'your-dir-a/your-dir-b/your-dir-x/1',
	'your-dir-a/your-dir-b/your-dir-x/2',
	'your-dir-a/your-dir-b/your-dir-x/3',
	'your-dir-a/your-dir-b/your-dir-x/your-dir-y/1',
	'your-dir-a/your-dir-b/your-dir-x/your-dir-y/2',
	'your-dir-a/your-dir-b/your-dir-x/your-dir-y/3',
	'your-dir-a/x1/x2/x3/your-dir-b/y1/y2/y3/your-dir-x/z1/z2/z3/your-dir-y/999',
	'your-dir-a/x3/your-dir-b/y1/y2/y3/your-dir-x/z1/z2/z3/your-dir-y/999',
	'your-dir-a/x1/x2/your-dir-b/y1/y2/y3/your-dir-x/z1/z2/your-dir-y/999',
	'your-dir-a/x1/your-dir-b/y1/y3/your-dir-x/z1/z2/z3/your-dir-y/999',
	'your-dir-a/x1/x2/x3/your-dir-b/y1/y2/your-dir-x/z1/z2/your-dir-y/999',
	'your-dir-a/x1/your-dir-b/y1/your-dir-x/z1/your-dir-y/999',
	'my-dir-a',
	'a/.../b',
	'a/x*b/c'])
	print "Exceptions in initial load"
	print "\n".join(errors)
	f.dumpFilteredList('Initial load - should be empty)')
	f.dumpUnfilteredList('Initial load - should be everything)')
	f.dumpFilteredList('After reset')
	errors = f.addFilters(['.../.../1',
	'.../.../1/*',
	'!.../.../my-dir-x/1'])
	print "Exceptions in adding filters"
	print "\n".join(errors)
	f.dumpFilteredList('Including .../.../1, Excluding .../.../my-dir-x/1')