agriffis/gist:1343386

## gistfile1.py
def isplit(patt, s, flags=None):
    """Lazily split ``s`` on ``patt``, yielding content and separators.

    Behaves similarly to re.split, with the following differences:

        1. It's a generator, not a list.

        2. Zero-width separators work properly
           (see http://bugs.python.org/issue3262)

        3. The sequence always includes the separators, similar to calling
           re.split(r'(patt)', s)

    An odd number of elements is always generated, because the sequence
    starts and ends with content (possibly the empty string):
    content, separator, content, ..., content.

    Args:
        patt: The separator pattern; handed unchanged to re.finditer.
        s: The text to split.
        flags: Optional flags for re.finditer. None means "no flags".

    Yields:
        Slices of ``s``, alternating between content and the separator
        that follows it, ending with the trailing content.
    """
    # Imported here so the function is self-contained; it needs nothing
    # else from the enclosing module.
    import re

    separators = re.finditer(patt, s, flags=0 if flags is None else flags)

    # Offset just past the most recently seen separator. Zero stands in
    # for a virtual separator ending at the start of the string.
    prev_end = 0
    upcoming = next(separators, None)

    while upcoming is not None:
        sep, upcoming = upcoming, next(separators, None)
        start, end = sep.span()

        # There are two zero-width separator special cases to handle:
        #
        #   1. A zero-width separator immediately following another
        #      separator (or the start-of-string), for example matching
        #      \b immediately after matching \s+.
        #
        #   2. A zero-width separator matching immediately prior to
        #      another separator, for example matching \b immediately
        #      prior to matching \s+.

        if start == end == prev_end:
            # Case 1: skip it. prev_end needs no update because this
            # match ends exactly where the previous separator ended.
            continue

        while upcoming is not None and start == end == upcoming.start():
            # Case 2: prefer the separator that starts at the same
            # position. Python 3.7+ reports a non-empty match that
            # starts just after an empty one, so this works there; on
            # older versions the caller has to order the alternatives
            # as r'\s+|\b' rather than r'\b|\s+'.
            sep, upcoming = upcoming, next(separators, None)
            start, end = sep.span()

        # Yield the content prior to this separator.
        yield s[prev_end:start]

        if start == len(s):
            # Don't yield the end-of-string as a zero-length
            # separator. We're done.
            return

        # Yield this separator.
        yield s[start:end]
        prev_end = end

    # There's always content following the last separator.
    yield s[prev_end:]
	def isplit(patt, s, flags=None):
	    """Lazily split ``s`` on ``patt``, yielding content and separators.

	    Behaves similarly to re.split, with the following differences:

	        1. It's a generator, not a list.

	        2. Zero-width separators work properly
	           (see http://bugs.python.org/issue3262)

	        3. The sequence always includes the separators, similar to calling
	           re.split(r'(patt)', s)

	    An odd number of elements is always generated, because the sequence
	    starts and ends with content (possibly the empty string):
	    content, separator, content, ..., content.

	    Args:
	        patt: The separator pattern; handed unchanged to re.finditer.
	        s: The text to split.
	        flags: Optional flags for re.finditer. None means "no flags".

	    Yields:
	        Slices of ``s``, alternating between content and the separator
	        that follows it, ending with the trailing content.
	    """
	    # Imported here so the function is self-contained; it needs nothing
	    # else from the enclosing module.
	    import re

	    separators = re.finditer(patt, s, flags=0 if flags is None else flags)

	    # Offset just past the most recently seen separator. Zero stands in
	    # for a virtual separator ending at the start of the string.
	    prev_end = 0
	    upcoming = next(separators, None)

	    while upcoming is not None:
	        sep, upcoming = upcoming, next(separators, None)
	        start, end = sep.span()

	        # There are two zero-width separator special cases to handle:
	        #
	        #   1. A zero-width separator immediately following another
	        #      separator (or the start-of-string), for example matching
	        #      \b immediately after matching \s+.
	        #
	        #   2. A zero-width separator matching immediately prior to
	        #      another separator, for example matching \b immediately
	        #      prior to matching \s+.

	        if start == end == prev_end:
	            # Case 1: skip it. prev_end needs no update because this
	            # match ends exactly where the previous separator ended.
	            continue

	        while upcoming is not None and start == end == upcoming.start():
	            # Case 2: prefer the separator that starts at the same
	            # position. Python 3.7+ reports a non-empty match that
	            # starts just after an empty one, so this works there; on
	            # older versions the caller has to order the alternatives
	            # as r'\s+|\b' rather than r'\b|\s+'.
	            sep, upcoming = upcoming, next(separators, None)
	            start, end = sep.span()

	        # Yield the content prior to this separator.
	        yield s[prev_end:start]

	        if start == len(s):
	            # Don't yield the end-of-string as a zero-length
	            # separator. We're done.
	            return

	        # Yield this separator.
	        yield s[start:end]
	        prev_end = end

	    # There's always content following the last separator.
	    yield s[prev_end:]