dpapathanasiou/sliding_window.py

## sliding_window.py
#!/usr/bin/env python

"""
An implementation of the "sliding window" technique to find shortest matching subarrays, inspired by:

https://leetcode.com/problems/find-all-anagrams-in-a-string/discuss/92007/sliding-window-algorithm-template-to-solve-all-the-leetcode-substring-search-problem

"""

from sys import maxint

def smallest_subarray(lst, st):
    """Return the smallest subarray from the list (lst) which contains all the members of the set (st) using a sliding window technique"""

    if len(lst) < len(st):
        # the set is larger than the list, so there is no common subarray
        return []

    m = {} # key = element from set, value = frequency in the set
    for c in st:
        freq = m.get(c, 0)
        freq += 1
        m[c] = freq

    # track the number of unique elements in the set
    counter = len(m.keys())

    # left and right substring indices for iterating through the list
    left = right = 0

    # head and tail (start, stop indices) of the substring window in the list
    head = 0
    tail = maxint

    while right < len(lst):
        # start from left to right
        c = lst[right]
        if c in m:
            # this character is in the set
            freq = m[c]
            freq -= 1
            m[c] = freq
            if freq == 0:
                # decrement counter to note that this character is now included in the slice lst[left:right]
                counter -= 1
        right += 1

        while counter == 0:
            # counter at zero means all the elements of the set have been found,
            # now tighten the range from the left
            c = lst[left]
            if c in m:
                # this character (in a narrower range than found previously) is in the set
                freq = m[c]
                freq += 1
                m[c] = freq
                if freq > 0:
                    # break the loop once the desired frequency has been found
                    counter += 1

            if right-left < tail:
                # use the tightened ranges as the new optimal window
                tail = right-left
                head = left

            left += 1

    if tail == maxint:
        # we never did find all the characters in the set in the desired frequencies
        return []

    return lst[head:head+tail]


"""
Tests:

Input: "aabbcb"
Alphabet: {'a', 'b', 'c'}
Output: "abbc"

Input: "aaaaaaaaaabbbbbbbbccccccccsbabbbccc"
Alphabet: {'a', 'b', 'c'}
Output: "csba"

Input: "aaaaaaaaaabbbbbbbbccccccccsbabbbccc"
Alphabet: {'a', 'b', 'c', 'f'}
Output: "" # not possible

"""

assert smallest_subarray(list("aabbcb"), ['a', 'b', 'c']) == ['a', 'b', 'b', 'c']
assert smallest_subarray(list("aaaaaaaaaabbbbbbbbccccccccsbabbbccc"), ['a', 'b', 'c']) == ['c', 's', 'b', 'a']
assert smallest_subarray(list("aaaaaaaaaabbbbbbbbccccccccsbabbbccc"), ['a', 'b', 'c', 'f']) == []
	#!/usr/bin/env python

	"""
	An implementation of the "sliding window" technique to find shortest matching subarrays, inspired by:

	https://leetcode.com/problems/find-all-anagrams-in-a-string/discuss/92007/sliding-window-algorithm-template-to-solve-all-the-leetcode-substring-search-problem

	"""

	from sys import maxint

	def smallest_subarray(lst, st):
	"""Return the smallest subarray from the list (lst) which contains all the members of the set (st) using a sliding window technique"""

	if len(lst) < len(st):
	# the set is larger than the list, so there is no common subarray
	return []

	m = {} # key = element from set, value = frequency in the set
	for c in st:
	freq = m.get(c, 0)
	freq += 1
	m[c] = freq

	# track the number of unique elements in the set
	counter = len(m.keys())

	# left and right substring indices for iterating through the list
	left = right = 0

	# head and tail (start, stop indices) of the substring window in the list
	head = 0
	tail = maxint

	while right < len(lst):
	# start from left to right
	c = lst[right]
	if c in m:
	# this character is in the set
	freq = m[c]
	freq -= 1
	m[c] = freq
	if freq == 0:
	# decrement counter to note that this character is now included in the slice lst[left:right]
	counter -= 1
	right += 1

	while counter == 0:
	# counter at zero means all the elements of the set have been found,
	# now tighten the range from the left
	c = lst[left]
	if c in m:
	# this character (in a narrower range than found previously) is in the set
	freq = m[c]
	freq += 1
	m[c] = freq
	if freq > 0:
	# break the loop once the desired frequency has been found
	counter += 1

	if right-left < tail:
	# use the tightened ranges as the new optimal window
	tail = right-left
	head = left

	left += 1

	if tail == maxint:
	# we never did find all the characters in the set in the desired frequencies
	return []

	return lst[head:head+tail]


	"""
	Tests:

	Input: "aabbcb"
	Alphabet: {'a', 'b', 'c'}
	Output: "abbc"

	Input: "aaaaaaaaaabbbbbbbbccccccccsbabbbccc"
	Alphabet: {'a', 'b', 'c'}
	Output: "csba"

	Input: "aaaaaaaaaabbbbbbbbccccccccsbabbbccc"
	Alphabet: {'a', 'b', 'c', 'f'}
	Output: "" # not possible

	"""

	assert smallest_subarray(list("aabbcb"), ['a', 'b', 'c']) == ['a', 'b', 'b', 'c']
	assert smallest_subarray(list("aaaaaaaaaabbbbbbbbccccccccsbabbbccc"), ['a', 'b', 'c']) == ['c', 's', 'b', 'a']
	assert smallest_subarray(list("aaaaaaaaaabbbbbbbbccccccccsbabbbccc"), ['a', 'b', 'c', 'f']) == []