Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@dpapathanasiou
Created April 20, 2019 20:25
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dpapathanasiou/4161bc161157006ec9ce5d1ff7e931ec to your computer and use it in GitHub Desktop.
Save dpapathanasiou/4161bc161157006ec9ce5d1ff7e931ec to your computer and use it in GitHub Desktop.
Sliding Window
#!/usr/bin/env python
"""
An implementation of the "sliding window" technique to find shortest matching subarrays, inspired by:
https://leetcode.com/problems/find-all-anagrams-in-a-string/discuss/92007/sliding-window-algorithm-template-to-solve-all-the-leetcode-substring-search-problem
"""
from sys import maxint
def smallest_subarray(lst, st):
"""Return the smallest subarray from the list (lst) which contains all the members of the set (st) using a sliding window technique"""
if len(lst) < len(st):
# the set is larger than the list, so there is no common subarray
return []
m = {} # key = element from set, value = frequency in the set
for c in st:
freq = m.get(c, 0)
freq += 1
m[c] = freq
# track the number of unique elements in the set
counter = len(m.keys())
# left and right substring indices for iterating through the list
left = right = 0
# head and tail (start, stop indices) of the substring window in the list
head = 0
tail = maxint
while right < len(lst):
# start from left to right
c = lst[right]
if c in m:
# this character is in the set
freq = m[c]
freq -= 1
m[c] = freq
if freq == 0:
# decrement counter to note that this character is now included in the slice lst[left:right]
counter -= 1
right += 1
while counter == 0:
# counter at zero means all the elements of the set have been found,
# now tighten the range from the left
c = lst[left]
if c in m:
# this character (in a narrower range than found previously) is in the set
freq = m[c]
freq += 1
m[c] = freq
if freq > 0:
# break the loop once the desired frequency has been found
counter += 1
if right-left < tail:
# use the tightened ranges as the new optimal window
tail = right-left
head = left
left += 1
if tail == maxint:
# we never did find all the characters in the set in the desired frequencies
return []
return lst[head:head+tail]
"""
Tests:
Input: "aabbcb"
Alphabet: {'a', 'b', 'c'}
Output: "abbc"
Input: "aaaaaaaaaabbbbbbbbccccccccsbabbbccc"
Alphabet: {'a', 'b', 'c'}
Output: "csba"
Input: "aaaaaaaaaabbbbbbbbccccccccsbabbbccc"
Alphabet: {'a', 'b', 'c', 'f'}
Output: "" # not possible
"""
assert smallest_subarray(list("aabbcb"), ['a', 'b', 'c']) == ['a', 'b', 'b', 'c']
assert smallest_subarray(list("aaaaaaaaaabbbbbbbbccccccccsbabbbccc"), ['a', 'b', 'c']) == ['c', 's', 'b', 'a']
assert smallest_subarray(list("aaaaaaaaaabbbbbbbbccccccccsbabbbccc"), ['a', 'b', 'c', 'f']) == []
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment