Created
December 11, 2020 09:08
-
-
Save ayzu/c57ed8a660580d66e71fbeec07d90185 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
// WordsConcatenation returns indices of starting positions | |
// for substrings that contains a concatenation of all target words. | |
/* | |
general sliding window pattern of a constant size. | |
1. iterate through items in the array till you fill the window. | |
2. process the remaining items in the step-wise fashion, | |
and on each step check whether we fulfill the required condition. | |
If the condition is fulfilled try to shrink the window from the left, | |
and see whether the condition is still satisfied. | |
3. to prepare for a new iteration, drop the top left character, | |
and update the stats of the window if necessary. | |
In this case, concatenation of all target words is basically a | |
permutation of all the words. Therefore, by item in the array | |
we mean a substring of size equal to the the size of the words. | |
We iterate thorough each symbol in the array, and constitute | |
a new item with the last character equal to the symbol. | |
At each step we count the number of matches. | |
If we have matched all the words, we have found a required | |
concatenation in the array. To track the number of matches | |
we will have a running counter of needed matches with initial value | |
equal to the number of the target words. | |
If the current item matches any of the required words we | |
decrease the counter. Corollary, if the top left item | |
we are going to drop matches the target word, we increase the counter. | |
When the counter goes down to zero, we have all the required matches. | |
However, a single match counter is not enough: if we have two occurrences | |
of a word in the window, and we only need one, then we would decrease the counter twice. | |
We need to check not only whether an item equals to one of the target words, | |
but also whether we already matched the word earlier. To do so, we calculate a frequency map | |
of the targeted words, where words will be used as keys, and number of needed occurrences | |
as values. When an item matches a word, we decrease the frequency of the word. When the | |
frequency goes down to zero, we have matched the word the required number of items. | |
If found one more occurrence of the word after that, we have too many "instances" of the word | |
in the window, and the match wasn't useful. Therefore, we decrease the match counter, only | |
we the item is present in the frequency map and still have a positive count. | |
O(n) | |
*/ | |
func WordsConcatenation(str string, words []string) []int { | |
matches := len(words) // remaining number of matches | |
var start int // start of the sliding window, not the start of the current item | |
freqs := make(map[string]int) | |
for _, word := range words { // number of words to fill per word | |
freqs[word]++ | |
} | |
wordSize := len(words[0]) | |
var res []int | |
// stop is included; stop denotes both the last character of the current item and the last character of the window | |
for stop := wordSize - 1; stop < len(str); stop++ { | |
itemStart := stop + 1 - wordSize | |
item := str[itemStart : stop+1] | |
if freq, exists := freqs[item]; exists { | |
// if frequency is zero it means that we already filled all positions for the word | |
// if frequency is less than zero it means that we have more occurrences of the word | |
// in the current window | |
// in both such cases we don't need any more occurrence of the word, | |
// therefore, the match wasn't useful and it shouldn't be counted. | |
if freq > 0 { | |
matches-- | |
} | |
freqs[item]-- | |
} | |
if matches == 0 { | |
// shrink from the left | |
start = stop - len(words)*wordSize + 1 | |
res = append(res, start) | |
} | |
// let's say we have an array 'aa - bb - cc', and word size is equal to two | |
// and we have two words in total | |
// we full the window on the second item, and in the above if condition | |
// check whether the condition is satisfied | |
// now we have to prepare for the next iteration, i.e. accommodate space | |
// for the next item. To do so we drop the top left character. | |
if stop < len(words)*wordSize-1 { // have not yet filled the window, no need to drop the top left item | |
continue | |
} | |
left := str[start : start+wordSize] | |
start++ | |
if freq, exists := freqs[left]; exists { | |
if freq >= 0 { | |
matches++ | |
} | |
freqs[left]++ | |
} | |
} | |
return res | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment