ayzu/words_concatenation.go

## words_concatenation.go
package main

// WordsConcatenation returns indices of starting positions
// for substrings that contains a concatenation of all target words.
/*

	general sliding window pattern of a constant size.
	1. iterate through items in the array till you fill the window.
	2. process the remaining items in the step-wise fashion,
	and on each step check whether we fulfill the required condition.
	If the condition is fulfilled try to shrink the window from the left,
	and see whether the condition is still satisfied.
	3. to prepare for a new iteration, drop the top left character,
	and update the stats of the window if necessary.

	In this case, concatenation of all target words is basically a
	permutation of all the words. Therefore, by item in the array
	we mean a substring of size equal to the the size of the words.
	We iterate thorough each symbol in the array, and constitute
	a new item with the last character equal to the symbol.
	At each step we count the number of matches.
	If we have matched all the words, we have found a required
	concatenation in the array. To track the number of matches
	we will have a running counter of needed matches with initial value
	equal to the number of the target words.
	If the current item matches any of the required words we
	decrease the counter. Corollary, if the top left item
	we are going to drop matches the target word, we increase the counter.
	When the counter goes down to zero, we have all the required matches.

	However, a single match counter is not enough: if we have two occurrences
	of a word in the window, and we only need one, then we would decrease the counter twice.
	We need to check not only whether an item equals to one of the target words,
	but also whether we already matched the word earlier. To do so, we calculate a frequency map
	of the targeted words, where words will be used as keys, and number of needed occurrences
	as values. When an item matches a word, we decrease the frequency of the word. When the
	frequency goes down to zero, we have matched the word the required number of items.
	If found one more occurrence of the word after that, we have too many "instances" of the word
	in the window, and the match wasn't useful. Therefore, we decrease the match counter, only
	we the item is present in the frequency map and still have a positive count.

	O(n)

*/
func WordsConcatenation(str string, words []string) []int {
	matches := len(words) // remaining number of matches
	var start int         // start of the sliding window, not the start of the current item
	freqs := make(map[string]int)
	for _, word := range words { // number of words to fill per word
		freqs[word]++
	}
	wordSize := len(words[0])
	var res []int

	// stop is included; stop denotes both the last character of the current item and the last character of the window
	for stop := wordSize - 1; stop < len(str); stop++ {
		itemStart := stop + 1 - wordSize
		item := str[itemStart : stop+1]
		if freq, exists := freqs[item]; exists {
			// if frequency is zero it means that we already filled all positions for the word
			// if frequency is less than zero it means that we have more occurrences of the word
			// in the current window
			// in both such cases we don't need any more occurrence of the word,
			// therefore, the match wasn't useful and it shouldn't be counted.
			if freq > 0 {
				matches--
			}
			freqs[item]--
		}

		if matches == 0 {
			// shrink from the left
			start = stop - len(words)*wordSize + 1
			res = append(res, start)
		}

		// let's say we have an array 'aa - bb - cc', and word size is equal to two
		// and we have two words in total
		// we full the window on the second item, and in the above if condition
		// check whether the condition is satisfied
		// now we have to prepare for the next iteration, i.e. accommodate space
		// for the next item. To do so we drop the top left character.

		if stop < len(words)*wordSize-1 { // have not yet filled the window, no need to drop the top left item
			continue
		}

		left := str[start : start+wordSize]
		start++
		if freq, exists := freqs[left]; exists {
			if freq >= 0 {
				matches++
			}
			freqs[left]++
		}
	}

	return res
}
	package main

	// WordsConcatenation returns indices of starting positions
	// for substrings that contains a concatenation of all target words.
	/*

	general sliding window pattern of a constant size.
	1. iterate through items in the array till you fill the window.
	2. process the remaining items in the step-wise fashion,
	and on each step check whether we fulfill the required condition.
	If the condition is fulfilled try to shrink the window from the left,
	and see whether the condition is still satisfied.
	3. to prepare for a new iteration, drop the top left character,
	and update the stats of the window if necessary.

	In this case, concatenation of all target words is basically a
	permutation of all the words. Therefore, by item in the array
	we mean a substring of size equal to the the size of the words.
	We iterate thorough each symbol in the array, and constitute
	a new item with the last character equal to the symbol.
	At each step we count the number of matches.
	If we have matched all the words, we have found a required
	concatenation in the array. To track the number of matches
	we will have a running counter of needed matches with initial value
	equal to the number of the target words.
	If the current item matches any of the required words we
	decrease the counter. Corollary, if the top left item
	we are going to drop matches the target word, we increase the counter.
	When the counter goes down to zero, we have all the required matches.

	However, a single match counter is not enough: if we have two occurrences
	of a word in the window, and we only need one, then we would decrease the counter twice.
	We need to check not only whether an item equals to one of the target words,
	but also whether we already matched the word earlier. To do so, we calculate a frequency map
	of the targeted words, where words will be used as keys, and number of needed occurrences
	as values. When an item matches a word, we decrease the frequency of the word. When the
	frequency goes down to zero, we have matched the word the required number of items.
	If found one more occurrence of the word after that, we have too many "instances" of the word
	in the window, and the match wasn't useful. Therefore, we decrease the match counter, only
	we the item is present in the frequency map and still have a positive count.

	O(n)

	*/
	func WordsConcatenation(str string, words []string) []int {
	matches := len(words) // remaining number of matches
	var start int // start of the sliding window, not the start of the current item
	freqs := make(map[string]int)
	for _, word := range words { // number of words to fill per word
	freqs[word]++
	}
	wordSize := len(words[0])
	var res []int

	// stop is included; stop denotes both the last character of the current item and the last character of the window
	for stop := wordSize - 1; stop < len(str); stop++ {
	itemStart := stop + 1 - wordSize
	item := str[itemStart : stop+1]
	if freq, exists := freqs[item]; exists {
	// if frequency is zero it means that we already filled all positions for the word
	// if frequency is less than zero it means that we have more occurrences of the word
	// in the current window
	// in both such cases we don't need any more occurrence of the word,
	// therefore, the match wasn't useful and it shouldn't be counted.
	if freq > 0 {
	matches--
	}
	freqs[item]--
	}

	if matches == 0 {
	// shrink from the left
	start = stop - len(words)*wordSize + 1
	res = append(res, start)
	}

	// let's say we have an array 'aa - bb - cc', and word size is equal to two
	// and we have two words in total
	// we full the window on the second item, and in the above if condition
	// check whether the condition is satisfied
	// now we have to prepare for the next iteration, i.e. accommodate space
	// for the next item. To do so we drop the top left character.

	if stop < len(words)*wordSize-1 { // have not yet filled the window, no need to drop the top left item
	continue
	}

	left := str[start : start+wordSize]
	start++
	if freq, exists := freqs[left]; exists {
	if freq >= 0 {
	matches++
	}
	freqs[left]++
	}
	}

	return res
	}