@ducaale
Last active March 16, 2022 10:04
// Given a list of sets [set1, set2, set3, ..., setn] and a sliding window [start, end)
// of fixed size (end - start), what is the fastest way to compute the aggregate of the
// sets inside the window at every position as the window slides forward by 1?
//
// example:
// const data = [set1, set2, set3, set4, set5, set6, set7, set8, set9];
// const windowSize = 4
// const [s, e] = [0, windowSize]
//
// in this case, our result should be:
// const result = [
// set1 + set2 + set3 + set4,
// set2 + set3 + set4 + set5,
// set3 + set4 + set5 + set6,
// set4 + set5 + set6 + set7,
// set5 + set6 + set7 + set8,
// set6 + set7 + set8 + set9,
// ];
//
// Note that addition and subtraction in this context mean Set.union and Set.difference respectively.
// One way to optimize this is to subtract set[s-1] and add set[e] on each step. For example, if the
// first entry is (set1 + set2 + set3 + set4), the second entry would be
// (set1 + set2 + set3 + set4) - set1 + set5.
// Unfortunately, plain set difference makes this incorrect: subtracting set1 removes all of its
// elements, including ones that other sets in the window still contain. Only the elements unique
// to set1 should be removed, and tracking that is the hard part.
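// A minimal sketch of why subtract-and-add breaks with plain set difference (hypothetical
// example; `difference` and the sets below are illustration only, not part of the code that
// follows). The element 42 lives in both setA1 and setA2, so removing all of setA1's elements
// from the running union also drops 42, even though setA2 should still contribute it:
const difference = (setA, setB) => {
  const result = new Set(setA)
  for (const elem of setB) { result.delete(elem) }
  return result
}
const setA1 = new Set([1, 42])
const setA2 = new Set([42, 2])
const setA3 = new Set([3])
const windowA = new Set([...setA1, ...setA2])   // {1, 42, 2}
const shifted = difference(windowA, setA1)      // {2} -- 42 was removed too eagerly
const windowB = new Set([...shifted, ...setA3]) // {2, 3}, but the true next window is {42, 2, 3}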
//
// Another way to approach this is to take a block of 2*windowSize sets, partition it into two
// halves, and do a scan (a reduce that keeps its intermediate results) backwards over the first
// half and forwards over the second half:
//
// part1 = [set1+set2+set3+set4, set2+set3+set4, set3+set4, set4]   (suffix unions)
// part2 = [set5, set5+set6, set5+set6+set7, set5+set6+set7+set8]   (prefix unions)
//
// Each window is then either an entry of part1/part2 on its own, or the union of one suffix
// and one prefix:
//
// result = [part1[0], (part1[1] + part2[0]), (part1[2] + part2[1]), (part1[3] + part2[2]), part2[3]]
//
// For these five windows over sets 1-8, that is 3 + 3 + 3 = 9 union calls instead of the
// naive 5 * 3 = 15. (The remaining window in the example, set6+...+set9, comes from the next block.)
const union = (setA, setB) => {
  const _union = new Set(setA)
  for (const elem of setB) { _union.add(elem) }
  return _union
}
const last = arr => arr[arr.length - 1]
const scan = (arr, fn) =>
  arr.reduce(
    (accum, current) =>
      accum.length === 0 ? [current] : [...accum, fn(current, last(accum))],
    []
  )
const scanBack = (arr, fn) => scan([...arr].reverse(), fn).reverse()
const partition = arr => [
  arr.slice(0, Math.floor(arr.length / 2)),
  arr.slice(Math.floor(arr.length / 2))
]
// naive approach: reduce every window from scratch
const getRolling1 = (data, windowSize, duration) => {
  const rolling = []
  for (let i = (data.length - duration) + 1; i <= data.length; i++) {
    rolling.push(data.slice(i - windowSize, i).reduce(union))
  }
  return rolling.slice(-duration)
}
// slightly less naive approach: handle a block of 2*windowSize sets at a time,
// scanning from both ends and pairing up the intermediate unions
const getRolling2 = (data, windowSize, duration) => {
  let rolling = []
  // loop until at least `duration` windows have been produced (the original
  // condition, rolling.length < data.length - duration, only worked when
  // duration happened to equal data.length - duration)
  for (let i = data.length; rolling.length < duration; i = i - (windowSize + 1)) {
    const rollingTemp = []
    const [b1, b2] = partition(data.slice(i - (windowSize * 2), i))
    const temp1 = scanBack(b1, union) // suffix unions of the first half
    const temp2 = scan(b2, union)     // prefix unions of the second half
    rollingTemp.push(temp1[0])
    for (let j = 1; j < temp1.length; j++) {
      rollingTemp.push(union(temp1[j], temp2[j - 1]))
    }
    rollingTemp.push(last(temp2))
    rolling = [...rollingTemp, ...rolling]
  }
  return rolling.slice(-duration)
}
const randInt = (min, max) => Math.floor(Math.random() * (max - min) + min);
const range = n => [...Array(n).keys()];
const buckets = range(60).map(() => {
  const bucket = new Set()
  for (const _ of range(10)) { bucket.add(randInt(0, 1000)) }
  return bucket
})
console.log(getRolling1(buckets, 7, 30).map(r => r.size))
console.log(getRolling2(buckets, 7, 30).map(r => r.size))
// getRolling1(buckets, 7, 30) // 180 union operations
// getRolling1(buckets, 30, 30) // 870 union operations
// getRolling2(buckets, 7, 30) // 72 union operations
// getRolling2(buckets, 30, 30) // 87 union operations
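// A self-contained sanity check of the scan/scanBack pairing described above, on the worked
// example of eight singleton sets with windowSize 4. The helpers are re-defined inline under
// new names (union2, scan2, scanBack2) so this snippet stands alone:
const union2 = (a, b) => new Set([...a, ...b])
const scan2 = (arr, fn) => arr.reduce(
  (acc, cur) => acc.length === 0 ? [cur] : [...acc, fn(cur, acc[acc.length - 1])],
  []
)
const scanBack2 = (arr, fn) => scan2([...arr].reverse(), fn).reverse()
const sets = [[1], [2], [3], [4], [5], [6], [7], [8]].map(xs => new Set(xs))
const ws = 4
const part1 = scanBack2(sets.slice(0, ws), union2) // suffix unions of sets 1-4
const part2 = scan2(sets.slice(ws), union2)        // prefix unions of sets 5-8
const windows = [
  part1[0],
  ...part1.slice(1).map((s, k) => union2(s, part2[k])),
  part2[ws - 1],
]
console.log(windows.map(w => [...w].sort((a, b) => a - b)))
// five windows: [1,2,3,4], [2,3,4,5], [3,4,5,6], [4,5,6,7], [5,6,7,8]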