Kristonitas/searchConsecutive.ts

## searchConsecutive.ts
// Stores the result match, can be used to store best current match
type Match = {
  // the chars that are in the longest consecutive sequence (char[])
  chars: string;
  // start index in original sequence (size_t)
  startIndex: number;
  // length of the matching sequence (size_t)
  length: number;
};

const searchConsecutive = (text: string, numChars: number): Match => {
  /**
   * Lets say we are searching for the longest sequence of any 3 characters.
   * When evaluating a certain position in the sequence, that position can be either the part of the longest sub-sequence (containing only 3 characters), or not.
   * Assume the longest sequence contains characters "xyz" and we are evaluating the position in the sequence that is a part of the longest sequence.
   * When we go forwards in the sequence and meet a character thats neither of the "xyz", we know that we are at the end of the sequence. What is unique about that position?
   * Current position is one of characters [xyz] and the next is not (![xyz]). Similarly if we go backwards in sequence
   * The idea behind this search algorithm is that only positions where neighboring characters are different is important.
   * When going forward its important when a unseen-new character appears first, or when a particular character appears for the last time (from before)
   *
   * When going from the start, lets index each character by its appearance in the sequence:
   * 11121221331214...
   * The moment "4" appears, the longest sequence with characters 123 is done. We then need to evaluate sequence with character 4 and two characters from the set of [123].
   * Which one? Going back from the initial appearance of number 4, we see numbers: 412133... Since number 4 is in the sequence and we must evaluate the consecutive sequence
   * it will be numbers 124. At that point, we need to establish the longest sequence with characters 124, that started at position, right after last appearance of 3, it will be
   * 1214...
   * Compare the length of the subsequence containing characters 124 with the length of the previous candidate (containing characters 123), if its longer, promote it to the potential result
   * At this point, 1112122133 can be forgotten about, and the sequence 1214... can be treated as the initial problem (with the caveat of tracking the longest potential result)
   * When hitting end of string, return existing longest potential result, if none is established at this point (looking for N+ unique character sequence when original seuqnce
   * has only N unique chars), the original pattern mathces the question, return it
   */

  // To evaluate (access) each character only once, keeping last appearances of each character
  type CharacterAppearanceInfo = {
    // character for which this info applies (char)
    char: string;
    // up until evaluate character in sequence, where was the last time character was used
    lastAppearance: number;
  };

  let match: Match | undefined;

  let searchStart = 0;
  let searchStack: CharacterAppearanceInfo[] = [];

  for (let i = 0; i < text.length; i++) {
    // console.log(match);
    const evaluatedCharacter = text[i];
    const infoInStack = searchStack.find((s) => s.char == evaluatedCharacter);

    // already in search stack, update last position
    if (infoInStack) {
      infoInStack.lastAppearance = i;
      continue;
    }

    // search stack not yet saturated character limit
    if (searchStack.length < numChars) {
      searchStack.push({
        char: evaluatedCharacter,
        lastAppearance: i,
      });
      continue;
    }
    // console.log({ i, searchStack, evaluatedCharacter });

    // found new character that is not in stack and stack needs to be updated
    // oh boy...

    // convert existing stack into a potential match and compare it
    const newMatch: Match = {
      chars: searchStack.map((s) => s.char).join(""),
      startIndex: searchStart,
      length: i - searchStart, // tricky to get this right, but since i is already advanced, no need to increment length by + 1
    };

    // change this to something if you need all candidates
    // this will return first candidate in sequence
    if (!match || newMatch.length > match.length) {
      match = newMatch;
    }

    // if we would to reverse the problem at this point, we would be checking where the sub-sequence finds the unexpected char,
    // it would be where the char with the earliest `lastAppearance` appeared. So if we go forward, that char needs to be dropped
    searchStack.sort((s) => s.lastAppearance);
    searchStart = searchStack[0].lastAppearance + 1;
    searchStack.shift();
    searchStack.push({
      char: evaluatedCharacter,
      lastAppearance: i,
    });
    // console.log({ i, searchStack, evaluatedCharacter });
  }

  // need to make a match after evaluting last character
  // this will also work as a match if sequence contained less characters than the search limit
  const newMatch: Match = {
    chars: searchStack.map((s) => s.char).join(""),
    startIndex: searchStart,
    length: text.length - searchStart,
  };

  if (!match || newMatch.length > match.length) {
    match = newMatch;
  }

  return match;
};

const test = (text: string, numChars: number) => {
  const match = searchConsecutive(text, numChars);
  const result = text.substring(
    match.startIndex,
    match.startIndex + match.length
  );

  const highlightedText =
    text.substring(0, match.startIndex) +
    "\x1b[33m" +
    result +
    "\x1b[37m" +
    text.substring(match.startIndex + match.length);

  console.log(
    `longest ${numChars} character sequence from ${highlightedText} is ${result}`
  );
};

test("abc", 1);
test("abc", 2);
test("abc", 3);
test("abc", 4);

test("aaaabccccddd", 1);
test("aaaabccccddd", 2);
test("aaaabccccddd", 3);
test("aaaabccccddd", 4);
test("aaaabccccddd", 5);
	// Stores the result match, can be used to store best current match
	type Match = {
	// the chars that are in the longest consecutive sequence (char[])
	chars: string;
	// start index in original sequence (size_t)
	startIndex: number;
	// length of the matching sequence (size_t)
	length: number;
	};

	const searchConsecutive = (text: string, numChars: number): Match => {
	/**
	* Lets say we are searching for the longest sequence of any 3 characters.
	* When evaluating a certain position in the sequence, that position can be either the part of the longest sub-sequence (containing only 3 characters), or not.
	* Assume the longest sequence contains characters "xyz" and we are evaluating the position in the sequence that is a part of the longest sequence.
	* When we go forwards in the sequence and meet a character thats neither of the "xyz", we know that we are at the end of the sequence. What is unique about that position?
	* Current position is one of characters [xyz] and the next is not (![xyz]). Similarly if we go backwards in sequence
	* The idea behind this search algorithm is that only positions where neighboring characters are different is important.
	* When going forward its important when a unseen-new character appears first, or when a particular character appears for the last time (from before)
	*
	* When going from the start, lets index each character by its appearance in the sequence:
	* 11121221331214...
	* The moment "4" appears, the longest sequence with characters 123 is done. We then need to evaluate sequence with character 4 and two characters from the set of [123].
	* Which one? Going back from the initial appearance of number 4, we see numbers: 412133... Since number 4 is in the sequence and we must evaluate the consecutive sequence
	* it will be numbers 124. At that point, we need to establish the longest sequence with characters 124, that started at position, right after last appearance of 3, it will be
	* 1214...
	* Compare the length of the subsequence containing characters 124 with the length of the previous candidate (containing characters 123), if its longer, promote it to the potential result
	* At this point, 1112122133 can be forgotten about, and the sequence 1214... can be treated as the initial problem (with the caveat of tracking the longest potential result)
	* When hitting end of string, return existing longest potential result, if none is established at this point (looking for N+ unique character sequence when original seuqnce
	* has only N unique chars), the original pattern mathces the question, return it
	*/

	// To evaluate (access) each character only once, keeping last appearances of each character
	type CharacterAppearanceInfo = {
	// character for which this info applies (char)
	char: string;
	// up until evaluate character in sequence, where was the last time character was used
	lastAppearance: number;
	};

	let match: Match \| undefined;

	let searchStart = 0;
	let searchStack: CharacterAppearanceInfo[] = [];

	for (let i = 0; i < text.length; i++) {
	// console.log(match);
	const evaluatedCharacter = text[i];
	const infoInStack = searchStack.find((s) => s.char == evaluatedCharacter);

	// already in search stack, update last position
	if (infoInStack) {
	infoInStack.lastAppearance = i;
	continue;
	}

	// search stack not yet saturated character limit
	if (searchStack.length < numChars) {
	searchStack.push({
	char: evaluatedCharacter,
	lastAppearance: i,
	});
	continue;
	}
	// console.log({ i, searchStack, evaluatedCharacter });

	// found new character that is not in stack and stack needs to be updated
	// oh boy...

	// convert existing stack into a potential match and compare it
	const newMatch: Match = {
	chars: searchStack.map((s) => s.char).join(""),
	startIndex: searchStart,
	length: i - searchStart, // tricky to get this right, but since i is already advanced, no need to increment length by + 1
	};

	// change this to something if you need all candidates
	// this will return first candidate in sequence
	if (!match \|\| newMatch.length > match.length) {
	match = newMatch;
	}

	// if we would to reverse the problem at this point, we would be checking where the sub-sequence finds the unexpected char,
	// it would be where the char with the earliest `lastAppearance` appeared. So if we go forward, that char needs to be dropped
	searchStack.sort((s) => s.lastAppearance);
	searchStart = searchStack[0].lastAppearance + 1;
	searchStack.shift();
	searchStack.push({
	char: evaluatedCharacter,
	lastAppearance: i,
	});
	// console.log({ i, searchStack, evaluatedCharacter });
	}

	// need to make a match after evaluting last character
	// this will also work as a match if sequence contained less characters than the search limit
	const newMatch: Match = {
	chars: searchStack.map((s) => s.char).join(""),
	startIndex: searchStart,
	length: text.length - searchStart,
	};

	if (!match \|\| newMatch.length > match.length) {
	match = newMatch;
	}

	return match;
	};

	const test = (text: string, numChars: number) => {
	const match = searchConsecutive(text, numChars);
	const result = text.substring(
	match.startIndex,
	match.startIndex + match.length
	);

	const highlightedText =
	text.substring(0, match.startIndex) +
	"\x1b[33m" +
	result +
	"\x1b[37m" +
	text.substring(match.startIndex + match.length);

	console.log(
	`longest ${numChars} character sequence from ${highlightedText} is ${result}`
	);
	};

	test("abc", 1);
	test("abc", 2);
	test("abc", 3);
	test("abc", 4);

	test("aaaabccccddd", 1);
	test("aaaabccccddd", 2);
	test("aaaabccccddd", 3);
	test("aaaabccccddd", 4);
	test("aaaabccccddd", 5);