ttappr/string_function.rs

## string_function.rs

//! A solution to the HackerRank challenge, "String Function Calculation".
//! The problem is to find the maximum value for the formula:
//!
//!     f(s) = |s| * number of times s occurs in t
//!
//! Where `t` is a body of text, |s| is the length of any substring that occurs
//! any number of times in it. The text is processed to find these recurrent
//! substrings. The longer any substring |s| is, the greater the value the
//! function will produce when the length is multiplied by the number of
//! occurrences.
//!
//! So we need a way to find substrings and the number of times they occur.
//! If we can find all repeating patterns, and sort them so they're all
//! adjacent to one another, then we can get the values for the formula.
//!
//! To find the repeating patterns, a suffix array and its corresponding LCP
//! array can be generated.
//!
//! Visualizing the suffix array, we can see repeating patterns of text lining
//! up. But these patterns are followed by extra text. The LCP array indicates
//! where the extra text begins - or where the matching text ends.
//! The LCP array can be thought of as an histogram whose values show how many
//! chacters match between adjacent suffixes.
//!
//! Thinking conceptually of the LCP array as an histogram, the problem then
//! becomes a matter of finding the largest rectangle within the bars.
//!
//! The time constraints on this problem are tight, and the test cases are
//! designed to bog down unoptimized solutions. The code that produces the
//! suffix array needs to be fast. A naive approach won't work. Use one of the
//! more performant solutions for this.
//!
//! If the suffix array and LCP array generation are optimized, then a naive
//! algorithm for finding the largest rectangle will be okay. Because of data
//! locality, simply scanning right and left of all points will be fast enough.
//! However, there is an O(n) algorithm for this.
//!

use std::error::Error;
use std::io::{BufReader, BufRead, stdin};

fn main() -> Result<(), Box<dyn Error>> {
    let s = BufReader::new(stdin()).lines()
                                   .map(|l| l.unwrap())
                                   .next().unwrap().chars()
                                   .collect::<Vec<_>>();
    let len = s.len();
    let sfx = suffix_array(&s);
    let lcp = lcp_array(&s, &sfx);

    let mut stack = vec![];
    let mut max = len;
    let mut i     = 0;

    // O(n) stack based largest rectangle algorithm.
    while i < len {
        match stack.last() {
            Some(&top) if lcp[i] < lcp[top] => {
                stack.pop();
                let h = lcp[top];
                let a = {
                    if let Some(&top2) = stack.last() {
                        h * (i - top2)
                    } else {
                        h * (i + 1   )
                    }
                };
                max = max.max(a);
            },
            _ => {
                stack.push(i);
                i += 1;
            }
        }
    }
    while let Some(top) = stack.pop() {
        let h = lcp[top];
        let a = {
            if let Some(&top2) = stack.last() {
                h * (i - top2)
            } else {
                h * (i + 1)
            }
        };
        max = max.max(a);
    }
    println!("{}", max);
    Ok(())
}
/// Builds a suffix array. Much more time efficient than the
/// substring sort method. The code this was adapted from can be found here:
/// https://www.geeksforgeeks.org/suffix-array-set-2-a-nlognlogn-algorithm/
/// Also, referenced in that article:
/// http://www.stanford.edu/class/cs97si/suffix-array.pdf
///
fn suffix_array(string: &[char]) -> Vec<usize> {
    struct Sfx { idx: usize, rank: (isize, isize) }

    let     len = string.len();
    let mut sfx = Vec::with_capacity(len);
    let mut ind = vec![0; len];
    let mut k   = 2;

    for i in 0..len {
        let a = string[i] as isize;
        let b = {
            if i < len - 1 {
                string[i + 1] as isize
            } else { -1 }
        };
        sfx.push(Sfx { idx: i, rank: (a, b) });
    }
    sfx.sort_by_key(|s| s.rank);

    while k < len {
        let mut rank = 0;
        let mut prev = sfx[0].rank.0;

        sfx[0].rank.0   = 0;
        ind[sfx[0].idx] = 0;

        for i in 1..len {
            if sfx[i].rank.0 != prev
                || sfx[i].rank.1 != sfx[i - 1].rank.1 {
                rank += 1;
            }
            prev = sfx[i].rank.0;
            sfx[i].rank.0 = rank;
            ind[sfx[i].idx] = i;
        }
        for i in 0..len {
            let idx2 = sfx[i].idx + k;
            sfx[i].rank.1 = {
                if idx2 < len {
                    sfx[ind[idx2]].rank.0
                } else { -1 }
            };
        }
        sfx.sort_by_key(|s| s.rank);
        k *= 2;
    }
    sfx.iter().map(|s| s.idx).collect()
}
/// Create an LCP array using the Kasai algorithm. This code was adapted from
/// This example:
/// https://www.geeksforgeeks.org/%C2%AD%C2%ADkasais-algorithm-for-construction-of-lcp-array-from-suffix-array/
///
fn lcp_array(string: &[char], suffixes: &[usize]) -> Vec<usize> {
    let     len = suffixes.len();
    let mut lcp = vec![0; len];
    let mut inv = vec![0; len];
    let mut k   = 0;

    for i in 0..len {
        inv[suffixes[i]] = i;
    }
    for i in 0..len {
        if inv[i] == len - 1 {
            k = 0;
            continue;
        }
        let j = suffixes[inv[i] + 1];

        while i + k < len && j + k < len
            && string[i + k] == string[j + k] {
            k += 1;
        }
        lcp[inv[i]] = k;
        if k > 0 {
            k -= 1;
        }
    }
    lcp
}

	//! A solution to the HackerRank challenge, "String Function Calculation".
	//! The problem is to find the maximum value for the formula:
	//!
	//! f(s) = \|s\| * number of times s occurs in t
	//!
	//! Where `t` is a body of text, \|s\| is the length of any substring that occurs
	//! any number of times in it. The text is processed to find these recurrent
	//! substrings. The longer any substring \|s\| is, the greater the value the
	//! function will produce when the length is multiplied by the number of
	//! occurrences.
	//!
	//! So we need a way to find substrings and the number of times they occur.
	//! If we can find all repeating patterns, and sort them so they're all
	//! adjacent to one another, then we can get the values for the formula.
	//!
	//! To find the repeating patterns, a suffix array and its corresponding LCP
	//! array can be generated.
	//!
	//! Visualizing the suffix array, we can see repeating patterns of text lining
	//! up. But these patterns are followed by extra text. The LCP array indicates
	//! where the extra text begins - or where the matching text ends.
	//! The LCP array can be thought of as an histogram whose values show how many
	//! chacters match between adjacent suffixes.
	//!
	//! Thinking conceptually of the LCP array as an histogram, the problem then
	//! becomes a matter of finding the largest rectangle within the bars.
	//!
	//! The time constraints on this problem are tight, and the test cases are
	//! designed to bog down unoptimized solutions. The code that produces the
	//! suffix array needs to be fast. A naive approach won't work. Use one of the
	//! more performant solutions for this.
	//!
	//! If the suffix array and LCP array generation are optimized, then a naive
	//! algorithm for finding the largest rectangle will be okay. Because of data
	//! locality, simply scanning right and left of all points will be fast enough.
	//! However, there is an O(n) algorithm for this.
	//!

	use std::error::Error;
	use std::io::{BufReader, BufRead, stdin};

	fn main() -> Result<(), Box<dyn Error>> {
	let s = BufReader::new(stdin()).lines()
	.map(\|l\| l.unwrap())
	.next().unwrap().chars()
	.collect::<Vec<_>>();
	let len = s.len();
	let sfx = suffix_array(&s);
	let lcp = lcp_array(&s, &sfx);

	let mut stack = vec![];
	let mut max = len;
	let mut i = 0;

	// O(n) stack based largest rectangle algorithm.
	while i < len {
	match stack.last() {
	Some(&top) if lcp[i] < lcp[top] => {
	stack.pop();
	let h = lcp[top];
	let a = {
	if let Some(&top2) = stack.last() {
	h * (i - top2)
	} else {
	h * (i + 1 )
	}
	};
	max = max.max(a);
	},
	_ => {
	stack.push(i);
	i += 1;
	}
	}
	}
	while let Some(top) = stack.pop() {
	let h = lcp[top];
	let a = {
	if let Some(&top2) = stack.last() {
	h * (i - top2)
	} else {
	h * (i + 1)
	}
	};
	max = max.max(a);
	}
	println!("{}", max);
	Ok(())
	}
	/// Builds a suffix array. Much more time efficient than the
	/// substring sort method. The code this was adapted from can be found here:
	/// https://www.geeksforgeeks.org/suffix-array-set-2-a-nlognlogn-algorithm/
	/// Also, referenced in that article:
	/// http://www.stanford.edu/class/cs97si/suffix-array.pdf
	///
	fn suffix_array(string: &[char]) -> Vec<usize> {
	struct Sfx { idx: usize, rank: (isize, isize) }

	let len = string.len();
	let mut sfx = Vec::with_capacity(len);
	let mut ind = vec![0; len];
	let mut k = 2;

	for i in 0..len {
	let a = string[i] as isize;
	let b = {
	if i < len - 1 {
	string[i + 1] as isize
	} else { -1 }
	};
	sfx.push(Sfx { idx: i, rank: (a, b) });
	}
	sfx.sort_by_key(\|s\| s.rank);

	while k < len {
	let mut rank = 0;
	let mut prev = sfx[0].rank.0;

	sfx[0].rank.0 = 0;
	ind[sfx[0].idx] = 0;

	for i in 1..len {
	if sfx[i].rank.0 != prev
	\|\| sfx[i].rank.1 != sfx[i - 1].rank.1 {
	rank += 1;
	}
	prev = sfx[i].rank.0;
	sfx[i].rank.0 = rank;
	ind[sfx[i].idx] = i;
	}
	for i in 0..len {
	let idx2 = sfx[i].idx + k;
	sfx[i].rank.1 = {
	if idx2 < len {
	sfx[ind[idx2]].rank.0
	} else { -1 }
	};
	}
	sfx.sort_by_key(\|s\| s.rank);
	k *= 2;
	}
	sfx.iter().map(\|s\| s.idx).collect()
	}
	/// Create an LCP array using the Kasai algorithm. This code was adapted from
	/// This example:
	/// https://www.geeksforgeeks.org/%C2%AD%C2%ADkasais-algorithm-for-construction-of-lcp-array-from-suffix-array/
	///
	fn lcp_array(string: &[char], suffixes: &[usize]) -> Vec<usize> {
	let len = suffixes.len();
	let mut lcp = vec![0; len];
	let mut inv = vec![0; len];
	let mut k = 0;

	for i in 0..len {
	inv[suffixes[i]] = i;
	}
	for i in 0..len {
	if inv[i] == len - 1 {
	k = 0;
	continue;
	}
	let j = suffixes[inv[i] + 1];

	while i + k < len && j + k < len
	&& string[i + k] == string[j + k] {
	k += 1;
	}
	lcp[inv[i]] = k;
	if k > 0 {
	k -= 1;
	}
	}
	lcp
	}