Last active
December 31, 2023 05:45
-
-
Save ttappr/2f3daec19f5605dbc955c49d38dd6a39 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//! A solution to the HackerRank challenge, "String Function Calculation". | |
//! The problem is to find the maximum value for the formula: | |
//! | |
//! f(s) = |s| * number of times s occurs in t | |
//! | |
//! Where `t` is a body of text, |s| is the length of any substring that occurs | |
//! any number of times in it. The text is processed to find these recurrent | |
//! substrings. The longer any substring |s| is, the greater the value the | |
//! function will produce when the length is multiplied by the number of | |
//! occurrences. | |
//! | |
//! So we need a way to find substrings and the number of times they occur. | |
//! If we can find all repeating patterns, and sort them so they're all | |
//! adjacent to one another, then we can get the values for the formula. | |
//! | |
//! To find the repeating patterns, a suffix array and its corresponding LCP | |
//! array can be generated. | |
//! | |
//! Visualizing the suffix array, we can see repeating patterns of text lining | |
//! up. But these patterns are followed by extra text. The LCP array indicates | |
//! where the extra text begins - or where the matching text ends. | |
//! The LCP array can be thought of as an histogram whose values show how many | |
//! chacters match between adjacent suffixes. | |
//! | |
//! Thinking conceptually of the LCP array as an histogram, the problem then | |
//! becomes a matter of finding the largest rectangle within the bars. | |
//! | |
//! The time constraints on this problem are tight, and the test cases are | |
//! designed to bog down unoptimized solutions. The code that produces the | |
//! suffix array needs to be fast. A naive approach won't work. Use one of the | |
//! more performant solutions for this. | |
//! | |
//! If the suffix array and LCP array generation are optimized, then a naive | |
//! algorithm for finding the largest rectangle will be okay. Because of data | |
//! locality, simply scanning right and left of all points will be fast enough. | |
//! However, there is an O(n) algorithm for this. | |
//! | |
use std::error::Error; | |
use std::io::{BufReader, BufRead, stdin}; | |
fn main() -> Result<(), Box<dyn Error>> { | |
let s = BufReader::new(stdin()).lines() | |
.map(|l| l.unwrap()) | |
.next().unwrap().chars() | |
.collect::<Vec<_>>(); | |
let len = s.len(); | |
let sfx = suffix_array(&s); | |
let lcp = lcp_array(&s, &sfx); | |
let mut stack = vec![]; | |
let mut max = len; | |
let mut i = 0; | |
// O(n) stack based largest rectangle algorithm. | |
while i < len { | |
match stack.last() { | |
Some(&top) if lcp[i] < lcp[top] => { | |
stack.pop(); | |
let h = lcp[top]; | |
let a = { | |
if let Some(&top2) = stack.last() { | |
h * (i - top2) | |
} else { | |
h * (i + 1 ) | |
} | |
}; | |
max = max.max(a); | |
}, | |
_ => { | |
stack.push(i); | |
i += 1; | |
} | |
} | |
} | |
while let Some(top) = stack.pop() { | |
let h = lcp[top]; | |
let a = { | |
if let Some(&top2) = stack.last() { | |
h * (i - top2) | |
} else { | |
h * (i + 1) | |
} | |
}; | |
max = max.max(a); | |
} | |
println!("{}", max); | |
Ok(()) | |
} | |
/// Builds a suffix array. Much more time efficient than the | |
/// substring sort method. The code this was adapted from can be found here: | |
/// https://www.geeksforgeeks.org/suffix-array-set-2-a-nlognlogn-algorithm/ | |
/// Also, referenced in that article: | |
/// http://www.stanford.edu/class/cs97si/suffix-array.pdf | |
/// | |
fn suffix_array(string: &[char]) -> Vec<usize> { | |
struct Sfx { idx: usize, rank: (isize, isize) } | |
let len = string.len(); | |
let mut sfx = Vec::with_capacity(len); | |
let mut ind = vec![0; len]; | |
let mut k = 2; | |
for i in 0..len { | |
let a = string[i] as isize; | |
let b = { | |
if i < len - 1 { | |
string[i + 1] as isize | |
} else { -1 } | |
}; | |
sfx.push(Sfx { idx: i, rank: (a, b) }); | |
} | |
sfx.sort_by_key(|s| s.rank); | |
while k < len { | |
let mut rank = 0; | |
let mut prev = sfx[0].rank.0; | |
sfx[0].rank.0 = 0; | |
ind[sfx[0].idx] = 0; | |
for i in 1..len { | |
if sfx[i].rank.0 != prev | |
|| sfx[i].rank.1 != sfx[i - 1].rank.1 { | |
rank += 1; | |
} | |
prev = sfx[i].rank.0; | |
sfx[i].rank.0 = rank; | |
ind[sfx[i].idx] = i; | |
} | |
for i in 0..len { | |
let idx2 = sfx[i].idx + k; | |
sfx[i].rank.1 = { | |
if idx2 < len { | |
sfx[ind[idx2]].rank.0 | |
} else { -1 } | |
}; | |
} | |
sfx.sort_by_key(|s| s.rank); | |
k *= 2; | |
} | |
sfx.iter().map(|s| s.idx).collect() | |
} | |
/// Create an LCP array using the Kasai algorithm. This code was adapted from | |
/// This example: | |
/// https://www.geeksforgeeks.org/%C2%AD%C2%ADkasais-algorithm-for-construction-of-lcp-array-from-suffix-array/ | |
/// | |
fn lcp_array(string: &[char], suffixes: &[usize]) -> Vec<usize> { | |
let len = suffixes.len(); | |
let mut lcp = vec![0; len]; | |
let mut inv = vec![0; len]; | |
let mut k = 0; | |
for i in 0..len { | |
inv[suffixes[i]] = i; | |
} | |
for i in 0..len { | |
if inv[i] == len - 1 { | |
k = 0; | |
continue; | |
} | |
let j = suffixes[inv[i] + 1]; | |
while i + k < len && j + k < len | |
&& string[i + k] == string[j + k] { | |
k += 1; | |
} | |
lcp[inv[i]] = k; | |
if k > 0 { | |
k -= 1; | |
} | |
} | |
lcp | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment