Skip to content

Instantly share code, notes, and snippets.

@micromaomao
Created July 21, 2020 19:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save micromaomao/ec4751f10442368d7ed0930200ed7a39 to your computer and use it in GitHub Desktop.
Save micromaomao/ec4751f10442368d7ed0930200ed7a39 to your computer and use it in GitHub Desktop.
#include <string>
#include <algorithm>
#include <optional>
#include <vector>
#include <cassert>
using namespace std;
optional<string_view> solve(string_view chars, size_t assume_len) {
if (assume_len >= chars.size()) {
return optional<string_view>();
}
const uint32_t BASE = 7;
const uint32_t BASE_RCP = 18725;
const uint32_t HM_SIZE = 65537;
vector<vector<string_view>> hm = vector(HM_SIZE, vector<string_view>());
uint32_t current_hash = 0;
uint32_t cpow = 1;
for (size_t i = 0; i < assume_len; i++) {
char c = chars.at(i);
current_hash = ((current_hash % HM_SIZE) + (cpow * uint32_t(c)) % HM_SIZE) % HM_SIZE;
if (i != assume_len - 1) {
cpow = (cpow * BASE) % HM_SIZE;
}
}
hm[current_hash].push_back(string_view(chars).substr(0, assume_len));
for (size_t i = 1; i < chars.size() - assume_len + 1; i++) {
current_hash = (current_hash + HM_SIZE - uint32_t(chars.at(i - 1))) % HM_SIZE;
current_hash = (current_hash * BASE_RCP) % HM_SIZE;
current_hash = (current_hash + cpow * uint32_t(chars.at(i + assume_len - 1)) % HM_SIZE) % HM_SIZE;
auto current_substr = string_view(chars).substr(i, assume_len);
auto &mm = hm.at(current_hash);
auto match = find_if(mm.cbegin(), mm.cend(), [&](const string_view &sv) -> bool {
return current_substr == sv;
});
if (match != mm.cend()) {
assert(match->data() != current_substr.data());
return optional(*match);
}
mm.push_back(current_substr);
}
return optional<string_view>();
}
class Solution {
public:
string longestDupSubstring(string chars) {
size_t low = 1;
size_t high = chars.size();
optional<string_view> got_ans;
while (low < high) {
size_t mid = (low + high - 1) / 2;
auto sol = solve(chars, mid);
if (sol.has_value()) {
low = mid + 1;
got_ans = sol;
} else {
high = mid;
}
}
if (got_ans.has_value()) {
return string(got_ans.value());
} else {
return "";
}
}
};
struct Solution;
impl Solution {
pub fn longest_dup_substring(s: String) -> String {
let mut chars = s.as_bytes();
fn solve(chars: &[u8], assume_len: usize) -> Option<&[u8]> {
if assume_len >= chars.len() {
return None;
}
const BASE: u32 = 7;
const BASE_RCP: u32 = 18725;
const HM_SIZE: u32 = 65537;
let mut hm: Vec<Vec<&[u8]>> = vec![Vec::new(); HM_SIZE as usize];
let mut current_hash = 0u32;
let mut cpow: u32 = 1;
for i in 0..assume_len {
let c = chars[i];
current_hash = ((current_hash % HM_SIZE) + (cpow * (c as u32)) % HM_SIZE) % HM_SIZE;
if i != assume_len - 1 {
cpow = (cpow * BASE) % HM_SIZE;
}
}
hm[current_hash as usize].push(&chars[0..assume_len]);
for i in 1..(chars.len() - assume_len + 1) {
current_hash = (current_hash + HM_SIZE - (chars[i - 1] as u32)) % HM_SIZE;
current_hash = (current_hash * BASE_RCP) % HM_SIZE;
current_hash = (current_hash + cpow * (chars[i + assume_len - 1] as u32) % HM_SIZE) % HM_SIZE;
let current_substr = &chars[i..i + assume_len];
let mm = &mut hm[current_hash as usize];
if let Some(prev) = mm.iter().find(|x| **x == current_substr) {
assert_ne!(prev.as_ptr(), current_substr.as_ptr());
return Some(*prev);
}
mm.push(current_substr);
}
None
}
let mut low = 1;
let mut high = chars.len();
let mut got_ans: Option<&[u8]> = None;
while low < high {
let mid = (low + high - 1) / 2;
if let Some(ans) = solve(chars, mid) {
low = mid + 1;
got_ans = Some(ans);
} else {
high = mid;
}
}
return String::from_utf8(got_ans.unwrap_or(&[]).to_vec()).unwrap();
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment