Created
November 27, 2011 12:58
-
-
Save neesenk/1397527 to your computer and use it in GitHub Desktop.
查找一个字符串的所有重复字串
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* zhiyongliu <zhiyongliu@tencent.com> | |
* | |
* 查找一个字符串的所有的重复字串 | |
* 算法的核心思想是两个相同字符串的前缀一定相同, 在查找过程中先查找短的重复字串, | |
* 然后利用短的字串构建次短的重复字串, 直到字符串本身的长度或前一个结果为空集 | |
* 算法的复杂度为O(n) + O(m), n为字符串长度, m为重复字串的数目 | |
*/ | |
function findRepeatedSubstring(str) | |
{ | |
var stack = [{}]; | |
var push = function (ht, pos, off) { | |
if (!ht[pos]) | |
ht[pos] = []; | |
ht[pos].push(off); | |
}; | |
var trim = function (ht) { // 清除所有数目为1的字串 | |
var n = 0; | |
for (var elem in ht) { | |
if (ht[elem].length <= 1) | |
delete ht[elem]; | |
else | |
n++; | |
} | |
return n; | |
}; | |
for (var i = 0; i < str.length; i++) | |
push(stack[0], str[i], i); | |
if (trim(stack[0]) < 1) | |
return []; | |
for (var i = 1; i < str.length; i++) { | |
var obj = {}; | |
for (var elem in stack[i - 1]) { | |
for (var j = 0, sub = stack[i - 1][elem]; j < sub.length; j++) { | |
if (sub[j] + i + 1 > str.length) | |
continue; | |
push(obj, str.substr(sub[j], i + 1), sub[j]); | |
} | |
} | |
if (trim(obj) < 1) | |
break; | |
stack.push(obj); | |
} | |
return stack; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks