Skip to content

Instantly share code, notes, and snippets.

@senarukana
Created March 13, 2015 06:46
Show Gist options
  • Save senarukana/eea81781fdc10d211c1f to your computer and use it in GitHub Desktop.
Save senarukana/eea81781fdc10d211c1f to your computer and use it in GitHub Desktop.
字符串中重复次数最多的子串
ALPHABETSIZE = 26
def get_index(c):
return ord(c)-ord('a')
class PostfixNode:
def __init__(self):
self.cnt = 0
self.children = [None]*ALPHABETSIZE
def insert(self, s, i):
self.cnt += 1
if len(s) == i:
return self.cnt, s[:i]
idx = get_index(s[i])
if not self.children[idx]:
self.children[idx] = PostfixNode()
cnt, sub = self.children[idx].insert(s, i+1)
if cnt >= self.cnt or i == 0:
return cnt, sub
else:
return self.cnt, s[:i]
def get_most_frequent_words(s):
root = PostfixNode()
most_frequent = 0
words = ""
for i in range(len(s)):
substr = s[i:]
cnt, tmp = root.insert(substr, 0)
if cnt > most_frequent or (cnt == most_frequent and len(tmp) > len(words)):
most_frequent = cnt
words = tmp
return words
s = "aaaa"
print get_most_frequent_words(s)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment