Last active
April 13, 2019 17:11
-
-
Save baratgabor/c2d8a6096c53cc9a24f5403f87b4abed to your computer and use it in GitHub Desktop.
Simple suffix trie for 26 char English alphabet with O(n^2) build time and linear (search phrase length) search time. Ascertains longest repeated substring automatically during build. Iterative traversal. No practical use due to the quadratic build time and immense memory use typical for tries.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class SuffixTrie | |
{ | |
private TrieNode _root = new TrieNode(); | |
private string _LRS = string.Empty; // Longest repeated substring | |
private SuffixTrie(string value) | |
=> AddWord(value); | |
public static SuffixTrie Build(string value) | |
=> new SuffixTrie(value); | |
public bool GetIndexesOf(string value, out IEnumerable<int> indexes) | |
{ | |
if (string.IsNullOrEmpty(value)) | |
throw new ArgumentException(nameof(value)); | |
var node = _root; | |
foreach (var c in value) | |
{ | |
node = node.Children[c - 'a']; | |
if (node == null) | |
break; | |
} | |
indexes = node?.StartIndexes ?? new List<int>(); | |
return indexes.Any(); | |
} | |
public bool Contains(string value) | |
{ | |
var node = _root; | |
foreach (var c in value) | |
{ | |
node = node.Children[c - 'a']; | |
if (node == null) return false; | |
} | |
return true; | |
} | |
public string GetLongestRepeatedSubstr() | |
=> _LRS; | |
private void AddWord(string value) | |
{ | |
var chars = value.ToLower().ToArray(); | |
for (int i = 0; i < chars.Length; i++) | |
AddSuffix(chars, i); | |
} | |
private void AddSuffix(char[] chars, int startIndex) | |
{ | |
var node = _root; | |
var LR = 0; | |
for (int i = startIndex; i < chars.Length; i++) | |
{ | |
if (node.Children[chars[i] - 'a'] == null) | |
{ | |
node.Branches++; | |
node = node.Children[chars[i] - 'a'] = new TrieNode(); | |
} | |
else | |
{ | |
node = node.Children[chars[i] - 'a']; | |
node.Repeats++; | |
LR++; | |
} | |
node.StartIndexes.Add(startIndex); | |
} | |
if (LR > _LRS.Length) | |
_LRS = GetSlice(chars, startIndex, LR); | |
node.EndPoints++; | |
} | |
private string GetSlice(char[] chars, int start, int len) | |
{ | |
var sb = new StringBuilder(); | |
for (int i = start; i < start + len; i++) | |
sb.Append(chars[i]); | |
return sb.ToString(); | |
} | |
class TrieNode | |
{ | |
public int Repeats; | |
public int Branches; | |
public int EndPoints; | |
public List<int> StartIndexes = new List<int>(); | |
public TrieNode[] Children = new TrieNode[26]; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment