Last active
January 8, 2019 00:00
-
-
Save nremond/fb63a27b6291053ca4dd1de81135de84 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import scala.collection.mutable | |
case class Node( | |
value: Option[Char] = None, | |
leaves: mutable.ArrayBuffer[String] = mutable.ArrayBuffer.empty, | |
nodes: mutable.ArrayBuffer[Node] = mutable.ArrayBuffer.empty) { | |
def checkNodes(suf: String): Boolean = { | |
val c = suf.head | |
val node = nodes.find(n => n.value == Some(c)) | |
node.foreach(n => n.addSuffix(suf.tail)) | |
node.isDefined | |
} | |
def checkLeaves(suf: String): Unit = { | |
val c = suf.head | |
val i = leaves.indexWhere(_.head == c) | |
if(i == -1) { | |
leaves.append(suf) | |
} else { | |
val leaf = leaves(i) | |
val node = Node(Some(leaf.head)) | |
node.addSuffix(suf.tail) | |
node.addSuffix(leaf.tail) | |
nodes.append(node) | |
} | |
} | |
def addSuffix(suf: String): Unit = { | |
if(suf.length > 0) { | |
if(!checkNodes(suf)) { | |
checkLeaves(suf) | |
} | |
} | |
} | |
def getLongestRepeatedSubString(): String = { | |
val substring = | |
if(nodes.isEmpty) | |
"" | |
else | |
nodes.map(_.getLongestRepeatedSubString()).maxBy(_.length) | |
value.fold("")(_.toString) + substring | |
} | |
def listRepeatedSubString(): Seq[String] = { | |
if(nodes.isEmpty) | |
Seq(value.fold("")(_.toString)) | |
else | |
nodes | |
.toSeq | |
.flatMap { n => | |
n.listRepeatedSubString() | |
} | |
.map(value.fold("")(_.toString) + _) | |
} | |
} | |
object SuffixTree { | |
def apply(s: String): Node = { | |
val node = Node() | |
(0 until s.length) | |
.foreach(i => node.addSuffix(s.substring(i))) | |
node | |
} | |
} | |
def cleanSuffixes(xs: Seq[String]): Seq[String] = { | |
xs.sortBy(_.length).filter(s => xs.indexWhere(ss => ss!=s && ss.contains(s)) == -1) | |
} | |
val st = SuffixTree("reninicolniacolacolas$") | |
// println(st) | |
println("--> longest:" + st.getLongestRepeatedSubString()) | |
println("--> all:" + st.listRepeatedSubString()) | |
println("--> longest distincts:" + cleanSuffixes(st.listRepeatedSubString())) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment