Skip to content

Instantly share code, notes, and snippets.

@kaja47
Created April 2, 2016 23:22
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kaja47/66a6f6e36ad88e04c0b14904a0cd2f23 to your computer and use it in GitHub Desktop.
Save kaja47/66a6f6e36ad88e04c0b14904a0cd2f23 to your computer and use it in GitHub Desktop.
quick and dirty sketch of burstsort
import java.util.Arrays
// BurstSort
// Cache-Conscious Sorting of Large Sets of Strings with Dynamic Tries
// http://goanna.cs.rmit.edu.au/~jz/fulltext/alenex03.pdf
class BurstLeaf(initSize: Int) {
var size: Int = 0
var values: Array[String] = new Array[String](initSize)
def add(str: String) = {
values(size) = str
size += 1
}
def resize(factor: Int) = {
values = Arrays.copyOfRange(values, 0, values.length * factor)
}
override def toString = values.filter(_ != null).mkString("BurstLeaf(", ",", ")")
}
class BurstTrie {
val initSize = 16
val resizeFactor = 8
val maxSize = 1024
var size = 0
val root: Array[AnyRef] = new Array[AnyRef](257)
def leaves = {
def leavesOf(node: Array[AnyRef]): Seq[BurstLeaf] =
node.flatMap { child =>
child match {
case null => Seq()
case leaf: BurstLeaf => Seq(leaf)
case node: Array[AnyRef] => leavesOf(node)
}
}
leavesOf(root)
}
def sorted = {
var pos = 0
val res = new Array[String](size)
def run(node: Array[AnyRef]): Unit = {
doNode(node(256))
for (i <- 0 until 256) {
doNode(node(i))
}
}
def doNode(n: AnyRef) = n match {
case null =>
case leaf: BurstLeaf =>
Arrays.sort(leaf.values.asInstanceOf[Array[Object]], 0, leaf.size)
System.arraycopy(leaf.values, 0, res, pos, leaf.size)
pos += leaf.size
case node: Array[AnyRef] => run(node)
}
run(root)
res
}
def ++= (strs: Seq[String]): this.type = {
for (str <- strs) this += str
this
}
def += (str: String): this.type = {
def add(str: String, pos: Int, node: Array[AnyRef]): Unit = {
val char = if (str.length > pos) str.charAt(pos) else 256
node(char) match {
case null =>
val leaf = new BurstLeaf(initSize)
leaf.add(str)
node(char) = leaf
case leaf: BurstLeaf =>
// add string, possibly resize or burst
if (leaf.size == leaf.values.length) {
if (leaf.size * resizeFactor <= maxSize || char == 256) { // resize
leaf.resize(resizeFactor)
leaf.add(str)
} else { // burst
println("burst")
val newNode = new Array[AnyRef](257)
node(char) = newNode
for (i <- 0 until leaf.size) {
add(leaf.values(i), pos+1, newNode)
}
add(str, pos+1, newNode)
}
} else {
leaf.add(str)
}
case child: Array[AnyRef] =>
add(str, pos+1, child)
}
}
add(str, 0, root)
size += 1
this
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment