Skip to content

Instantly share code, notes, and snippets.

@syusui-s
Last active August 24, 2022 01:48
Show Gist options
  • Save syusui-s/69846dacf623e6e9ee2b9604e17b0619 to your computer and use it in GitHub Desktop.
Save syusui-s/69846dacf623e6e9ee2b9604e17b0619 to your computer and use it in GitHub Desktop.
# Created by https://www.toptal.com/developers/gitignore/api/sbt,scala,metals
# Edit at https://www.toptal.com/developers/gitignore?templates=sbt,scala,metals
### Metals ###
.metals/
.bloop/
project/**/metals.sbt
### SBT ###
# Simple Build Tool
# http://www.scala-sbt.org/release/docs/Getting-Started/Directories.html#configuring-version-control
dist/*
target/
lib_managed/
src_managed/
project/boot/
project/plugins/project/
.history
.cache
.lib/
### Scala ###
*.class
*.log
# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
hs_err_pid*
# End of https://www.toptal.com/developers/gitignore/api/sbt,scala,metals
// TODO 中央値を計算して求めるようにする?
// 二分探索を実行できるようにする
import scala.collection.Searching._
// https://stackoverflow.com/questions/15718506/scala-how-to-print-case-classes-like-pretty-printed-tree
def time[T](f: () => T): (T, Long) = {
val start = System.nanoTime()
val res = f()
val takes = System.nanoTime() - start
(res, takes)
}
def formatNanoSec(v: Long): String = {
val ns = v % 1000
val us = v / 1000 % 1000
val ms = v / 1000000L
s"${ms}ms ${us}us ${ns}ns"
}
// Capacityの修正が必要
// N = ポインタの数
/**
* B+ Tree
*
* B+木は、B木と違い、リーフ側にしか値を持たない
* セパレータキーの左側には、キー未満のブランチが、
* セパレータキーの右側には、キー以上のブランチがくる
*/
sealed trait BTree[+K, +V] {
def capacity: Int
// def has(value: T): Boolean
def insert[K1 >: K: Ordering, V1 >: V](key: K1, value: V1): BTree[K1, V1]
def find[K1 >: K: Ordering](key: K1): Option[V]
def isOverflow: Boolean
}
object BTree {
def empty(capacity: Int): BTree[Nothing, Nothing] = Empty(capacity)
}
case class Branch[+K: Ordering, V](
capacity: Int,
separators: IndexedSeq[K],
subtrees: IndexedSeq[BTree[K, V]],
) extends BTree[K, V] {
require(capacity >= 1, "capacity should be greater than 1")
require(separators.length <= capacity, "length of separators should be less than or equal to capacity")
require(separators.length + 1 == subtrees.length, s"length of subtrees should be separators.length + 1, ${separators.length} ${subtrees.length}")
override def find[K1 >: K: Ordering](key: K1): Option[V] = {
separators.search(key) match {
case InsertionPoint(index) =>
subtrees(index).find(key)
case Found(index) =>
// セパレーターキーの右側には、そのキー"以上"のキーを持つブランチがあるので
// 見つかった場合、見つかった位置ではなく、見つかった位置の右側を見に行かないといけない
subtrees(index + 1).find(key)
}
}
override def insert[K1 >: K: Ordering, V1 >: V](key: K1, value: V1): BTree[K1, V1] = {
val (index, newTree) = separators.search(key) match {
case InsertionPoint(i) =>
(i, subtrees(i).insert(key, value))
case Found(i) =>
(i+1, subtrees(i+1).insert(key, value))
}
newTree match {
case Split(_, k, l, r) =>
insertLiftedKey(index, k, l, r)
case t =>
copy(subtrees = subtrees.updated(index, t))
}
}
def insertLiftedKey[K1 >: K: Ordering, V1 >: V](index: Int, liftedKey: K1, left: BTree[K1, V1], right: BTree[K1, V1]): BTree[K1, V1] = {
val (s1, s2) = separators.splitAt(index)
val (t1, t2) = subtrees.splitAt(index)
val newSeparators = s1 :+ liftedKey :++ s2
val newSubtrees = t1 :+ left :+ right :++ t2.tail
if (isOverflow) {
val halfIndex = newSeparators.length / 2
val (ns1, ns2) = newSeparators.splitAt(halfIndex)
val (nt1, nt2) = newSubtrees.splitAt(halfIndex + 1)
Split(capacity, ns2(0), Branch(capacity, ns1, nt1), Branch(capacity, ns2, Empty(capacity) +: nt2))
} else {
Branch(capacity, newSeparators, newSubtrees)
}
}
override def isOverflow: Boolean = {
subtrees.length >= capacity + 1
}
}
case class Leaf[+K: Ordering, +V](
capacity: Int,
keys: IndexedSeq[K],
values: IndexedSeq[V],
) extends BTree[K, V] {
require(capacity >= 1, "capacity should be greater than or equal to 1")
require(keys.length == values.length, "keys should have a same length to values")
require(keys.length <= capacity + 1, "keys length should be less than or equal to capacity + 1")
// Leafには、単純に値が入っているだけ
// なので、二分探索して見つけるだけで良い
override def find[K1 >: K: Ordering](key: K1): Option[V] = {
keys.search(key) match {
case InsertionPoint(_) => None
case Found(index) => Some(values(index))
}
}
override def insert[K1 >: K: Ordering, V1 >: V](key: K1, value: V1): BTree[K1, V1] = {
if (isOverflow) {
split.insert(key, value)
} else {
keys.search(key) match {
case InsertionPoint(index) => {
val (k1, k2) = keys.splitAt(index)
val (v1, v2) = values.splitAt(index)
Leaf(capacity, k1 :+ key :++ k2, v1 :+ value :++ v2)
}
case Found(index) => {
// 置き換える
val (v1, v2) = values.splitAt(index)
Leaf(capacity, keys, v1 :+ value :++ v2.tail)
}
}
}
}
def split: BTree[K, V] = {
require(isOverflow, "not overflow")
val halfIndex = keys.length / 2
val (k1, k2) = keys.splitAt(halfIndex)
val (v1, v2) = values.splitAt(halfIndex)
Split(capacity, k2(0), Leaf(capacity, k1, v1), Leaf(capacity, k2, v2))
}
override def isOverflow: Boolean = keys.length >= capacity + 1
}
case class Split[+K: Ordering, +V](capacity: Int, liftedKey: K, left: BTree[K, V], right: BTree[K, V]) extends BTree[K, V] {
require(capacity >= 1)
override def find[K1 >: K: Ordering](key: K1): Option[V] = {
val cmp = implicitly[Ordering[K1]].compare(key, liftedKey)
if (cmp < 0)
left.find(key)
else
right.find(key)
}
override def insert[K1 >: K: Ordering, V1 >: V](key: K1, value: V1): BTree[K1, V1] = {
// Split.insert always succeeds?
// No, Split can be root node.
val cmp = implicitly[Ordering[K1]].compare(key, liftedKey)
if (cmp < 0) {
left.insert(key, value) match {
case Split(_, k, l, r) => Branch(capacity, IndexedSeq(k, liftedKey), IndexedSeq(l, r, right))
case l => copy(left = l)
}
} else {
right.insert(key, value) match {
case Split(_, k, l, r) => Branch(capacity, IndexedSeq(liftedKey, k), IndexedSeq(left, l, r))
case r => copy(right = r)
}
}
}
override def isOverflow: Boolean = true
}
case class Empty(capacity: Int) extends BTree[Nothing, Nothing] {
require(capacity >= 1, "capacity should be greater than or equal to 1")
override def find[K: Ordering](key: K): Option[Nothing] = None
override def insert[K: Ordering, V](key: K, value: V): BTree[K, V] =
Leaf(capacity, keys = IndexedSeq(key), values = IndexedSeq(value))
override def isOverflow: Boolean = true
}
// 探索
Leaf(3, IndexedSeq(1, 2, 5), IndexedSeq("one", "two", "five")).find(2).ensuring(_ == Some("two"))
Leaf(3, IndexedSeq(1, 2, 5), IndexedSeq("one", "two", "five")).find(5).ensuring(_ == Some("five"))
Leaf(3, IndexedSeq(1, 2, 5), IndexedSeq("one", "two", "five")).find(3).ensuring(_ == None)
Empty(3).find(0).ensuring(_ == None)
val br1 = Branch(
capacity = 2,
separators = IndexedSeq(3),
subtrees = IndexedSeq(
Leaf(2, IndexedSeq(1, 2), IndexedSeq("one", "two")),
Leaf(2, IndexedSeq(5, 8), IndexedSeq("five", "eight")),
),
)
Seq((1, "one"), (2, "two"), (5, "five"), (8, "eight")).foreach { case (k, v) =>
br1.find(k).ensuring(_ == Some(v), s"Not Found: ${k}")
}
br1.find(0).ensuring(_ == None)
val sp1 = Split(2, 13, Leaf(2, IndexedSeq(5), IndexedSeq("V")), Leaf(2, IndexedSeq(15), IndexedSeq("XV")))
Seq((5, "V"), (15, "XV")).foreach { case (k, v) =>
sp1.find(k).ensuring(_ == Some(v))
}
// オーバーフロー
Leaf(2, IndexedSeq(1), IndexedSeq(1)).isOverflow
.ensuring(_ == false)
Leaf(2, IndexedSeq(1, 2, 3), IndexedSeq(1, 2, 3)).isOverflow
.ensuring(_ == true)
Empty(2).isOverflow
.ensuring(_ == true)
br1.isOverflow
.ensuring(_ == false)
val br2 = Branch(
capacity = 2,
separators = IndexedSeq(1, 5),
subtrees = IndexedSeq(
Leaf(2, IndexedSeq(-1, 0), IndexedSeq("minus one", "zero")),
Leaf(2, IndexedSeq(1, 2), IndexedSeq("one", "two")),
Leaf(2, IndexedSeq(5, 8), IndexedSeq("five", "eight")),
),
)
// children not overflow
br2.isOverflow
.ensuring(_ == true)
// 分割
Leaf(2, IndexedSeq(10, 13, 15), IndexedSeq("X", "XIII", "XV")).split
.ensuring(_ ==
Split(
2,
13,
Leaf(2, IndexedSeq(10), IndexedSeq("X")),
Leaf(2, IndexedSeq(13, 15), IndexedSeq("XIII", "XV"))
)
)
// 挿入
/// 空き有
Empty(2).insert(1, "I")
.ensuring(_ == Leaf(2, IndexedSeq(1), IndexedSeq("I")))
Leaf(2, IndexedSeq(13), IndexedSeq("XIII")).insert(10, "X")
.ensuring(_ == Leaf(2, IndexedSeq(10, 13), IndexedSeq("X", "XIII")))
Leaf(2, IndexedSeq(10, 13), IndexedSeq("X", "XIII")).insert(15, "XV")
.ensuring(_ == Leaf(2, IndexedSeq(10, 13, 15), IndexedSeq("X", "XIII", "XV")))
br1.insert(3, "three")
.ensuring(_ == Branch(
2,
IndexedSeq(3),
IndexedSeq(
Leaf(2, IndexedSeq(1, 2), IndexedSeq("one", "two")),
Leaf(2, IndexedSeq(3, 5, 8), IndexedSeq("three", "five", "eight")),
),
))
br2.insert(15, "十五")
/// オーバーフロー時
val l1 = Leaf(2, IndexedSeq(10, 13, 15), IndexedSeq("X", "XIII", "XV"))
l1.isOverflow
.ensuring(_ == true)
l1.insert(4, "IV")
.ensuring(_ ==
Split(2, 13,
Leaf(2, IndexedSeq(4, 10), IndexedSeq("IV", "X")),
Leaf(2, IndexedSeq(13, 15), IndexedSeq("XIII", "XV"))
)
)
val br3 = Branch(
2,
IndexedSeq(10, 20),
IndexedSeq(
Leaf(2, IndexedSeq(5, 6, 7), IndexedSeq("五", "六", "七")),
Leaf(2, IndexedSeq(10, 13, 14), IndexedSeq("十", "十三", "十四")),
Leaf(2, IndexedSeq(20, 23, 24), IndexedSeq("二十", "二十三", "二十四")),
)
).insert(12, "十二")
br3.ensuring(_ == Split(
2,
13,
Branch(
2,
IndexedSeq(10),
IndexedSeq(
Leaf(2, IndexedSeq(5, 6, 7), IndexedSeq("五", "六", "七")),
Leaf(2, IndexedSeq(10, 12), IndexedSeq("十", "十二")),
)
),
Branch(
2,
IndexedSeq(13, 20),
IndexedSeq(
Empty(2),
Leaf(2, IndexedSeq(13, 14), IndexedSeq("十三", "十四")),
Leaf(2, IndexedSeq(20, 23, 24), IndexedSeq("二十", "二十三", "二十四")),
)
)
))
br3.find(12).ensuring(_ == Some("十二"))
/// 置換
Leaf(3, IndexedSeq(10, 13, 15), IndexedSeq("X", "XIII", "XV")).insert(15, "十五")
.ensuring(_ == Leaf(3, IndexedSeq(10, 13, 15), IndexedSeq("X", "XIII", "十五")))
// val randomList = List.fill(1<<6)(Math.round(Math.random() * 65535))
val randomList = (1L to 42L).toSeq
val (tr1, t) = time { () =>
randomList.foldLeft(
Empty(5).asInstanceOf[BTree[Long, Long]]
) { (t: BTree[Long, Long], v: Long) =>
t.insert(v, v)
}
}
println(s"time: ${formatNanoSec(t)}")
val (_, t2) = time { () =>
randomList.foreach { v =>
tr1.find(v).ensuring(_ == Some(v))
}
}
println(s"time: ${formatNanoSec(t2)}")
{
val (tr1, t) = time { () =>
randomList.foldLeft(
new java.util.TreeMap[Long, Long]
) { (t: java.util.TreeMap[Long, Long], v: Long) =>
t.put(v, v)
t
}
}
println(s"time: ${formatNanoSec(t)}")
val (_, t2) = time { () =>
randomList.foreach { v =>
tr1.get(v).ensuring(_ == v)
}
}
println(s"time: ${formatNanoSec(t2)}")
1
}
case class GraphvizNode(name: String, label: String, children: Seq[GraphvizNode]) {
def nodeDefinition: String = {
s"""
${name} [label="${label}"]
${children.map(_.nodeDefinition).mkString("\n")}
""".stripMargin
}
def arrowDefinition: String = {
s"""
${children.map(c => s"${name}:${c.name} -> ${c.name}").mkString("\n")}
${children.map(_.arrowDefinition).mkString("\n")}
""".stripMargin
}
}
class BTreeGraphviz[K, V](val btree: BTree[K, V]) {
override def toString: String = {
val g = graphviz
s"""
digraph G {
node [shape=record];
${g.nodeDefinition}
${g.arrowDefinition}
}
""".stripMargin
}
def graphviz: GraphvizNode = {
btree match {
case Branch(_, separators, subtrees) =>
val sepLabel = separators.mkString("|")
val subtreeNodes = subtrees.map(t => new BTreeGraphviz[K, V](t).graphviz)
val subtreeRefLabels = subtreeNodes.map(t => s"<${t.name}> ●").mkString("|")
val label = s"""{{${sepLabel}}|{${subtreeRefLabels}}}"""
GraphvizNode(name, label, subtreeNodes)
case Leaf(_, keys, values) =>
val keysLabel = keys.mkString("|")
val valuesLabel = values.mkString("|")
val label = s"""{{${keysLabel}}|{${valuesLabel}}}"""
GraphvizNode(name, label, Seq())
case Split(_, key, left, right) =>
val lNode = new BTreeGraphviz[K, V](left).graphviz
val rNode = new BTreeGraphviz[K, V](right).graphviz
val label = s"""{${key}|{<${lNode.name}>●|<${rNode.name}> ●}}"""
GraphvizNode(name, label, Seq(lNode, rNode))
case Empty(_) =>
GraphvizNode(name, "", Seq())
}
}
def name: String = {
(Math.round(Math.abs(btree.hashCode) * Math.random())).toString
}
}
println(new BTreeGraphviz[Long, Long](tr1).toString)
// https://stackoverflow.com/a/55032051
def pprint(obj: Any, depth: Int = 0, paramName: Option[String] = None): Unit = {
val indent = " " * depth
val prettyName = paramName.fold("")(x => s"$x: ")
val ptype = obj match { case _: Iterable[Any] => "" case obj: Product => obj.productPrefix case _ => obj.toString }
println(s"$indent$prettyName$ptype")
obj match {
case seq: Iterable[Any] =>
seq.zipWithIndex.foreach { case (v, idx) =>
pprint(v, depth + 1, Some(s"${idx}"))
}
case obj: Product =>
(obj.productIterator zip obj.productElementNames)
.foreach { case (subObj, paramName) => pprint(subObj, depth + 1, Some(paramName)) }
case _ =>
}
}
case class Slots[K, V](
capacity: Int,
splitPointer: Int,
entries: Seq[Option[Bucket[K, V]]],
) {
val bucketCapacity = 2
val doubleCapacity = capacity * 2
require(capacity > 0)
require(entries.length == capacity || entries.length == doubleCapacity)
require(splitPointer >= 0 && splitPointer < capacity)
def insert(key: K, value: V): Slots[K, V] = {
val idx = indexFor(key)
entries(idx) match {
case Some(bucket) => {
val result = bucket.push(key, value)
val updatedEntries = entries.updated(idx, Some(result.bucket))
val newSlots = copy(entries = updatedEntries)
if (result.overflow) newSlots.split
else newSlots
}
case None => {
val result = Bucket(capacity = bucketCapacity).push(key, value)
result.ensuring(_.overflow == false)
copy(entries = entries.updated(idx, Some(result.bucket)))
}
}
}
def find(key: K): Option[V] = {
val bucket = entries(indexFor(key))
bucket.flatMap { b => b.find(key) }
}
def indexFor(key: K): Int = {
val hash1 = key.hashCode % capacity
if (hash1 < splitPointer) {
key.hashCode % doubleCapacity
} else {
key.hashCode % capacity
}
}
def split: Slots[K, V] = {
val originalBucket = entries(splitPointer)
val newSlots: Slots[K, V] = {
val newPointer = (splitPointer + 1) % capacity
val newCapacity =
if (newPointer == 0) capacity * 2
else capacity
val cleared = entries.updated(splitPointer, None)
val newEntries = {
if (cleared.length < doubleCapacity) cleared ++ Seq.fill(capacity)(None)
else cleared
}
.ensuring(_.length == doubleCapacity)
val splitSlots = Slots(
capacity = newCapacity,
entries = newEntries,
splitPointer = newPointer,
)
splitSlots
}
/*
println("=====SPLIT=====")
println(originalBucket)
println(newSlots)
println("===============")
*/
originalBucket match {
case Some(bucket) => {
def appendAll(slots: Slots[K, V], bucket: Bucket[K, V]): Slots[K, V] = {
val newSlots = bucket.entries.foldLeft(slots) { case (s, (k, v)) =>
s.insert(k, v)
}
bucket.nextBucket match {
case Some(next) => appendAll(newSlots, next)
case None => newSlots
}
}
appendAll(newSlots, bucket)
}
case None => newSlots
}
}
}
object Slots {
def create[K, V](): Slots[K, V] = {
withCapacity(32)
}
def withCapacity[K, V](capacity: Int): Slots[K, V] = {
new Slots(
capacity = capacity,
entries = Seq.fill(capacity)(None),
splitPointer = 0,
)
}
}
case class BucketPushResult[K, V](
bucket: Bucket[K, V],
overflow: Boolean
)
case class Bucket[K, V](
capacity: Int = 32,
entries: Seq[Tuple2[K, V]] = Seq(),
nextBucket: Option[Bucket[K, V]] = None,
) {
require(capacity > 0, "capacity should be > 0")
require(entries.length >= 0, "length of entries should be >= 0")
require(entries.length <= capacity, "length of entries should be less than capacity")
def push(key: K, value: V): BucketPushResult[K, V] = {
if (isFull) {
nextBucket match {
case Some(next) => {
val result = next.push(key, value)
val newBucket = copy(nextBucket = Some(result.bucket))
result.copy(bucket = newBucket)
}
case None => {
val result = Bucket(capacity = capacity).push(key, value)
.ensuring(_.bucket.capacity == capacity)
.ensuring(_.bucket.entries.length == 1)
val newBucket = copy(nextBucket = Some(result.bucket))
BucketPushResult(
bucket = newBucket,
overflow = true
)
}
}
} else {
// TODO how we overwrite value
BucketPushResult(
bucket = copy(entries = entries :+ (key -> value)),
overflow = false,
)
}
}
def find(key: K): Option[V] = {
entries.find { case (k, _) => k == key } match {
case Some((_, v)) => Some(v)
case None => {
nextBucket.flatMap { _.find(key) }
}
}
}
def isFull: Boolean = {
entries.length == capacity
}
}
Bucket[Int, Int](capacity = 1).push(1, 1).bucket
.ensuring(_ == Bucket(1, Seq((1, 1))), None)
Bucket[Int, Int](capacity = 1).push(1, 1).bucket.push(2, 2).bucket
.ensuring(_ == Bucket(1, Seq((1, 1)), Some(Bucket(1, Seq((2, 2)), None))))
Bucket[Int, Int](capacity = 1).push(1, 1).bucket.find(1)
.ensuring(_ == Some(1))
val s1 = (1 to 512).foldLeft(Slots.withCapacity[Int, Int](1)) { (s, n) => s.insert(n, n) }
(1 to 512).forall { e => s1.find(e).nonEmpty }.ensuring(_ == true)
pprint(s1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment