Last active
August 24, 2022 01:48
-
-
Save syusui-s/69846dacf623e6e9ee2b9604e17b0619 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Created by https://www.toptal.com/developers/gitignore/api/sbt,scala,metals | |
# Edit at https://www.toptal.com/developers/gitignore?templates=sbt,scala,metals | |
### Metals ### | |
.metals/ | |
.bloop/ | |
project/**/metals.sbt | |
### SBT ### | |
# Simple Build Tool | |
# http://www.scala-sbt.org/release/docs/Getting-Started/Directories.html#configuring-version-control | |
dist/* | |
target/ | |
lib_managed/ | |
src_managed/ | |
project/boot/ | |
project/plugins/project/ | |
.history | |
.cache | |
.lib/ | |
### Scala ### | |
*.class | |
*.log | |
# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml | |
hs_err_pid* | |
# End of https://www.toptal.com/developers/gitignore/api/sbt,scala,metals |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// TODO 中央値を計算して求めるようにする? | |
// 二分探索を実行できるようにする | |
import scala.collection.Searching._ | |
// https://stackoverflow.com/questions/15718506/scala-how-to-print-case-classes-like-pretty-printed-tree | |
def time[T](f: () => T): (T, Long) = { | |
val start = System.nanoTime() | |
val res = f() | |
val takes = System.nanoTime() - start | |
(res, takes) | |
} | |
def formatNanoSec(v: Long): String = { | |
val ns = v % 1000 | |
val us = v / 1000 % 1000 | |
val ms = v / 1000000L | |
s"${ms}ms ${us}us ${ns}ns" | |
} | |
// Capacityの修正が必要 | |
// N = ポインタの数 | |
/** | |
* B+ Tree | |
* | |
* B+木は、B木と違い、リーフ側にしか値を持たない | |
* セパレータキーの左側には、キー未満のブランチが、 | |
* セパレータキーの右側には、キー以上のブランチがくる | |
*/ | |
sealed trait BTree[+K, +V] { | |
def capacity: Int | |
// def has(value: T): Boolean | |
def insert[K1 >: K: Ordering, V1 >: V](key: K1, value: V1): BTree[K1, V1] | |
def find[K1 >: K: Ordering](key: K1): Option[V] | |
def isOverflow: Boolean | |
} | |
object BTree { | |
def empty(capacity: Int): BTree[Nothing, Nothing] = Empty(capacity) | |
} | |
case class Branch[+K: Ordering, V]( | |
capacity: Int, | |
separators: IndexedSeq[K], | |
subtrees: IndexedSeq[BTree[K, V]], | |
) extends BTree[K, V] { | |
require(capacity >= 1, "capacity should be greater than 1") | |
require(separators.length <= capacity, "length of separators should be less than or equal to capacity") | |
require(separators.length + 1 == subtrees.length, s"length of subtrees should be separators.length + 1, ${separators.length} ${subtrees.length}") | |
override def find[K1 >: K: Ordering](key: K1): Option[V] = { | |
separators.search(key) match { | |
case InsertionPoint(index) => | |
subtrees(index).find(key) | |
case Found(index) => | |
// セパレーターキーの右側には、そのキー"以上"のキーを持つブランチがあるので | |
// 見つかった場合、見つかった位置ではなく、見つかった位置の右側を見に行かないといけない | |
subtrees(index + 1).find(key) | |
} | |
} | |
override def insert[K1 >: K: Ordering, V1 >: V](key: K1, value: V1): BTree[K1, V1] = { | |
val (index, newTree) = separators.search(key) match { | |
case InsertionPoint(i) => | |
(i, subtrees(i).insert(key, value)) | |
case Found(i) => | |
(i+1, subtrees(i+1).insert(key, value)) | |
} | |
newTree match { | |
case Split(_, k, l, r) => | |
insertLiftedKey(index, k, l, r) | |
case t => | |
copy(subtrees = subtrees.updated(index, t)) | |
} | |
} | |
def insertLiftedKey[K1 >: K: Ordering, V1 >: V](index: Int, liftedKey: K1, left: BTree[K1, V1], right: BTree[K1, V1]): BTree[K1, V1] = { | |
val (s1, s2) = separators.splitAt(index) | |
val (t1, t2) = subtrees.splitAt(index) | |
val newSeparators = s1 :+ liftedKey :++ s2 | |
val newSubtrees = t1 :+ left :+ right :++ t2.tail | |
if (isOverflow) { | |
val halfIndex = newSeparators.length / 2 | |
val (ns1, ns2) = newSeparators.splitAt(halfIndex) | |
val (nt1, nt2) = newSubtrees.splitAt(halfIndex + 1) | |
Split(capacity, ns2(0), Branch(capacity, ns1, nt1), Branch(capacity, ns2, Empty(capacity) +: nt2)) | |
} else { | |
Branch(capacity, newSeparators, newSubtrees) | |
} | |
} | |
override def isOverflow: Boolean = { | |
subtrees.length >= capacity + 1 | |
} | |
} | |
case class Leaf[+K: Ordering, +V]( | |
capacity: Int, | |
keys: IndexedSeq[K], | |
values: IndexedSeq[V], | |
) extends BTree[K, V] { | |
require(capacity >= 1, "capacity should be greater than or equal to 1") | |
require(keys.length == values.length, "keys should have a same length to values") | |
require(keys.length <= capacity + 1, "keys length should be less than or equal to capacity + 1") | |
// Leafには、単純に値が入っているだけ | |
// なので、二分探索して見つけるだけで良い | |
override def find[K1 >: K: Ordering](key: K1): Option[V] = { | |
keys.search(key) match { | |
case InsertionPoint(_) => None | |
case Found(index) => Some(values(index)) | |
} | |
} | |
override def insert[K1 >: K: Ordering, V1 >: V](key: K1, value: V1): BTree[K1, V1] = { | |
if (isOverflow) { | |
split.insert(key, value) | |
} else { | |
keys.search(key) match { | |
case InsertionPoint(index) => { | |
val (k1, k2) = keys.splitAt(index) | |
val (v1, v2) = values.splitAt(index) | |
Leaf(capacity, k1 :+ key :++ k2, v1 :+ value :++ v2) | |
} | |
case Found(index) => { | |
// 置き換える | |
val (v1, v2) = values.splitAt(index) | |
Leaf(capacity, keys, v1 :+ value :++ v2.tail) | |
} | |
} | |
} | |
} | |
def split: BTree[K, V] = { | |
require(isOverflow, "not overflow") | |
val halfIndex = keys.length / 2 | |
val (k1, k2) = keys.splitAt(halfIndex) | |
val (v1, v2) = values.splitAt(halfIndex) | |
Split(capacity, k2(0), Leaf(capacity, k1, v1), Leaf(capacity, k2, v2)) | |
} | |
override def isOverflow: Boolean = keys.length >= capacity + 1 | |
} | |
case class Split[+K: Ordering, +V](capacity: Int, liftedKey: K, left: BTree[K, V], right: BTree[K, V]) extends BTree[K, V] { | |
require(capacity >= 1) | |
override def find[K1 >: K: Ordering](key: K1): Option[V] = { | |
val cmp = implicitly[Ordering[K1]].compare(key, liftedKey) | |
if (cmp < 0) | |
left.find(key) | |
else | |
right.find(key) | |
} | |
override def insert[K1 >: K: Ordering, V1 >: V](key: K1, value: V1): BTree[K1, V1] = { | |
// Split.insert always succeeds? | |
// No, Split can be root node. | |
val cmp = implicitly[Ordering[K1]].compare(key, liftedKey) | |
if (cmp < 0) { | |
left.insert(key, value) match { | |
case Split(_, k, l, r) => Branch(capacity, IndexedSeq(k, liftedKey), IndexedSeq(l, r, right)) | |
case l => copy(left = l) | |
} | |
} else { | |
right.insert(key, value) match { | |
case Split(_, k, l, r) => Branch(capacity, IndexedSeq(liftedKey, k), IndexedSeq(left, l, r)) | |
case r => copy(right = r) | |
} | |
} | |
} | |
override def isOverflow: Boolean = true | |
} | |
case class Empty(capacity: Int) extends BTree[Nothing, Nothing] { | |
require(capacity >= 1, "capacity should be greater than or equal to 1") | |
override def find[K: Ordering](key: K): Option[Nothing] = None | |
override def insert[K: Ordering, V](key: K, value: V): BTree[K, V] = | |
Leaf(capacity, keys = IndexedSeq(key), values = IndexedSeq(value)) | |
override def isOverflow: Boolean = true | |
} | |
// 探索 | |
Leaf(3, IndexedSeq(1, 2, 5), IndexedSeq("one", "two", "five")).find(2).ensuring(_ == Some("two")) | |
Leaf(3, IndexedSeq(1, 2, 5), IndexedSeq("one", "two", "five")).find(5).ensuring(_ == Some("five")) | |
Leaf(3, IndexedSeq(1, 2, 5), IndexedSeq("one", "two", "five")).find(3).ensuring(_ == None) | |
Empty(3).find(0).ensuring(_ == None) | |
val br1 = Branch( | |
capacity = 2, | |
separators = IndexedSeq(3), | |
subtrees = IndexedSeq( | |
Leaf(2, IndexedSeq(1, 2), IndexedSeq("one", "two")), | |
Leaf(2, IndexedSeq(5, 8), IndexedSeq("five", "eight")), | |
), | |
) | |
Seq((1, "one"), (2, "two"), (5, "five"), (8, "eight")).foreach { case (k, v) => | |
br1.find(k).ensuring(_ == Some(v), s"Not Found: ${k}") | |
} | |
br1.find(0).ensuring(_ == None) | |
val sp1 = Split(2, 13, Leaf(2, IndexedSeq(5), IndexedSeq("V")), Leaf(2, IndexedSeq(15), IndexedSeq("XV"))) | |
Seq((5, "V"), (15, "XV")).foreach { case (k, v) => | |
sp1.find(k).ensuring(_ == Some(v)) | |
} | |
// オーバーフロー | |
Leaf(2, IndexedSeq(1), IndexedSeq(1)).isOverflow | |
.ensuring(_ == false) | |
Leaf(2, IndexedSeq(1, 2, 3), IndexedSeq(1, 2, 3)).isOverflow | |
.ensuring(_ == true) | |
Empty(2).isOverflow | |
.ensuring(_ == true) | |
br1.isOverflow | |
.ensuring(_ == false) | |
val br2 = Branch( | |
capacity = 2, | |
separators = IndexedSeq(1, 5), | |
subtrees = IndexedSeq( | |
Leaf(2, IndexedSeq(-1, 0), IndexedSeq("minus one", "zero")), | |
Leaf(2, IndexedSeq(1, 2), IndexedSeq("one", "two")), | |
Leaf(2, IndexedSeq(5, 8), IndexedSeq("five", "eight")), | |
), | |
) | |
// children not overflow | |
br2.isOverflow | |
.ensuring(_ == true) | |
// 分割 | |
Leaf(2, IndexedSeq(10, 13, 15), IndexedSeq("X", "XIII", "XV")).split | |
.ensuring(_ == | |
Split( | |
2, | |
13, | |
Leaf(2, IndexedSeq(10), IndexedSeq("X")), | |
Leaf(2, IndexedSeq(13, 15), IndexedSeq("XIII", "XV")) | |
) | |
) | |
// 挿入 | |
/// 空き有 | |
Empty(2).insert(1, "I") | |
.ensuring(_ == Leaf(2, IndexedSeq(1), IndexedSeq("I"))) | |
Leaf(2, IndexedSeq(13), IndexedSeq("XIII")).insert(10, "X") | |
.ensuring(_ == Leaf(2, IndexedSeq(10, 13), IndexedSeq("X", "XIII"))) | |
Leaf(2, IndexedSeq(10, 13), IndexedSeq("X", "XIII")).insert(15, "XV") | |
.ensuring(_ == Leaf(2, IndexedSeq(10, 13, 15), IndexedSeq("X", "XIII", "XV"))) | |
br1.insert(3, "three") | |
.ensuring(_ == Branch( | |
2, | |
IndexedSeq(3), | |
IndexedSeq( | |
Leaf(2, IndexedSeq(1, 2), IndexedSeq("one", "two")), | |
Leaf(2, IndexedSeq(3, 5, 8), IndexedSeq("three", "five", "eight")), | |
), | |
)) | |
br2.insert(15, "十五") | |
/// オーバーフロー時 | |
val l1 = Leaf(2, IndexedSeq(10, 13, 15), IndexedSeq("X", "XIII", "XV")) | |
l1.isOverflow | |
.ensuring(_ == true) | |
l1.insert(4, "IV") | |
.ensuring(_ == | |
Split(2, 13, | |
Leaf(2, IndexedSeq(4, 10), IndexedSeq("IV", "X")), | |
Leaf(2, IndexedSeq(13, 15), IndexedSeq("XIII", "XV")) | |
) | |
) | |
val br3 = Branch( | |
2, | |
IndexedSeq(10, 20), | |
IndexedSeq( | |
Leaf(2, IndexedSeq(5, 6, 7), IndexedSeq("五", "六", "七")), | |
Leaf(2, IndexedSeq(10, 13, 14), IndexedSeq("十", "十三", "十四")), | |
Leaf(2, IndexedSeq(20, 23, 24), IndexedSeq("二十", "二十三", "二十四")), | |
) | |
).insert(12, "十二") | |
br3.ensuring(_ == Split( | |
2, | |
13, | |
Branch( | |
2, | |
IndexedSeq(10), | |
IndexedSeq( | |
Leaf(2, IndexedSeq(5, 6, 7), IndexedSeq("五", "六", "七")), | |
Leaf(2, IndexedSeq(10, 12), IndexedSeq("十", "十二")), | |
) | |
), | |
Branch( | |
2, | |
IndexedSeq(13, 20), | |
IndexedSeq( | |
Empty(2), | |
Leaf(2, IndexedSeq(13, 14), IndexedSeq("十三", "十四")), | |
Leaf(2, IndexedSeq(20, 23, 24), IndexedSeq("二十", "二十三", "二十四")), | |
) | |
) | |
)) | |
br3.find(12).ensuring(_ == Some("十二")) | |
/// 置換 | |
Leaf(3, IndexedSeq(10, 13, 15), IndexedSeq("X", "XIII", "XV")).insert(15, "十五") | |
.ensuring(_ == Leaf(3, IndexedSeq(10, 13, 15), IndexedSeq("X", "XIII", "十五"))) | |
// val randomList = List.fill(1<<6)(Math.round(Math.random() * 65535)) | |
val randomList = (1L to 42L).toSeq | |
val (tr1, t) = time { () => | |
randomList.foldLeft( | |
Empty(5).asInstanceOf[BTree[Long, Long]] | |
) { (t: BTree[Long, Long], v: Long) => | |
t.insert(v, v) | |
} | |
} | |
println(s"time: ${formatNanoSec(t)}") | |
val (_, t2) = time { () => | |
randomList.foreach { v => | |
tr1.find(v).ensuring(_ == Some(v)) | |
} | |
} | |
println(s"time: ${formatNanoSec(t2)}") | |
{ | |
val (tr1, t) = time { () => | |
randomList.foldLeft( | |
new java.util.TreeMap[Long, Long] | |
) { (t: java.util.TreeMap[Long, Long], v: Long) => | |
t.put(v, v) | |
t | |
} | |
} | |
println(s"time: ${formatNanoSec(t)}") | |
val (_, t2) = time { () => | |
randomList.foreach { v => | |
tr1.get(v).ensuring(_ == v) | |
} | |
} | |
println(s"time: ${formatNanoSec(t2)}") | |
1 | |
} | |
case class GraphvizNode(name: String, label: String, children: Seq[GraphvizNode]) { | |
def nodeDefinition: String = { | |
s""" | |
${name} [label="${label}"] | |
${children.map(_.nodeDefinition).mkString("\n")} | |
""".stripMargin | |
} | |
def arrowDefinition: String = { | |
s""" | |
${children.map(c => s"${name}:${c.name} -> ${c.name}").mkString("\n")} | |
${children.map(_.arrowDefinition).mkString("\n")} | |
""".stripMargin | |
} | |
} | |
class BTreeGraphviz[K, V](val btree: BTree[K, V]) { | |
override def toString: String = { | |
val g = graphviz | |
s""" | |
digraph G { | |
node [shape=record]; | |
${g.nodeDefinition} | |
${g.arrowDefinition} | |
} | |
""".stripMargin | |
} | |
def graphviz: GraphvizNode = { | |
btree match { | |
case Branch(_, separators, subtrees) => | |
val sepLabel = separators.mkString("|") | |
val subtreeNodes = subtrees.map(t => new BTreeGraphviz[K, V](t).graphviz) | |
val subtreeRefLabels = subtreeNodes.map(t => s"<${t.name}> ●").mkString("|") | |
val label = s"""{{${sepLabel}}|{${subtreeRefLabels}}}""" | |
GraphvizNode(name, label, subtreeNodes) | |
case Leaf(_, keys, values) => | |
val keysLabel = keys.mkString("|") | |
val valuesLabel = values.mkString("|") | |
val label = s"""{{${keysLabel}}|{${valuesLabel}}}""" | |
GraphvizNode(name, label, Seq()) | |
case Split(_, key, left, right) => | |
val lNode = new BTreeGraphviz[K, V](left).graphviz | |
val rNode = new BTreeGraphviz[K, V](right).graphviz | |
val label = s"""{${key}|{<${lNode.name}>●|<${rNode.name}> ●}}""" | |
GraphvizNode(name, label, Seq(lNode, rNode)) | |
case Empty(_) => | |
GraphvizNode(name, "", Seq()) | |
} | |
} | |
def name: String = { | |
(Math.round(Math.abs(btree.hashCode) * Math.random())).toString | |
} | |
} | |
println(new BTreeGraphviz[Long, Long](tr1).toString) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// https://stackoverflow.com/a/55032051 | |
def pprint(obj: Any, depth: Int = 0, paramName: Option[String] = None): Unit = { | |
val indent = " " * depth | |
val prettyName = paramName.fold("")(x => s"$x: ") | |
val ptype = obj match { case _: Iterable[Any] => "" case obj: Product => obj.productPrefix case _ => obj.toString } | |
println(s"$indent$prettyName$ptype") | |
obj match { | |
case seq: Iterable[Any] => | |
seq.zipWithIndex.foreach { case (v, idx) => | |
pprint(v, depth + 1, Some(s"${idx}")) | |
} | |
case obj: Product => | |
(obj.productIterator zip obj.productElementNames) | |
.foreach { case (subObj, paramName) => pprint(subObj, depth + 1, Some(paramName)) } | |
case _ => | |
} | |
} | |
case class Slots[K, V]( | |
capacity: Int, | |
splitPointer: Int, | |
entries: Seq[Option[Bucket[K, V]]], | |
) { | |
val bucketCapacity = 2 | |
val doubleCapacity = capacity * 2 | |
require(capacity > 0) | |
require(entries.length == capacity || entries.length == doubleCapacity) | |
require(splitPointer >= 0 && splitPointer < capacity) | |
def insert(key: K, value: V): Slots[K, V] = { | |
val idx = indexFor(key) | |
entries(idx) match { | |
case Some(bucket) => { | |
val result = bucket.push(key, value) | |
val updatedEntries = entries.updated(idx, Some(result.bucket)) | |
val newSlots = copy(entries = updatedEntries) | |
if (result.overflow) newSlots.split | |
else newSlots | |
} | |
case None => { | |
val result = Bucket(capacity = bucketCapacity).push(key, value) | |
result.ensuring(_.overflow == false) | |
copy(entries = entries.updated(idx, Some(result.bucket))) | |
} | |
} | |
} | |
def find(key: K): Option[V] = { | |
val bucket = entries(indexFor(key)) | |
bucket.flatMap { b => b.find(key) } | |
} | |
def indexFor(key: K): Int = { | |
val hash1 = key.hashCode % capacity | |
if (hash1 < splitPointer) { | |
key.hashCode % doubleCapacity | |
} else { | |
key.hashCode % capacity | |
} | |
} | |
def split: Slots[K, V] = { | |
val originalBucket = entries(splitPointer) | |
val newSlots: Slots[K, V] = { | |
val newPointer = (splitPointer + 1) % capacity | |
val newCapacity = | |
if (newPointer == 0) capacity * 2 | |
else capacity | |
val cleared = entries.updated(splitPointer, None) | |
val newEntries = { | |
if (cleared.length < doubleCapacity) cleared ++ Seq.fill(capacity)(None) | |
else cleared | |
} | |
.ensuring(_.length == doubleCapacity) | |
val splitSlots = Slots( | |
capacity = newCapacity, | |
entries = newEntries, | |
splitPointer = newPointer, | |
) | |
splitSlots | |
} | |
/* | |
println("=====SPLIT=====") | |
println(originalBucket) | |
println(newSlots) | |
println("===============") | |
*/ | |
originalBucket match { | |
case Some(bucket) => { | |
def appendAll(slots: Slots[K, V], bucket: Bucket[K, V]): Slots[K, V] = { | |
val newSlots = bucket.entries.foldLeft(slots) { case (s, (k, v)) => | |
s.insert(k, v) | |
} | |
bucket.nextBucket match { | |
case Some(next) => appendAll(newSlots, next) | |
case None => newSlots | |
} | |
} | |
appendAll(newSlots, bucket) | |
} | |
case None => newSlots | |
} | |
} | |
} | |
object Slots { | |
def create[K, V](): Slots[K, V] = { | |
withCapacity(32) | |
} | |
def withCapacity[K, V](capacity: Int): Slots[K, V] = { | |
new Slots( | |
capacity = capacity, | |
entries = Seq.fill(capacity)(None), | |
splitPointer = 0, | |
) | |
} | |
} | |
case class BucketPushResult[K, V]( | |
bucket: Bucket[K, V], | |
overflow: Boolean | |
) | |
case class Bucket[K, V]( | |
capacity: Int = 32, | |
entries: Seq[Tuple2[K, V]] = Seq(), | |
nextBucket: Option[Bucket[K, V]] = None, | |
) { | |
require(capacity > 0, "capacity should be > 0") | |
require(entries.length >= 0, "length of entries should be >= 0") | |
require(entries.length <= capacity, "length of entries should be less than capacity") | |
def push(key: K, value: V): BucketPushResult[K, V] = { | |
if (isFull) { | |
nextBucket match { | |
case Some(next) => { | |
val result = next.push(key, value) | |
val newBucket = copy(nextBucket = Some(result.bucket)) | |
result.copy(bucket = newBucket) | |
} | |
case None => { | |
val result = Bucket(capacity = capacity).push(key, value) | |
.ensuring(_.bucket.capacity == capacity) | |
.ensuring(_.bucket.entries.length == 1) | |
val newBucket = copy(nextBucket = Some(result.bucket)) | |
BucketPushResult( | |
bucket = newBucket, | |
overflow = true | |
) | |
} | |
} | |
} else { | |
// TODO how we overwrite value | |
BucketPushResult( | |
bucket = copy(entries = entries :+ (key -> value)), | |
overflow = false, | |
) | |
} | |
} | |
def find(key: K): Option[V] = { | |
entries.find { case (k, _) => k == key } match { | |
case Some((_, v)) => Some(v) | |
case None => { | |
nextBucket.flatMap { _.find(key) } | |
} | |
} | |
} | |
def isFull: Boolean = { | |
entries.length == capacity | |
} | |
} | |
Bucket[Int, Int](capacity = 1).push(1, 1).bucket | |
.ensuring(_ == Bucket(1, Seq((1, 1))), None) | |
Bucket[Int, Int](capacity = 1).push(1, 1).bucket.push(2, 2).bucket | |
.ensuring(_ == Bucket(1, Seq((1, 1)), Some(Bucket(1, Seq((2, 2)), None)))) | |
Bucket[Int, Int](capacity = 1).push(1, 1).bucket.find(1) | |
.ensuring(_ == Some(1)) | |
val s1 = (1 to 512).foldLeft(Slots.withCapacity[Int, Int](1)) { (s, n) => s.insert(n, n) } | |
(1 to 512).forall { e => s1.find(e).nonEmpty }.ensuring(_ == true) | |
pprint(s1) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment