Last active
April 29, 2022 00:52
-
-
Save EmmanuelOga/1d52adb79bdb6092fb698ed5b31fa377 to your computer and use it in GitHub Desktop.
Quadstore using an AVLTreeSet https://twitter.com/EmmanuelOga/status/1353265056381767681
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import it.unimi.dsi.fastutil.objects.ObjectAVLTreeSet | |
data class Quad(val s: String = "", val p: String = "", val o: Comparable<Any>? = null, val c: String = "") : | |
Comparable<Quad> { | |
override fun compareTo(other: Quad): Int = compareValuesBy(this, other, Quad::s, Quad::p, Quad::o, Quad::c) | |
override fun toString(): String = "Q<S=$s P=$p O=$o C=$c>" | |
} | |
enum class Index { SPOC, POC, OCS, CSP, CP, OS } | |
class Quadstore { | |
private fun createIndex(comparator: Comparator<Quad>) = ObjectAVLTreeSet<Quad>(nullsFirst(comparator)::compare) | |
private val indices = mapOf( | |
Index.SPOC to createIndex(compareBy(Quad::s).thenBy(Quad::p).thenBy(Quad::o).thenBy(Quad::c)), | |
Index.POC to createIndex(compareBy(Quad::p).thenBy(Quad::o).thenBy(Quad::c)), | |
Index.OCS to createIndex(compareBy(Quad::o).thenBy(Quad::c).thenBy(Quad::s)), | |
Index.CSP to createIndex(compareBy(Quad::c).thenBy(Quad::s).thenBy(Quad::p)), | |
Index.CP to createIndex(compareBy(Quad::c).thenBy(Quad::p)), | |
Index.OS to createIndex(compareBy(Quad::o).thenBy(Quad::s)) | |
) | |
override fun toString(): String = "Quadstore(${indices[Index.CSP]!!.size} entries)" | |
fun clear() { | |
for ((_, index) in indices) index.clear() | |
} | |
fun add(quad: Quad) { | |
for ((_, index) in indices) { | |
index.add(quad) | |
} | |
} | |
fun remove(quad: Quad) { | |
for ((_, index) in indices) { | |
index.add(quad) | |
} | |
} | |
fun filter(s: String? = null, p: String? = null, o: RItem? = null, c: String? = null) = sequence { | |
val indexKey = when { | |
s == null && p == null && o == null && c == null -> Index.SPOC // _ _ _ _ | |
s == null && p == null && o == null && c != null -> Index.CSP // _ _ _ c | |
s == null && p == null && o != null && c == null -> Index.OCS // _ _ o _ | |
s == null && p == null && o != null && c != null -> Index.OCS // _ _ o c | |
s == null && p != null && o == null && c == null -> Index.POC // _ p _ _ | |
s == null && p != null && o == null && c != null -> Index.CP // _ p _ c | |
s == null && p != null && o != null && c == null -> Index.POC // _ p o _ | |
s == null && p != null && o != null && c != null -> Index.POC // _ p o c | |
s != null && p == null && o == null && c == null -> Index.SPOC // s _ _ _ | |
s != null && p == null && o == null && c != null -> Index.CSP // s _ _ c | |
s != null && p == null && o != null && c == null -> Index.OS // s _ o _ | |
s != null && p == null && o != null && c != null -> Index.OCS // s _ o c | |
s != null && p != null && o == null && c == null -> Index.SPOC // s p _ _ | |
s != null && p != null && o == null && c != null -> Index.CSP // s p _ c | |
s != null && p != null && o != null && c == null -> Index.SPOC // s p o _ | |
s != null && p != null && o != null && c != null -> Index.SPOC // s p o c | |
else -> Index.CSP | |
} | |
val start = Quad( | |
s = s ?: "", | |
p = p ?: "", | |
o = o ?: RNull, | |
c = c ?: "", | |
) | |
val index = indices[indexKey]!! | |
for (quad in index.tailSet(start)) { | |
if (s != null && quad.s > s) break | |
if (p != null && quad.p > p) break | |
if (o != null && quad.o > o) break | |
if (c != null && quad.c > c) break | |
yield(quad) | |
} | |
} | |
} | |
fun Sequence<Quad>.props(vararg targetProps: String): Map<String, RItem> { | |
val quads = this | |
val result = mutableMapOf<String, RItem>() | |
val props = targetProps.toMutableSet() | |
for (quad in quads) { | |
if (props.contains(quad.p)) { | |
result[quad.p] = quad.o | |
props.remove(quad.p) | |
} | |
if (props.isEmpty()) break | |
} | |
for (prop in props) result[prop] = RNull | |
return result | |
} | |
fun main() { | |
val qs = Quadstore() | |
val nanoToSec = 1_000_000_000.0 | |
val times = mutableMapOf<Int, Long>() | |
val start = System.nanoTime() | |
repeat(1000001) { | |
val s = "pred:${(it * 11).toString(16)}${(it * 13).toString(16)}" | |
val p = "pred:${(it * 13).toString(16)}${(it * 9).toString(16)}" | |
val c = "pred:${(it * 9).toString(16)}${(it * 11).toString(16)}" | |
val o = """ | |
This is the object of the Quad to be stored on the quad store. | |
The other quad components are: | |
val s = "pred:${(it * 11).toString(16)}${(it * 13).toString(16)}" | |
val p = "pred:${(it * 13).toString(16)}${(it * 9).toString(16)}" | |
val c = "pred:${(it * 9).toString(16)}${(it * 11).toString(16)}" | |
""" | |
val quad = Quad(s = s, p = p, o = o, c = c) | |
qs.add(quad) | |
if (it > 0 && it % 100000 == 0) { | |
// println(quad); // <<-- print some quads to search for. | |
times[it] = System.nanoTime() | |
} | |
} | |
println(qs) | |
println("Elements\ttime") | |
for ((number, time) in times) { | |
println("$number\t${(time - start) / nanoToSec}") | |
} | |
var results = 0 | |
val time = (measureNanoTime { | |
repeat(1000) { | |
results += qs.filter(s = "pred:10c8e013d620", p = "pred:13d620dbba0").count() | |
results += qs.filter(s = "pred:10c8e013d620", p = "pred:13d620dbba0").count() | |
results += qs.filter(s = "pred:2191c027ac40", p = "pred:27ac401b7740").count() | |
results += qs.filter(s = "pred:325aa03b8260", p = "pred:3b82602932e0").count() | |
results += qs.filter(s = "pred:4323804f5880", p = "pred:4f588036ee80").count() | |
results += qs.filter(s = "pred:53ec60632ea0", p = "pred:632ea044aa20").count() | |
results += qs.filter(s = "pred:64b5407704c0", p = "pred:7704c05265c0").count() | |
results += qs.filter(s = "pred:757e208adae0", p = "pred:8adae0602160").count() | |
results += qs.filter(s = "pred:8647009eb100", p = "pred:9eb1006ddd00").count() | |
results += qs.filter(s = "pred:970fe0b28720", p = "pred:b287207b98a0").count() | |
results += qs.filter(s = "pred:a7d8c0c65d40", p = "pred:c65d40895440").count() | |
results += qs.filter(p = "pred:13d620dbba0").count() | |
results += qs.filter(p = "pred:13d620dbba0").count() | |
results += qs.filter(p = "pred:27ac401b7740").count() | |
results += qs.filter(p = "pred:3b82602932e0").count() | |
results += qs.filter(p = "pred:4f588036ee80").count() | |
results += qs.filter(p = "pred:632ea044aa20").count() | |
results += qs.filter(p = "pred:7704c05265c0").count() | |
results += qs.filter(p = "pred:8adae0602160").count() | |
results += qs.filter(p = "pred:9eb1006ddd00").count() | |
results += qs.filter(p = "pred:b287207b98a0").count() | |
results += qs.filter(p = "pred:c65d40895440").count() | |
results += qs.filter(s = "pred:10c8e013d620").count() | |
results += qs.filter(s = "pred:10c8e013d620").count() | |
results += qs.filter(s = "pred:2191c027ac40").count() | |
results += qs.filter(s = "pred:325aa03b8260").count() | |
results += qs.filter(s = "pred:4323804f5880").count() | |
results += qs.filter(s = "pred:53ec60632ea0").count() | |
results += qs.filter(s = "pred:64b5407704c0").count() | |
results += qs.filter(s = "pred:757e208adae0").count() | |
results += qs.filter(s = "pred:8647009eb100").count() | |
results += qs.filter(s = "pred:970fe0b28720").count() | |
results += qs.filter(s = "pred:a7d8c0c65d40").count() | |
} | |
}) / nanoToSec | |
println("$results matches in ${time} seconds ${results / time}q/sec ") | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
1,000,000 quads in ~7 secs with linear runtime complexity!
Consumed around 4,5GB of RAM on Windows OpenJDK 14 w/o any sort of G.C. tuning.
https://twitter.com/EmmanuelOga/status/1353265056381767681
Quads founds with 1,000,000 entries in store:
33,000 matches in 0.0688622 seconds (479,218 matches/sec) ! 🤩