Skip to content

Instantly share code, notes, and snippets.

@EmmanuelOga
Last active April 29, 2022 00:52
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save EmmanuelOga/1d52adb79bdb6092fb698ed5b31fa377 to your computer and use it in GitHub Desktop.
Save EmmanuelOga/1d52adb79bdb6092fb698ed5b31fa377 to your computer and use it in GitHub Desktop.
import it.unimi.dsi.fastutil.objects.ObjectAVLTreeSet
data class Quad(val s: String = "", val p: String = "", val o: Comparable<Any>? = null, val c: String = "") :
Comparable<Quad> {
override fun compareTo(other: Quad): Int = compareValuesBy(this, other, Quad::s, Quad::p, Quad::o, Quad::c)
override fun toString(): String = "Q<S=$s P=$p O=$o C=$c>"
}
enum class Index { SPOC, POC, OCS, CSP, CP, OS }
class Quadstore {
private fun createIndex(comparator: Comparator<Quad>) = ObjectAVLTreeSet<Quad>(nullsFirst(comparator)::compare)
private val indices = mapOf(
Index.SPOC to createIndex(compareBy(Quad::s).thenBy(Quad::p).thenBy(Quad::o).thenBy(Quad::c)),
Index.POC to createIndex(compareBy(Quad::p).thenBy(Quad::o).thenBy(Quad::c)),
Index.OCS to createIndex(compareBy(Quad::o).thenBy(Quad::c).thenBy(Quad::s)),
Index.CSP to createIndex(compareBy(Quad::c).thenBy(Quad::s).thenBy(Quad::p)),
Index.CP to createIndex(compareBy(Quad::c).thenBy(Quad::p)),
Index.OS to createIndex(compareBy(Quad::o).thenBy(Quad::s))
)
override fun toString(): String = "Quadstore(${indices[Index.CSP]!!.size} entries)"
fun clear() {
for ((_, index) in indices) index.clear()
}
fun add(quad: Quad) {
for ((_, index) in indices) {
index.add(quad)
}
}
fun remove(quad: Quad) {
for ((_, index) in indices) {
index.add(quad)
}
}
fun filter(s: String? = null, p: String? = null, o: RItem? = null, c: String? = null) = sequence {
val indexKey = when {
s == null && p == null && o == null && c == null -> Index.SPOC // _ _ _ _
s == null && p == null && o == null && c != null -> Index.CSP // _ _ _ c
s == null && p == null && o != null && c == null -> Index.OCS // _ _ o _
s == null && p == null && o != null && c != null -> Index.OCS // _ _ o c
s == null && p != null && o == null && c == null -> Index.POC // _ p _ _
s == null && p != null && o == null && c != null -> Index.CP // _ p _ c
s == null && p != null && o != null && c == null -> Index.POC // _ p o _
s == null && p != null && o != null && c != null -> Index.POC // _ p o c
s != null && p == null && o == null && c == null -> Index.SPOC // s _ _ _
s != null && p == null && o == null && c != null -> Index.CSP // s _ _ c
s != null && p == null && o != null && c == null -> Index.OS // s _ o _
s != null && p == null && o != null && c != null -> Index.OCS // s _ o c
s != null && p != null && o == null && c == null -> Index.SPOC // s p _ _
s != null && p != null && o == null && c != null -> Index.CSP // s p _ c
s != null && p != null && o != null && c == null -> Index.SPOC // s p o _
s != null && p != null && o != null && c != null -> Index.SPOC // s p o c
else -> Index.CSP
}
val start = Quad(
s = s ?: "",
p = p ?: "",
o = o ?: RNull,
c = c ?: "",
)
val index = indices[indexKey]!!
for (quad in index.tailSet(start)) {
if (s != null && quad.s > s) break
if (p != null && quad.p > p) break
if (o != null && quad.o > o) break
if (c != null && quad.c > c) break
yield(quad)
}
}
}
fun Sequence<Quad>.props(vararg targetProps: String): Map<String, RItem> {
val quads = this
val result = mutableMapOf<String, RItem>()
val props = targetProps.toMutableSet()
for (quad in quads) {
if (props.contains(quad.p)) {
result[quad.p] = quad.o
props.remove(quad.p)
}
if (props.isEmpty()) break
}
for (prop in props) result[prop] = RNull
return result
}
fun main() {
val qs = Quadstore()
val nanoToSec = 1_000_000_000.0
val times = mutableMapOf<Int, Long>()
val start = System.nanoTime()
repeat(1000001) {
val s = "pred:${(it * 11).toString(16)}${(it * 13).toString(16)}"
val p = "pred:${(it * 13).toString(16)}${(it * 9).toString(16)}"
val c = "pred:${(it * 9).toString(16)}${(it * 11).toString(16)}"
val o = """
This is the object of the Quad to be stored on the quad store.
The other quad components are:
val s = "pred:${(it * 11).toString(16)}${(it * 13).toString(16)}"
val p = "pred:${(it * 13).toString(16)}${(it * 9).toString(16)}"
val c = "pred:${(it * 9).toString(16)}${(it * 11).toString(16)}"
"""
val quad = Quad(s = s, p = p, o = o, c = c)
qs.add(quad)
if (it > 0 && it % 100000 == 0) {
// println(quad); // <<-- print some quads to search for.
times[it] = System.nanoTime()
}
}
println(qs)
println("Elements\ttime")
for ((number, time) in times) {
println("$number\t${(time - start) / nanoToSec}")
}
var results = 0
val time = (measureNanoTime {
repeat(1000) {
results += qs.filter(s = "pred:10c8e013d620", p = "pred:13d620dbba0").count()
results += qs.filter(s = "pred:10c8e013d620", p = "pred:13d620dbba0").count()
results += qs.filter(s = "pred:2191c027ac40", p = "pred:27ac401b7740").count()
results += qs.filter(s = "pred:325aa03b8260", p = "pred:3b82602932e0").count()
results += qs.filter(s = "pred:4323804f5880", p = "pred:4f588036ee80").count()
results += qs.filter(s = "pred:53ec60632ea0", p = "pred:632ea044aa20").count()
results += qs.filter(s = "pred:64b5407704c0", p = "pred:7704c05265c0").count()
results += qs.filter(s = "pred:757e208adae0", p = "pred:8adae0602160").count()
results += qs.filter(s = "pred:8647009eb100", p = "pred:9eb1006ddd00").count()
results += qs.filter(s = "pred:970fe0b28720", p = "pred:b287207b98a0").count()
results += qs.filter(s = "pred:a7d8c0c65d40", p = "pred:c65d40895440").count()
results += qs.filter(p = "pred:13d620dbba0").count()
results += qs.filter(p = "pred:13d620dbba0").count()
results += qs.filter(p = "pred:27ac401b7740").count()
results += qs.filter(p = "pred:3b82602932e0").count()
results += qs.filter(p = "pred:4f588036ee80").count()
results += qs.filter(p = "pred:632ea044aa20").count()
results += qs.filter(p = "pred:7704c05265c0").count()
results += qs.filter(p = "pred:8adae0602160").count()
results += qs.filter(p = "pred:9eb1006ddd00").count()
results += qs.filter(p = "pred:b287207b98a0").count()
results += qs.filter(p = "pred:c65d40895440").count()
results += qs.filter(s = "pred:10c8e013d620").count()
results += qs.filter(s = "pred:10c8e013d620").count()
results += qs.filter(s = "pred:2191c027ac40").count()
results += qs.filter(s = "pred:325aa03b8260").count()
results += qs.filter(s = "pred:4323804f5880").count()
results += qs.filter(s = "pred:53ec60632ea0").count()
results += qs.filter(s = "pred:64b5407704c0").count()
results += qs.filter(s = "pred:757e208adae0").count()
results += qs.filter(s = "pred:8647009eb100").count()
results += qs.filter(s = "pred:970fe0b28720").count()
results += qs.filter(s = "pred:a7d8c0c65d40").count()
}
}) / nanoToSec
println("$results matches in ${time} seconds ${results / time}q/sec ")
}
@EmmanuelOga
Copy link
Author

EmmanuelOga commented Jan 24, 2021

1,000,000 quads in ~7 secs with linear runtime complexity!

Consumed around 4,5GB of RAM on Windows OpenJDK 14 w/o any sort of G.C. tuning.

https://twitter.com/EmmanuelOga/status/1353265056381767681

Quads Inserted time (seconds)
100,000 0.79
200,000 1.46
300,000 2.10
400,000 2.82
500,000 3.51
600,000 4.18
700,000 4.78
800,000 5.59
900,000 6.36
1,000,000 7.06

Quads founds with 1,000,000 entries in store:

33,000 matches in 0.0688622 seconds (479,218 matches/sec) ! 🤩

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment