Last active
October 22, 2021 12:20
-
-
Save ivarref/0d3d34eeeffbc4625d6120727368e405 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns datomic-oom | |
(:require [datomic.api :as d] | |
[clojure.set :as set]) | |
(:import (java.util UUID))) | |
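; This gist reproduces a surprising OutOfMemoryError with datomic-pro v1.0.6344:
; merely reordering the two :where clauses of a query with a large collection
; binding turns a fast query into one that exhausts a 4 GB heap.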
;; Connection to a freshly (re)created database at a mem:// URI.
;; Any database already present at the URI is deleted first, so re-evaluating
;; this form always starts from an empty database.
(def conn
  (-> "datomic:mem://demo"
      (doto d/delete-database d/create-database)
      d/connect))
;; Two indexed, cardinality-one attributes:
;;   :e/b - a boolean flag
;;   :e/u - a UUID
(def schema
  [{:db/ident       :e/b
    :db/cardinality :db.cardinality/one
    :db/valueType   :db.type/boolean
    :db/index       true}
   {:db/ident       :e/u
    :db/cardinality :db.cardinality/one
    :db/valueType   :db.type/uuid
    :db/index       true}])

;; Install the schema; deref waits for the transaction result.
(deref (d/transact conn schema))
;; Total number of generated values.
(def n 100000)

;; Lazy seq of n random UUIDs (the OOM also reproduces with d/squuid).
(def uuids
  (take n (repeatedly (fn [] (UUID/randomUUID)))))

;; How many entries of `bools` are true.
(def true-count 1000)

;; Vector of n flags: index i is true exactly when i < true-count.
(def bools
  (into [] (map (fn [idx] (< idx true-count))) (range n)))

;; Sanity check: exactly true-count flags are set.
(assert (= true-count (->> bools (filter true?) count)))
;; One entity map per (flag, uuid) pair, in input order.
(def tx
  (into []
        (map (fn [flag id] {:e/b flag, :e/u id})
             bools
             uuids)))

;; Transact all entities in a single transaction and wait for the result.
(deref (d/transact conn tx))
(comment
  ;; Collection binding with the :e/u clause FIRST.
  ;; That clause matches 50k datoms, i.e. it is the least restrictive one,
  ;; yet the query finishes in about 400 ms on my machine.
  (time
    (let [inputs [(d/db conn)
                  true
                  (vec (take 50000 uuids))]
          result (d/query {:query '{:find  [?e]
                                    :in    [$ ?b [?u ...]]
                                    :where [[?e :e/u ?u]
                                            [?e :e/b ?b]]}
                           :args  inputs})]
      (count result))))
;; Passing the UUIDs as one set and filtering with contains? is much faster:
;; about 50 ms on my machine.
(comment
  (time
    (-> {:query '{:find  [?e]
                  :in    [$ ?b ?us]
                  :where [[?e :e/b ?b]
                          [?e :e/u ?u]
                          [(contains? ?us ?u)]]}
         :args  [(d/db conn)
                 true
                 (into #{} (take 50000 uuids))]}
        d/query
        count)))
;; Set + contains? again, but with the :e/u clause first: about 300 ms
;; (author's measurement).
(comment
  (time
    (count
      (d/query
        {:query {:find  '[?e]
                 :in    '[$ ?b ?us]
                 :where '[[?e :e/u ?u]
                          [(contains? ?us ?u)]
                          [?e :e/b ?b]]}
         :args  [(d/db conn)
                 true
                 ;; `into` consumes the seq directly; no intermediate vector
                 ;; is needed to build the set.
                 (into #{} (take 50000 uuids))]}))))
;; Gives OutOfMemoryError.
(comment
  ;; Collection binding with the :e/b clause FIRST.
  ;; That clause matches only 1000 datoms, i.e. it is the most restrictive one,
  ;; and yet the query never finishes on my machine; it ends in an
  ;; OutOfMemoryError instead (4 GB heap).
  (time
    (let [db  (d/db conn)
          ids (vec (take 50000 uuids))]
      (count
        (d/query {:query '{:find  [?e]
                           :in    [$ ?b [?u ...]]
                           :where [[?e :e/b ?b]
                                   [?e :e/u ?u]]}
                  :args  [db true ids]})))))
(comment
  ;; qseq also fails with OutOfMemoryError (4 GB heap), even though only the
  ;; first result is requested. Same query as the failing d/query case: the
  ;; :e/b clause (1000 matching datoms, the most restrictive) comes first.
  (time
    (first
      (d/qseq {:query '{:find  [?e]
                        :in    [$ ?b [?u ...]]
                        :where [[?e :e/b ?b]
                                [?e :e/u ?u]]}
               :args  [(d/db conn)
                       true
                       (vec (take 50000 uuids))]}))))
;; From https://docs.datomic.com/on-prem/best-practices.html#collections-as-inputs :
;;   "You can use a collection binding in query to match several values
;;    specified in a collection. This behaves as a logical or, that is, it
;;    returns a union of the results for each item in the collection."
;;
;; The form below computes that union by hand: one scalar-input query per UUID,
;; with all returned entity ids collected into a single set.
(comment
  (time
    (let [db     (d/db conn)
          lookup (fn [u]
                   (d/query {:query '{:find  [[?e ...]]
                                      :in    [$ ?b ?u]
                                      :where [[?e :e/b ?b]
                                              [?e :e/u ?u]]}
                             :args  [db true u]}))]
      (into #{} (mapcat lookup) (vec (take 50000 uuids))))))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment