Skip to content

Instantly share code, notes, and snippets.

@ivarref
Last active October 22, 2021 12:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ivarref/0d3d34eeeffbc4625d6120727368e405 to your computer and use it in GitHub Desktop.
Save ivarref/0d3d34eeeffbc4625d6120727368e405 to your computer and use it in GitHub Desktop.
(ns datomic-oom
(:require [datomic.api :as d]
[clojure.set :as set])
(:import (java.util UUID)))
; This gist reproduces an odd (?) OutOfMemoryError with datomic-pro v1.0.6344
;; Connection to the in-memory database used by every query below.
;; Evaluating this form deletes any database at the URI, creates it again,
;; and then connects to it.
(def conn
  (let [uri "datomic:mem://demo"]
    (doto uri
      (d/delete-database)
      (d/create-database))
    (d/connect uri)))
;; Two indexed, cardinality-one attributes:
;;   :e/b - a boolean flag
;;   :e/u - a UUID
(def schema
  [{:db/ident       :e/b
    :db/valueType   :db.type/boolean
    :db/cardinality :db.cardinality/one
    :db/index       true}
   {:db/ident       :e/u
    :db/valueType   :db.type/uuid
    :db/cardinality :db.cardinality/one
    :db/index       true}])

;; Install the schema; the deref (@) waits for the transaction result.
@(d/transact conn schema)
;; Number of entities to create.
(def n 100000)

;; n random UUIDs as a lazy sequence. (It also OOMs with d/squuid.)
(def uuids (take n (repeatedly #(UUID/randomUUID))))

;; How many of the n entities get :e/b true.
(def true-count 1000)

;; One flag per entity: true for indices below true-count, false for the rest.
(def bools (mapv (partial > true-count) (range n)))

;; Sanity check: exactly true-count flags are true.
(assert (= true-count (count (filter true? bools))))

;; Transaction data: one entity map per (flag, uuid) pair, in order.
(def tx
  (mapv (fn [flag id]
          {:e/b flag, :e/u id})
        bools
        uuids))
; Submit all n entity maps in one transaction; the deref (@) waits for its result.
@(d/transact conn tx)
;; Collection binding with the :e/u clause first.
;; That clause matches 50k datoms, i.e. it is the least restrictive one,
;; yet the query finishes in 400ms on my machine.
(comment
  (time
    (let [db      (d/db conn)
          wanted  (vec (take 50000 uuids))
          ;; Clause order is the thing being measured -- do not reorder.
          matches (d/query {:query '{:find  [?e]
                                     :in    [$ ?b [?u ...]]
                                     :where [[?e :e/u ?u]
                                             [?e :e/b ?b]]}
                            :args  [db true wanted]})]
      (count matches))))
;; Passing the UUIDs as a set and filtering with contains? is much faster:
;; ~50 ms on my machine.
(comment
  (time
    (let [db      (d/db conn)
          wanted  (set (take 50000 uuids))
          ;; Both data clauses run first; the set-membership test comes last.
          matches (d/query {:query '{:find  [?e]
                                     :in    [$ ?b ?us]
                                     :where [[?e :e/b ?b]
                                             [?e :e/u ?u]
                                             [(contains? ?us ?u)]]}
                            :args  [db true wanted]})]
      (count matches))))
;; Same set-based query, but with the :e/u clause and the contains? test
;; placed before the :e/b clause. The original gist reports about 300ms.
(comment
  (time
    (count
      (d/query
        {:query {:find  '[?e]
                 :in    '[$ ?b ?us]
                 :where '[[?e :e/u ?u]
                          [(contains? ?us ?u)]
                          [?e :e/b ?b]]}
         :args  [(d/db conn)
                 true
                 ;; Build the set straight from the seq: an intermediate
                 ;; 50k-element vector adds nothing and is copied inside the
                 ;; timed region.
                 (into #{} (take 50000 uuids))]}))))
;; The failing case: gives OutOfMemoryError.
;; The :e/b clause comes first. It matches only 1000 datoms, i.e. it is the
;; most restrictive one, and yet the query never finishes on my machine;
;; instead it throws an OutOfMemoryError (4 GB heap).
(comment
  (time
    (let [db      (d/db conn)
          wanted  (vec (take 50000 uuids))
          ;; Clause order is the reproduction -- do not reorder.
          matches (d/query {:query '{:find  [?e]
                                     :in    [$ ?b [?u ...]]
                                     :where [[?e :e/b ?b]
                                             [?e :e/u ?u]]}
                            :args  [db true wanted]})]
      (count matches))))
(comment
  ;; d/qseq with the same query also fails with OutOfMemoryError (4 GB heap),
  ;; even though only the first result is requested.
  ;; The :e/b clause matches 1000 datoms, i.e. it is the most restrictive one.
  (time
    (first
      (d/qseq {:query '{:find  [?e]
                        :in    [$ ?b [?u ...]]
                        :where [[?e :e/b ?b]
                                [?e :e/u ?u]]}
               :args  [(d/db conn)
                       true
                       (vec (take 50000 uuids))]}))))
; from https://docs.datomic.com/on-prem/best-practices.html#collections-as-inputs
; You can use a collection binding in query to match several values specified in a collection.
; This behaves as a logical or, that is, it returns a union of the results for each item in the collection.
;; Runs one scalar-bound query per UUID and unions the per-UUID result sets.
(comment
  (time
    (let [db       (d/db conn)
          ids      (vec (take 50000 uuids))
          by-uuid  '{:find  [[?e ...]]
                     :in    [$ ?b ?u]
                     :where [[?e :e/b ?b]
                             [?e :e/u ?u]]}
          ;; Entity ids matching a single UUID, as a set.
          eids-for (fn [u]
                     (set (d/query {:query by-uuid
                                    :args  [db true u]})))]
      (transduce (map eids-for) set/union #{} ids))))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment