Skip to content

Instantly share code, notes, and snippets.

@petterik
Last active June 8, 2018 07:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save petterik/0e0c9bcc4b223c01bbb1442e820ea503 to your computer and use it in GitHub Desktop.
Save petterik/0e0c9bcc4b223c01bbb1442e820ea503 to your computer and use it in GitHub Desktop.
Making eduction and sequence faster for chunked-seqs

Commands for eduction and sequence improvements

Clojure

clj -Sdeps '{:deps {github-petterik/gist-0e0c9bcc4b223c01bbb1442e820ea503 {:git/url "https://gist.github.com/petterik/0e0c9bcc4b223c01bbb1442e820ea503" :sha "01ff87497f6dbed86905cab652b24f499aac7a38"}}}' -m eduction

ClojureScript

clj -Sdeps '{:deps {github-petterik/gist-0e0c9bcc4b223c01bbb1442e820ea503 {:git/url "https://gist.github.com/petterik/0e0c9bcc4b223c01bbb1442e820ea503" :sha "01ff87497f6dbed86905cab652b24f499aac7a38"}}}' -m cljs.main -re node --main eduction

{:paths ["."]
:deps {org.clojure/clojure {:mvn/version "1.10.0-alpha4"}
org.clojure/clojurescript {:mvn/version "1.10.238"}
org.clojure/test.check {:mvn/version "0.10.0-alpha2"}}}
(ns eduction
#?(:cljs (:require-macros [eduction :refer [benchmark]]))
(:require
[clojure.spec.alpha :as s]
[clojure.spec.gen.alpha :as gen]
[clojure.test.check :as tc :include-macros true]
[clojure.test.check.properties :as prop :include-macros true]))
(defn- chunked-sequence [xform s]
(let [xf (xform conj!)
rrf #?(:clj (#'clojure.core/preserving-reduced xf)
:cljs (#'cljs.core/preserving-reduced xf))
step (fn self [v s]
(if-not s
(persistent! (xf v))
(let [c (chunk-first s)
;; Calling unreduced for :clj to get the same
;; behaviour as :cljs
#?@(:clj [chunk (unreduced (.reduce c rrf v))]
:cljs [chunk (reduce rrf v c)])]
(if (reduced? chunk)
(seq (persistent! (xf (deref chunk))))
(let [next-chunk (chunk-next s)]
(if (zero? (count chunk))
(lazy-seq
(self v next-chunk))
(lazy-cat
(persistent! chunk)
(self (transient []) next-chunk))))))))]
(step (transient []) (seq s))))
(defn sequence2 [xform coll]
(let [s (seq coll)]
(if (chunked-seq? s)
(seq (chunked-sequence xform coll))
(sequence xform coll))))
(deftype Eduction2 [xform coll]
#?@(:clj
[clojure.lang.Seqable
(seq [this]
(let [s (seq coll)]
(if (chunked-seq? s)
(seq (chunked-sequence xform coll))
#_(iterator-seq (.iterator this))
(seq (clojure.core/->Eduction xform coll)))))]
:cljs
[ISeqable
(-seq [this]
(let [s (seq coll)]
(if (chunked-seq? s)
(seq (chunked-sequence xform coll))
(seq (sequence xform coll)))))]))
(defn eduction2 [& xforms]
(Eduction2. (apply comp (butlast xforms)) (last xforms)))
;;;;;;;;;;;;;;;;;
;; Generate
(s/def ::small-ints (into #{} (range 30)))
(defn gen-partition-by []
(s/gen #{(comp (partition-by even?) cat)}))
(defn gen-take []
(gen/fmap #(take %) (s/gen (set (range 1000)))))
(defn gen-map []
(s/gen #{(map inc) (map #(* -1 %))}))
(defn gen-filter []
(s/gen #{(filter even?)}))
(defn gen-drop []
(gen/fmap #(drop %) (s/gen ::small-ints)))
(defn gen-mapcat []
(gen/fmap #(mapcat (fn [x] (repeat % x)))
(s/gen #{1 2 3})))
;; To make sure we're calling the transducers the same amount of time.
(defn counting-xf []
(fn [rf]
(let [counter (atom 0)]
(fn
([] (rf))
([acc] (rf (unreduced (rf acc @counter))))
([acc res]
(swap! counter inc)
(rf acc res))))))
(defn gen-counting-xf []
(s/gen #{(counting-xf)}))
(defn gen-xforms []
(gen/not-empty
(gen/vector
(gen/one-of
[(gen-take)
(gen-partition-by)
(gen-map)
(gen-filter)
(gen-drop)
(gen-counting-xf)
(gen-mapcat)]))))
(defn check-eduction2 [n]
(tc/quick-check
n
(prop/for-all [xforms (gen-xforms)
i (s/gen ::small-ints)]
(let [args (concat xforms [(range i)])]
(= (seq (apply eduction args))
(seq (apply eduction2 args)))))))
(defn check-sequence2 [n]
(tc/quick-check
n
(prop/for-all [xforms (gen-xforms)
i (s/gen ::small-ints)]
(let [xforms (apply comp xforms)
r (range i)]
(= (seq (sequence xforms r))
(seq (sequence2 xforms r)))))))
;;;;;;;;;;;;;;;;;
;; Benchmarking
#?(:clj
(do
(defmacro if-cljs
[then else]
(if (:ns &env) then else))
(defn clj-benchmark [f runs]
(let [body# `(dotimes [~'_ ~runs]
((fn [] ~f)))]
`(do
;; Warm up
~body#
(time ~body#))))
(defmacro benchmark [_ f runs]
`(if-cljs
(do
;; Warm up
(dotimes [~'_ ~runs] ((fn [] ~f)))
(cljs.core/simple-benchmark [] ~f ~runs))
~(clj-benchmark f runs)))))
(defn -main [& args]
(prn "quick check (= eduction2 clojure.core/eduction")
(prn (check-eduction2 200))
(prn (check-sequence2 200))
(let [v (into [] (range 100000))
coll (seq v)]
(prn "Benchmarking eduction with chunked-seqs")
(prn "core/eduction - (first (filter ...))")
(benchmark []
(first (eduction (filter (fn [x] (< 80000 x))) coll))
500)
(prn "new eduction - (first (filter ...))")
(benchmark []
(first (eduction2 (filter (fn [x] (< 80000 x))) coll))
500)
(prn "core/eduction - (take 1000) (mapcat range)")
(benchmark []
(run! (fn [_])
(seq (eduction (take 1000) (mapcat range) coll)))
50)
(prn "new eduction - (take 1000) (mapcat range)")
(benchmark []
(run! (fn [_])
(seq (eduction2 (take 1000) (mapcat range) coll)))
50)
(prn "Benchmarking sequence with vectors")
(prn "core/sequence - (first (filter ...))")
(benchmark []
(first (sequence (filter (fn [x] (< 80000 x))) v))
500)
(prn "new sequence2 - (first (filter ...))")
(benchmark []
(first (sequence2 (filter (fn [x] (< 80000 x))) v))
500)
(prn "core/sequence - (take 1000) (mapcat range)")
(benchmark []
(run! (fn [_])
(seq (sequence (comp (take 1000) (mapcat range)) v)))
50)
(prn "new sequence2 - (take 1000) (mapcat range)")
(benchmark []
(run! (fn [_])
(seq (sequence2 (comp (take 1000) (mapcat range)) v)))
50)
))
(comment
;; clj repl output:
"quick check (= eduction2 clojure.core/eduction"
"{:result true, :num-tests 200, :seed 1528128367876}"
"Benchmarking eduction with chunked-seqs"
"core/eduction - (first (filter ...))"
"Elapsed time: 1687.285017 msecs"
"new eduction - (first (filter ...))"
"Elapsed time: 637.132507 msecs"
;; 1687/637 = 2,6483516484x speed up
"core/eduction - (take 1000) (mapcat range)"
"Elapsed time: 1768.182948 msecs"
"new eduction - (take 1000) (mapcat range)"
"Elapsed time: 905.295919 msecs"
;; 1768/905 = 1,9535911602x speed up
"Benchmarking sequence with vectors"
"core/sequence - (first (filter ...))"
"Elapsed time: 750.904259 msecs"
"new sequence2 - (first (filter ...))"
"Elapsed time: 704.927186 msecs"
;; 750/704 = 1,0653409091x speed up
"core/sequence - (take 1000) (mapcat range)"
"Elapsed time: 2011.894956 msecs"
"new sequence2 - (take 1000) (mapcat range)"
"Elapsed time: 779.168373 msecs"
;; 2011/779 = 2,5815147625x speed up
;; cljs output
"quick check (= eduction2 clojure.core/eduction"
"{:result true, :num-tests 200, :seed 1528128143496}"
"Benchmarking eduction with chunked-seqs"
"core/eduction - (first (filter ...))
[], (first (eduction ... coll)), 500 runs, 8531 msecs"
"new eduction - (first (filter ...))
[], (first (eduction2 ... coll)), 500 runs, 4302 msecs"
;; 8531/4302 = 1,9830311483x speed up
"core/eduction - (take 1000) (mapcat range)
[], (run! (fn [_]) (seq (eduction ... coll))), 50 runs, 11866 msecs"
"new eduction - (take 1000) (mapcat range)
[], (run! (fn [_]) (seq (eduction2 ... coll))), 50 runs, 6130 msecs"
;; 11866/6130 = 1,935725938x speed up
"Benchmarking sequence with vectors"
"core/sequence - (first (filter ...))
[], (first (sequence ... v)), 500 runs, 4265 msecs"
"new sequence2 - (first (filter ...))
[], (first (sequence2 ... v)), 500 runs, 3904 msecs"
;; 4265/3904 = 1,0924692623x speed up
"core/sequence - (take 1000) (mapcat range)
[], (run! (fn [_]) (seq (sequence ... v))), 50 runs, 14231 msecs"
"new sequence2 - (take 1000) (mapcat range)
[], (run! (fn [_]) (seq (sequence2 ... v))), 50 runs, 6953 msecs"
;; 14231/6953 = 2,0467424133x speed up
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment