Created
April 22, 2022 17:24
-
-
Save Crowbrammer/0a7ea35132cb3dec38792cbd6ad77b4e to your computer and use it in GitHub Desktop.
Benchmark for interval summations
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; tldr | |
;; distinct, slow | |
;; "Elapsed time: 84.1618 msecs" | |
;; set | |
;; "Elapsed time: 32.7301 msecs" | |
;; non-overlapping | |
;; "Elapsed time: 1.6902 msecs" | |
;; transducers | |
;; "Elapsed time: 0.5955 msecs" | |
(ns seq-interval-benchmark | |
"Test the speed of various seq interval summation solutions.") | |
(defn gen-sequence-intervals | |
"Used for benchmarking sum-of-sequence solutions. Use this to | |
generate as many random sequence intervals as you want." | |
[num-intervals, sequence-length] | |
;; Start with an empty collection | |
;; Random int between 0 and sequence length | |
(loop [intervals-left (dec num-intervals) intervals []] | |
(let [first-rand (rand-int sequence-length) | |
;; Random int between ^ and sequence length, exclusive | |
;; resulting number needs to be after first rand int (because end) | |
;; rand-int always starts from 0, need a new start | |
;; Subtract first rand-int from sequence-length | |
;; rand-int | |
;; Add back that first rand-int to the number | |
second-rand (-> sequence-length (- first-rand) (rand-int) (+ first-rand)) | |
;; Make a sequence of the two numbers in order | |
;; Add it to the initial collection | |
updated-intervals (conj intervals [first-rand second-rand])] | |
;; More intervals to go? Do this again with the existing collection | |
(if (pos? intervals-left) | |
(recur (dec intervals-left) updated-intervals) | |
updated-intervals)))) | |
;; (println (gen-sequence-intervals 30 10000)) | |
;; Benchmark all the solutions (scroll to the bottom) | |
(defn sum-intervals-slow | |
[intervals] | |
(->> intervals | |
(mapcat (partial apply range)) | |
distinct | |
count)) | |
(defn collapse-intervals | |
[[a1 a2] [b1 b2]] | |
[(min a1 b1) (max a2 b2)]) | |
(defn overlapping? | |
[[a1 a2] [b1 b2]] | |
(or (and (>= a2 b1) | |
(< a1 b2)) | |
(and (>= b2 a1) | |
(< b1 a2)))) | |
(defn distinct-intervals | |
[intervals] | |
(let [sorted-intervals (sort intervals)] | |
(loop [[i & more :as intervals] (rest sorted-intervals) | |
current-interval (first sorted-intervals) | |
new-intervals []] | |
(if (seq intervals) | |
(if (overlapping? current-interval i) | |
(recur more (collapse-intervals current-interval i) new-intervals) | |
(recur more i (conj new-intervals current-interval))) | |
(if current-interval | |
(conj new-intervals current-interval) | |
new-intervals))))) | |
(defn interval-size | |
[[min max]] | |
(- max min)) | |
(defn sum-intervals-fast | |
[intervals] | |
(transduce (map interval-size) + (distinct-intervals intervals))) | |
(def distinct-intervals-xf | |
(fn [rf] | |
(let [current-interval (volatile! nil)] | |
(fn | |
([] (rf)) | |
([result] | |
(if @current-interval | |
(rf (rf result @current-interval)) | |
(rf result))) | |
([result interval] | |
(if @current-interval | |
(if (overlapping? @current-interval interval) | |
(do (vswap! current-interval collapse-intervals interval) | |
result) | |
(let [res (rf result @current-interval)] | |
(vreset! current-interval interval) | |
res)) | |
(do (vreset! current-interval interval) | |
result))))))) | |
(defn sum-intervals-xf | |
[intervals] | |
(transduce (comp distinct-intervals-xf (map interval-size)) + (sort intervals))) | |
(def sum-intervals sum-intervals-xf) | |
(defn sum-intervals-set | |
"c describes parts of a sequence. Some parts overlap. | |
Give a sum of all the unique items in the sequence." | |
[c] | |
;; Build the items | |
;; Pour 'em all into a single collection | |
;; Set 'em | |
;; Count 'em. | |
(->> (map #(range (first %1) (second %1)) c) | |
(apply concat) | |
(set) | |
(count))) | |
;; Prebaked long sequence using generator | |
(def intervals [[8531 8966] [9696 9862] [4452 7219] [9621 9836] [6737 7289] [6689 9582] [5858 5938] [5638 5664] [5936 9083] [5280 6579] [5148 6866] [6418 8798] [8991 9285] [6775 8350] [4659 8228] [9887 9982] [408 5142] [8212 9966] [7238 7743] [6813 7386] [1473 5581] [2880 3598] [5905 9872] [8103 9797] [4044 4305] [5426 9043] [6712 7175] [9075 9549] [2734 8165] [6938 7098]]) | |
;; distinct | |
(println "distinct, slow") | |
(time (sum-intervals-slow intervals)) | |
;; set | |
(println "set") | |
(time (sum-intervals-set intervals)) | |
;; non-overlapping | |
(println "non-overlapping") | |
(time (sum-intervals-fast intervals)) | |
;; transducers | |
(println "transducers") | |
(time (sum-intervals-xf intervals)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment