Skip to content

Instantly share code, notes, and snippets.

@Crowbrammer
Created April 22, 2022 17:24
Show Gist options
  • Save Crowbrammer/0a7ea35132cb3dec38792cbd6ad77b4e to your computer and use it in GitHub Desktop.
Save Crowbrammer/0a7ea35132cb3dec38792cbd6ad77b4e to your computer and use it in GitHub Desktop.
Benchmark for interval summations
;; tldr
;; distinct, slow
;; "Elapsed time: 84.1618 msecs"
;; set
;; "Elapsed time: 32.7301 msecs"
;; non-overlapping
;; "Elapsed time: 1.6902 msecs"
;; transducers
;; "Elapsed time: 0.5955 msecs"
(ns seq-interval-benchmark
"Test the speed of various seq interval summation solutions.")
(defn gen-sequence-intervals
"Used for benchmarking sum-of-sequence solutions. Use this to
generate as many random sequence intervals as you want."
[num-intervals, sequence-length]
;; Start with an empty collection
;; Random int between 0 and sequence length
(loop [intervals-left (dec num-intervals) intervals []]
(let [first-rand (rand-int sequence-length)
;; Random int between ^ and sequence length, exclusive
;; resulting number needs to be after first rand int (because end)
;; rand-int always starts from 0, need a new start
;; Subtract first rand-int from sequence-length
;; rand-int
;; Add back that first rand-int to the number
second-rand (-> sequence-length (- first-rand) (rand-int) (+ first-rand))
;; Make a sequence of the two numbers in order
;; Add it to the initial collection
updated-intervals (conj intervals [first-rand second-rand])]
;; More intervals to go? Do this again with the existing collection
(if (pos? intervals-left)
(recur (dec intervals-left) updated-intervals)
updated-intervals))))
;; (println (gen-sequence-intervals 30 10000))
;; Benchmark all the solutions (scroll to the bottom)
(defn sum-intervals-slow
[intervals]
(->> intervals
(mapcat (partial apply range))
distinct
count))
(defn collapse-intervals
[[a1 a2] [b1 b2]]
[(min a1 b1) (max a2 b2)])
(defn overlapping?
[[a1 a2] [b1 b2]]
(or (and (>= a2 b1)
(< a1 b2))
(and (>= b2 a1)
(< b1 a2))))
(defn distinct-intervals
[intervals]
(let [sorted-intervals (sort intervals)]
(loop [[i & more :as intervals] (rest sorted-intervals)
current-interval (first sorted-intervals)
new-intervals []]
(if (seq intervals)
(if (overlapping? current-interval i)
(recur more (collapse-intervals current-interval i) new-intervals)
(recur more i (conj new-intervals current-interval)))
(if current-interval
(conj new-intervals current-interval)
new-intervals)))))
(defn interval-size
[[min max]]
(- max min))
(defn sum-intervals-fast
[intervals]
(transduce (map interval-size) + (distinct-intervals intervals)))
(def distinct-intervals-xf
(fn [rf]
(let [current-interval (volatile! nil)]
(fn
([] (rf))
([result]
(if @current-interval
(rf (rf result @current-interval))
(rf result)))
([result interval]
(if @current-interval
(if (overlapping? @current-interval interval)
(do (vswap! current-interval collapse-intervals interval)
result)
(let [res (rf result @current-interval)]
(vreset! current-interval interval)
res))
(do (vreset! current-interval interval)
result)))))))
(defn sum-intervals-xf
[intervals]
(transduce (comp distinct-intervals-xf (map interval-size)) + (sort intervals)))
(def sum-intervals sum-intervals-xf)
(defn sum-intervals-set
"c describes parts of a sequence. Some parts overlap.
Give a sum of all the unique items in the sequence."
[c]
;; Build the items
;; Pour 'em all into a single collection
;; Set 'em
;; Count 'em.
(->> (map #(range (first %1) (second %1)) c)
(apply concat)
(set)
(count)))
;; Prebaked long sequence using generator
(def intervals [[8531 8966] [9696 9862] [4452 7219] [9621 9836] [6737 7289] [6689 9582] [5858 5938] [5638 5664] [5936 9083] [5280 6579] [5148 6866] [6418 8798] [8991 9285] [6775 8350] [4659 8228] [9887 9982] [408 5142] [8212 9966] [7238 7743] [6813 7386] [1473 5581] [2880 3598] [5905 9872] [8103 9797] [4044 4305] [5426 9043] [6712 7175] [9075 9549] [2734 8165] [6938 7098]])
;; distinct
(println "distinct, slow")
(time (sum-intervals-slow intervals))
;; set
(println "set")
(time (sum-intervals-set intervals))
;; non-overlapping
(println "non-overlapping")
(time (sum-intervals-fast intervals))
;; transducers
(println "transducers")
(time (sum-intervals-xf intervals))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment