Skip to content

Instantly share code, notes, and snippets.

@gfredericks
Last active September 23, 2020 01:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gfredericks/f75c95eb06fc4deacf6cf3aa9cdabb27 to your computer and use it in GitHub Desktop.
Save gfredericks/f75c95eb06fc4deacf6cf3aa9cdabb27 to your computer and use it in GitHub Desktop.
This function I wrote one time
(ns user.test
(:require
[clojure.test :refer [deftest is]]
[clojure.test.check.results :as results]
[clojure.test.check.generators :as gen]
[clojure.test.check.properties :as prop]
[clojure.test.check.clojure-test :refer [defspec]]))
(def gen-lazy-flatten-merge-by-keyfn
(gen/elements [identity - #(mod % 17)]))
(defn gen-lazy-flatten-merge-by-args
[gen-keyfn]
(gen/let [[xss keyfn]
(gen/tuple (gen/vector (gen/not-empty (gen/vector gen/large-integer)))
gen-keyfn)]
{:keyfn keyfn
:xss (->> xss
(map (fn [xs]
(with-meta (sort-by keyfn (rest xs)) {:order-k (first xs)})))
(map-indexed vector)
(sort-by (fn [[idx seq]]
[(keyfn (or (first seq) (:order-k (meta seq)))) idx]))
(map second))}))
(defspec lazy-flatten-merge-by 100
(prop/for-all [{:keys [xss keyfn]} (gen-lazy-flatten-merge-by-args
gen-lazy-flatten-merge-by-keyfn)]
(let [expected
(->> xss
(map-indexed (fn [idx xs]
(map-indexed #(vector (keyfn %2) idx %1 %2) xs)))
(apply concat)
(sort)
(map last))
actual
(doall (lazy-flatten-merge-by keyfn xss))]
(reify results/Result
(pass? [_] (= expected actual))
(result-data [_] {:expected expected, :actual actual})))))
(deftest lazy-flatten-merge-by-regression
(doseq [[in out keyfn] [[[] []]
[[[]] []]
[[[0]] [0]]
[[[0 0]] [0 0]]
[[[0 0 0]] [0 0 0]]
[[[0 1] [0]] [0 0 1]]
[[[-1 0] [-1 0]] [-1 -1 0 0]]
[[[-3 0] [-2 -1]] [-3 -2 -1 0]]
[[[-1 1] [0] [0]] [-1 0 0 1]]
[[[0 1] [0] [1]] [0 0 1 1]]
[[[-1 0] [-1 0] [0]] [-1 -1 0 0 0]]
[[[5 -12]] [5 -12] #(mod % 17)]
[[[0 -2 -1] [-2]] [0 -2 -2 -1] #(mod % 17)]
[[[-2 1] [-1 0] [0]] [-2 -1 0 0 1]]
[[[0 -1] [0 -1] [0]] [0 0 0 -1 -1] -]
[[[0 2] [0 1] [0]] [0 0 0 1 2]]
[[[0 1 2] [0 1 2] [0 1]] [0 0 0 1 1 1 2 2]]
[[[0 3] [0 -15 4] [0 1 -1]] [0 0 0 1 -15 3 4 -1] #(mod % 17)]]
:let [keyfn (or keyfn identity)]]
(is (= out (lazy-flatten-merge-by keyfn in)))))
(defspec lazy-flatten-merge-by-laziness-test 100
(prop/for-all [{:keys [xss keyfn]}
;; have to use identity because of how we're
;; orchestrating things below
(gen-lazy-flatten-merge-by-args (gen/return identity))]
(let [total (reduce + (map count xss))
max (apply max 42 (apply concat xss))
xss-with-bombs (map-indexed (fn [idx xs]
;; not really anything useful
;; we can do with empty seqs
(if (empty? xs)
xs
(concat xs
(cons (+ max 1 idx)
(lazy-seq (throw (Exception. "NOT LAZY ENOUGH")))))))
xss)
xss-with-final-bomb (concat xss-with-bombs
(cons [max]
(lazy-seq (throw (Exception. "DEFINITELY NOT LAZY ENOUGH")))))
result (lazy-flatten-merge-by keyfn xss-with-final-bomb)]
(= (sort (apply concat xss))
(sort (take total result))))))
(defn ^:private lazy-flatten-merge-by
"Returns a lazy sequence of the elements from seqs interleaved according
to keyfn. The seqs themselves must each be internally ordered, and the
seq of seqs must be ordered by first element.
This function will not realize any element of the seq-of-seqs or any
of the seqs therein before it is strictly necessary."
[keyfn seqs]
(let [seq-pairs (->> seqs
(remove empty?)
(map-indexed vector))]
(letfn [(start [seq-pairs]
(if-let [seq-pairs (seq seq-pairs)]
(let [[idx seq] (first seq-pairs)]
(just-pairs (keyfn (first seq)) idx seq (rest seq-pairs)))
()))
(just-pairs [k idx seq seq-pairs]
(cons (first seq)
(lazy-seq
(if-let [seq (next seq)]
(let [old-k k
x (first seq)
k (keyfn x)]
(cond (< k old-k)
(throw (ex-info "Ordering violation!" {:k1 old-k :k2 k}))
(= k old-k)
(just-pairs k idx seq seq-pairs)
:else
(if-let [[idx' seq'] (first seq-pairs)]
(let [x' (first seq')
k' (keyfn x')
[live-seq live-k live-idx stale-seq stale-k stale-idx]
(if (< k' k)
[seq' k' idx' seq k idx]
[seq k idx seq' k' idx'])]
(queue-and-pairs live-k live-idx live-seq
(sorted-map [stale-k stale-idx] stale-seq)
stale-k stale-idx
(rest seq-pairs)
k'))
;; just one seq left, return it
seq)))
;; exhausted the seq, so we're back to where we
;; started
(start seq-pairs)))))
(just-queue [k idx seq q q-k q-idx]
(cons (first seq)
(lazy-seq
(if-let [seq (next seq)]
;; okay, check if the k has changed, and if so,
;; compare to the q
(let [old-k k
x (first seq)
k (keyfn x)]
(cond
(< k old-k)
(throw (ex-info "Ordering violation!" {:k1 old-k :k2 k}))
(= k old-k)
;; k isn't changing, keep going with this seq
(just-queue k idx seq q q-k q-idx)
:else
;; the k increased, so let's compare it to the best in
;; the queue to see which to use
(if (neg? (compare [k idx] [q-k q-idx]))
;; the live seq is still top priority
(just-queue k idx seq q q-k q-idx)
;; switch with top q item
(let [q-pair [q-k q-idx]
q-seq (get q q-pair)
q (-> q
(dissoc q-pair)
(assoc [k idx] seq))
[new-q-k new-q-idx] (first (keys q))]
(just-queue q-k q-idx q-seq q new-q-k new-q-idx)))))
(let [[pair seq] (first q)
q (dissoc q pair)]
(if (zero? (count q))
;; last thing in the queue, return it
seq
;; more things in the queue, continue with
;; just-queue
(let [[k idx] pair
[q-k q-idx] (first (keys q))]
(just-queue k idx seq q q-k q-idx))))))))
(queue-and-pairs [k idx seq q q-k q-idx seq-pairs last-seq-k]
(cons (first seq)
(lazy-seq
(if-let [seq (next seq)]
(let [old-k k
x (first seq)
k (keyfn x)]
(cond
(< k old-k)
(throw (ex-info "Ordering violation!" {:k1 old-k :k2 k}))
(= k old-k)
;; keep going
(queue-and-pairs k idx seq q q-k q-idx seq-pairs last-seq-k)
:else
;; the key increased, so have to check if we should be using something
;; else; first compare it to the top of the queue
(if (neg? (compare [k idx] [q-k q-idx]))
;; okay, well we don't need the top of the queue, but we also need
;; to check if we need to check a new seq pair
(if (<= k last-seq-k)
;; no need to check a new seq pair, just keep going
(queue-and-pairs k idx seq q q-k q-idx seq-pairs last-seq-k)
;; live seq is better than the queue, but the next seq-pair
;; might be even better, so check that
(if-let [seq-pairs (clojure.core/seq seq-pairs)]
(let [[new-idx new-seq] (first seq-pairs)
new-k (keyfn (first new-seq))]
;; okay, we've got the next seq; how does it compare
;; to the live seq?
(if (< new-k k)
;; new seq is better, so put the previous live seq
;; into the queue
(queue-and-pairs new-k new-idx new-seq
(assoc q [k idx] seq) k idx
(rest seq-pairs) new-k)
;; live seq is still better; put the new seq into the
;; queue
(let [q (assoc q [new-k new-idx] new-seq)
[new-q-k new-q-idx] (first (keys q))]
(queue-and-pairs k idx seq
q new-q-k new-q-idx
(rest seq-pairs) new-k))))
;; there aren't any seq-pairs anymore actually, so we can
;; transition to just-queue
(just-queue k idx seq q q-k q-idx)))
;; top of the queue is better than the live seq
(let [;; put the no-longer-live seq into the queue so
;; we don't have to think about it anymore;
;; at this point we know it's not top of queue
q (-> (assoc q [k idx] seq))]
;; figure out whether to just use the next queue seq, or
;; pull a new pair first
(if (<= q-k last-seq-k)
;; no need to consult seq-pairs, just swap with the queue
(let [q-key [q-k q-idx]
q-seq (get q q-key)
q (dissoc q q-key)
[new-q-k new-q-idx] (first (keys q))]
(queue-and-pairs q-k q-idx q-seq q new-q-k new-q-idx seq-pairs last-seq-k))
;; gotta check seq-pairs first
(if-let [seq-pairs (clojure.core/seq seq-pairs)]
;; okay, we have a new pair; will we use that or the
;; next thing on the queue?
(let [[idx seq] (first seq-pairs)
k (keyfn (first seq))]
(if (< k q-k)
;; gotta use this pair, not the top of queue
(queue-and-pairs k idx seq
q q-k q-idx
(rest seq-pairs) k)
;; top of queue is best, put this pair into
;; the queue
(let [q-key [q-k q-idx]
q-seq (get q q-key)
q (-> q
(dissoc q-key)
(assoc [k idx] seq))
[new-q-k new-q-idx] (first (keys q))]
(queue-and-pairs q-k q-idx q-seq
q new-q-k new-q-idx
(rest seq-pairs) k))))
;; no more pairs, but we do need to pull out the next
;; queue element
(let [q-key [q-k q-idx]
q-seq (get q q-key)
q (dissoc q q-key)
[new-q-k new-q-idx] (first (keys q))]
(just-queue q-k q-idx q-seq q new-q-k new-q-idx))))))))
;; no more seq, have to decide whether to use the
;; top of the queue or pull a new pair
(if (<= q-k last-seq-k)
;; can just use the top of the queue
(let [q-key [q-k q-idx]
q-seq (get q q-key)
q (dissoc q q-key)]
(if (zero? (count q))
;; we finished the queue, can go back to just-pairs
;;
;; losing information here by discarding last-seq-k :/
;; this means it's not maximally lazy at some point
;;
;; OTOH the laziness test passes, so maybe it's not
;; an issue for some reason?
(just-pairs q-k q-idx q-seq seq-pairs)
;; still got a queue
(let [[new-q-k new-q-idx] (first (keys q))]
(queue-and-pairs q-k q-idx q-seq
q new-q-k new-q-idx
seq-pairs last-seq-k))))
;; pull off the next pair and compare
(if-let [seq-pairs (clojure.core/seq seq-pairs)]
;; got a new pair, compare it
(let [[idx seq] (first seq-pairs)
k (keyfn (first seq))]
(if (< k q-k)
;; Use this pair, not the queue
(queue-and-pairs k idx seq q q-k q-idx (rest seq-pairs) k)
;; Use top of queue, and put this pair in the queue
(let [q-key [q-k q-idx]
q-seq (get q q-key)
q (-> q
(dissoc q-key)
(assoc [k idx] seq))
[new-q-k new-q-idx] (first (keys q))]
(queue-and-pairs q-k q-idx q-seq q new-q-k new-q-idx (rest seq-pairs) k))))
;; no more pairs either, so we just have a queue at this point
(let [q-key [q-k q-idx]
q-seq (get q q-key)
q (dissoc q q-key)]
(if (zero? (count q))
;; that was the last thing in the queue, we're done
q-seq
;; still got a queue
(let [[new-q-k new-q-idx] (first (keys q))]
(just-queue q-k q-idx q-seq q new-q-k new-q-idx))))))))))]
(lazy-seq
(start seq-pairs)))))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment