Crowbrammer/ordered_mistakes.clj

## ordered_mistakes.clj
(ns ordered-mistakes
  ;; I used Criterium to benchmark
  #_(:require [criterium.core :refer [quick-bench]]))

(def test-state
  {:document {:paragraphs {:uuid-1 {}}
              :paragraph-order [:uuid-1 :uuid-2 :uuid-3 :uuid-4]}
   :edit {:spellcheck {:mistakes {:uuid-1 [[4 12] [38 42]]
                                  :uuid-3 [[7 12]]
                                  :uuid-4 []
                                  :uuid-X [[0 4]]}}}})

;; Original

(defn paragraph-order [state]
  (get-in state [:document :paragraph-order]))

(defn mistakes [state]
  (let [paragraph-order (paragraph-order state)]
    (->> (get-in state [:edit :spellcheck :mistakes])
         ;; only paragraphs with mistakes and present on paragraph-order
         (filter (fn [[k v]] (and (seq v) (some #{k} paragraph-order)))))))

(defn ordered-mistakes [state]
  (let [paragraph-order (paragraph-order state)]
    (->> (mistakes state)
         ;; sort-by paragraph-order
         (sort-by (fn [[k _]] (.indexOf paragraph-order k)))
         ;; "unpack" into [:p-uuid [word-start word-end]]
         (mapcat (fn [[k v]] (map (fn [i] [k i]) v)))
         (vec))))


;; Using transducers

(defn ordered-mistakes-xf
  [{{paragraph-order :paragraph-order} :document
    {{mistakes :mistakes} :spellcheck} :edit}]
  (let [xf (comp (map (fn [uuid] [uuid (get mistakes uuid)]))
                 (filter (fn [[_ mistake-ranges]] (seq mistake-ranges)))
                 (map (fn [[uuid mistake-ranges]] (for [r mistake-ranges] [uuid r]))))]
    (->> (transduce xf conj paragraph-order)
         (apply concat)
         (vec))))

(comment

  ;; Using transducers made this 4.6 times faster, taking
  ;; 21% of the time of the original due to performing all the
  ;; transformations (eagerly) on each element at once in a single
  ;; reduction instead of performing several seperate reductions
  ;; (which map and filter are syntactic sugar for).

  ;; This way is also pure versus the original which used the
  ;; `paragraph-order` function's results in the `mistakes`
  ;; function). This results in less psychic complexity and easier
  ;; programming.

  ;; This also also simplifies the functions into a single function
  ;; and makes use of destructuring though the `get-in` could be argued
  ;; as mentally cleaner.

  ;; averages around 0.14ms (on my machine)
  (time (ordered-mistakes test-state))
  ;; averages around 0.08ms
  (time (ordered-mistakes-xf test-state))

  ;; I also benchmarked this using Criterium:
  ;; https://clojars.org/criterium

  ;; 113,238 evals in 6 samples of 18,873 evals gives an
  ;; execution time mean of  5.890323 µs
  ;; (quick-bench (ordered-mistakes test-state) :verbose)

  ;; 525,288 evals in 6 samples of 87,548 evals gives an
  ;; execution time mean of 1.282512 µs
  ;; (quick-bench (ordered-mistakes-xf test-state) :verbose)

  :rcf)
	(ns ordered-mistakes
	;; I used Criterium to benchmark
	#_(:require [criterium.core :refer [quick-bench]]))

	(def test-state
	{:document {:paragraphs {:uuid-1 {}}
	:paragraph-order [:uuid-1 :uuid-2 :uuid-3 :uuid-4]}
	:edit {:spellcheck {:mistakes {:uuid-1 [[4 12] [38 42]]
	:uuid-3 [[7 12]]
	:uuid-4 []
	:uuid-X [[0 4]]}}}})

	;; Original

	(defn paragraph-order [state]
	(get-in state [:document :paragraph-order]))

	(defn mistakes [state]
	(let [paragraph-order (paragraph-order state)]
	(->> (get-in state [:edit :spellcheck :mistakes])
	;; only paragraphs with mistakes and present on paragraph-order
	(filter (fn [[k v]] (and (seq v) (some #{k} paragraph-order)))))))

	(defn ordered-mistakes [state]
	(let [paragraph-order (paragraph-order state)]
	(->> (mistakes state)
	;; sort-by paragraph-order
	(sort-by (fn [[k _]] (.indexOf paragraph-order k)))
	;; "unpack" into [:p-uuid [word-start word-end]]
	(mapcat (fn [[k v]] (map (fn [i] [k i]) v)))
	(vec))))


	;; Using transducers

	(defn ordered-mistakes-xf
	[{{paragraph-order :paragraph-order} :document
	{{mistakes :mistakes} :spellcheck} :edit}]
	(let [xf (comp (map (fn [uuid] [uuid (get mistakes uuid)]))
	(filter (fn [[_ mistake-ranges]] (seq mistake-ranges)))
	(map (fn [[uuid mistake-ranges]] (for [r mistake-ranges] [uuid r]))))]
	(->> (transduce xf conj paragraph-order)
	(apply concat)
	(vec))))

	(comment

	;; Using transducers made this 4.6 times faster, taking
	;; 21% of the time of the original due to performing all the
	;; transformations (eagerly) on each element at once in a single
	;; reduction instead of performing several seperate reductions
	;; (which map and filter are syntactic sugar for).

	;; This way is also pure versus the original which used the
	;; `paragraph-order` function's results in the `mistakes`
	;; function). This results in less psychic complexity and easier
	;; programming.

	;; This also also simplifies the functions into a single function
	;; and makes use of destructuring though the `get-in` could be argued
	;; as mentally cleaner.

	;; averages around 0.14ms (on my machine)
	(time (ordered-mistakes test-state))
	;; averages around 0.08ms
	(time (ordered-mistakes-xf test-state))

	;; I also benchmarked this using Criterium:
	;; https://clojars.org/criterium

	;; 113,238 evals in 6 samples of 18,873 evals gives an
	;; execution time mean of 5.890323 µs
	;; (quick-bench (ordered-mistakes test-state) :verbose)

	;; 525,288 evals in 6 samples of 87,548 evals gives an
	;; execution time mean of 1.282512 µs
	;; (quick-bench (ordered-mistakes-xf test-state) :verbose)

	:rcf)