Skip to content

Instantly share code, notes, and snippets.

@ericnormand
Last active July 14, 2019 20:39
Show Gist options
  • Save ericnormand/611d0e06df78920e8e0be81ab97b0e9a to your computer and use it in GitHub Desktop.
Save ericnormand/611d0e06df78920e8e0be81ab97b0e9a to your computer and use it in GitHub Desktop.

mergesort

mergesort is one of the earlier sorting algorithms. It was developed and characterized by none other than von Neumann. It's a great example of a divide-and-conquer algorithm, and it's quite fun to write in Clojure.

So that's the task: implement mergesort.

You can find descriptions of mergesort by searching. But remember, it's easy in Clojure because recursion is the norm.

Note: Make sure it works on large lists. Try it on a list of 1 million numbers.

Bonus:

  1. Have it use compare to work on more than numbers.
  2. Have it take a key to sort by.
  3. Use clojure.reducers to implement a parallel version.
(defn merge [l1 l2]
(lazy-seq
(cond
(empty? l1)
l2
(empty? l2)
l1
(< (first l1) (first l2))
(cons (first l1) (merge (rest l1) l2))
:else
(cons (first l2) (merge l1 (rest l2))))))
(defn mergesort* [v]
(case (count v)
0 ()
1 (seq v)
;; else
(let [half (quot (count v) 2)]
(merge (mergesort* (subvec v 0 half))
(mergesort* (subvec v half))))))
(defn mergesort [ls]
(seq (mergesort* (vec ls))))
(ns clj-challenge.merge-sort
"Sorts numbers using the Merge Sort algorithm"
(:require [clojure.set :as clj-set]
[clojure.string :as cs]))
(defn do-merge
"Merges two sorted sequences - `left` and `right`. The sequences are
destructured as e.g.
x - the first element
xs - the rest of the elements
lt - the whole left sequence
If either first element is `nil` then we can safely append them to the result.
Otherwise, we still have elements, and so we append the smaller of the two onto
the result of a recursive call with the rest of the elements from that sequence
and the other sequence."
[left right]
(loop [[x & xs :as lt] left
[y & ys :as rt] right
result []]
(cond
(or (nil? x) (nil? y)) (concat result lt rt)
(< x y) (recur xs rt (conj result x))
:else (recur lt ys (conj result y)))))
(defn merge-sort [arg]
(let [cnt (count arg)
mid (/ cnt 2)
left (take mid arg)
right (drop mid arg)]
(if (= 1 cnt)
arg
(do-merge (merge-sort left) (merge-sort right)))))
;;
;; A few REPL expressions to try things out
;;
(comment
(def data1 [1 9 2 8 3 7 4 6 5])
(def data2 [1 9 2 8 3 7 4 6 5 10])
(merge-sort data1)
(merge-sort data2)
(do-merge [1 4 4 5] [2 3 7 9])
(do-merge [1] [2 5])
;; Make sure there's no StackOverflow
(do-merge (range 1000000) (range 1000000 2000000))
(merge-sort (take 1000000 (map rand-int (repeat 1000000)))))
(ns scratchpad.pftv.334
(:require [clojure.string :as str]
[clojure.test :refer :all]
[clojure.core.reducers :as r]))
(defn combine [compare-fn left right]
(loop [a left
b right
result []]
(cond
(empty? a) (concat result b)
(empty? b) (concat result a)
:else
(if (neg? (compare-fn (first a) (first b)))
(recur (rest a) b (conj result (first a)))
(recur a (rest b) (conj result (first b)))))))
(defn mergesort [compare-fn lst]
;; list of 0 or 1 is sorted.
(if (<= (count lst) 1)
lst
;; split in middle recursively and combine results
(let [mid (int (/ (count lst) 2))
[left right] (split-at mid lst)]
(combine compare-fn
(mergesort compare-fn left)
(mergesort compare-fn right)))))
(deftest test-combine
(is (= (combine compare [5 6] [1 7]) [1 5 6 7]))
(is (= (combine compare [4] [1 2 3]) [1 2 3 4]))
(is (= (combine compare [1 2 3] [4]) [1 2 3 4]))
(is (= (combine compare (range 500000 1000000) (range 0 500000)) (range 0 1000000))))
(deftest test-mergesort
(is (= (mergesort compare [8 1 5 1 5 2 3]) [1 1 2 3 5 5 8]))
(is (= (mergesort compare (reverse (range 0 1000000))) (range 1000000))))
(run-tests)
(ns miner.mergesort)
;; https://purelyfunctional.tv/issues/purelyfunctional-tv-newsletter-334-tip-can-you-fill-in-the-blanks/
;; mergesort
;; https://en.wikipedia.org/wiki/Merge_sort
;;
;; Bonus:
;; Have it use compare to work on more than numbers.
;; Have it take a key to sort by.
;; Use clojure.reducers to implement a parallel version.
(defn mergesort
([coll] (mergesort identity coll))
([fkey coll]
(if (empty? coll)
coll
(let [merge2 (fn [as bs]
(loop [as as bs bs res []]
(cond (empty? as) (into res bs)
(empty? bs) (into res as)
:else (let [a (first as)
b (first bs)]
(if (pos? (compare (fkey a) (fkey b)))
(recur as (rest bs) (conj res b))
(recur (rest as) bs (conj res a)))))))]
(loop [runs (mapv list coll)]
(if (= (count runs) 1)
(peek runs)
(recur (loop [[a b & cs] runs res []]
(cond (nil? a) res
(nil? b) (conj res a)
:else (recur cs (conj res (merge2 a b))))))))))))
(defn partition-while
"Returns a lazy sequence of partitions with each partition containing a run of elements for
which `pred2` returns true when applied to the previous element and the current input. The
first input goes into the first partition without calling `pred2`. Returns a stateful
transducer when no collection is provided."
([pred2]
(fn [rf]
(let [a (java.util.ArrayList.)]
(fn
([] (rf))
([result]
(let [result (if (.isEmpty a)
result
(let [v (vec (.toArray a))]
;;clear first!
(.clear a)
(unreduced (rf result v))))]
(rf result)))
([result input]
(if (or (.isEmpty a) (pred2 (.get a (dec (.size a))) input))
(do
(.add a input)
result)
(let [v (vec (.toArray a))]
(.clear a)
(let [ret (rf result v)]
(when-not (reduced? ret)
(.add a input))
ret))))))))
([pred2 coll]
(sequence (partition-while pred2) coll)))
;; slightly faster with some transducers
(defn xmergesort
([coll] (xmergesort identity coll))
([fkey coll]
(if (empty? coll)
coll
(let [merge2 (fn
([as] as)
([as bs]
(loop [as as bs bs res []]
(cond (empty? as) (into res bs)
(empty? bs) (into res as)
:else (let [a (first as)
b (first bs)]
(if (pos? (compare (fkey a) (fkey b)))
(recur as (rest bs) (conj res b))
(recur (rest as) bs (conj res a))))))))
lte (fn [a b] (not (pos? (compare (fkey a) (fkey b)))))]
(loop [runs (into [] (partition-while lte) coll)]
(if (= (count runs) 1)
(peek runs)
(recur (into [] (comp (partition-all 2) (map #(apply merge2 %))) runs))))))))
;; My reducers version wasn't faster so I didn't include it.
(defn smoke-sort []
(let [xs (repeatedly (long 1e6) #(rand-int 10000))]
(assert (= (sort xs) (mergesort xs) (xmergesort xs))))
(let [amaps (shuffle (map #(hash-map :a % :b (- %)) (range 1000)))]
(assert (= (sort-by :a amaps) (mergesort :a amaps) (xmergesort :a amaps)))
(assert (= (sort-by :b amaps) (mergesort :b amaps) (xmergesort :b amaps))))
true)
(ns mergesort.mergesort
(:require [cljs.test :refer-macros [deftest is run-tests]]))
(defn merge
"Merge two sorted lists to a sorted list."
[xs ys]
(cond
(empty? xs) ys
(empty? ys) xs
(<= (first xs) (first ys))
(cons (first xs) (merge (rest xs) ys))
(<= (first ys) (first xs))
(cons (first ys) (merge xs (rest ys)))))
(defn mergesort
"Mergesort algorithm. Hey, von Neuman!"
[xs]
(if (empty? (rest xs))
xs
(let [left (take (/ (count xs) 2) xs)
right (drop (/ (count xs) 2) xs)]
(merge (mergesort left) (mergesort right)))))
;;; tests
(deftest merge-test
(is (= (merge [] []) []))
(is (= (merge [1] []) [1]))
(is (= (merge [] [1]) [1]))
(is (= (merge [] [1 2]) [1 2]))
(is (= (merge [1 2] [3 4]) [1 2 3 4]))
(is (= (merge [1 4] [2 3]) [1 2 3 4]))
)
(deftest mergesort-test
(is (= (mergesort []) []))
(is (= (mergesort [1]) [1]))
(is (= (mergesort [2 1]) [1 2]))
(is (= (mergesort [1 2]) [1 2]))
(is (= (mergesort [3 2 1]) [1 2 3]))
(is (= (mergesort [1 2 1]) [1 1 2]))
#_(is (= (mergesort [3 2 2 3 77 1 5 83]) [1 2 2 3 3 5 77 83]))
)
(run-tests)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment