erdos/split-by-key1.clj

## split-by-key1.clj
(do
    ;; Number of groups: one per processor reported by the JVM.
    (def n (.availableProcessors (Runtime/getRuntime)))

    ;; Round-robin style group number generator for keys.
    ;;
    ;; The previous version wrapped a stateful fn in `memoize`. memoize is a
    ;; plain check-then-compute-then-store cache, so two threads missing on the
    ;; same key at the same time each advanced the round-robin counter and each
    ;; got a DIFFERENT id. Here the lookup and the assignment happen inside one
    ;; `swap!`, so a key is bound to exactly one id no matter how many threads
    ;; ask for it concurrently.
    (let [group-count n
          assigned    (atom {})]   ; key -> group id, grows as new keys are seen
        (defn get-id-for
            "Returns the group id (an integer in [0, n)) assigned to key k.
            The first time a key is seen it receives the next id in round-robin
            order (1, 2, ..., n-1, 0, 1, ...); every later call with an equal
            key returns that same id. Safe to call from multiple threads."
            [k]
            ;; Fast path: an already-assigned key needs no compare-and-set.
            (if-let [entry (find @assigned k)]
                (val entry)
                (get (swap! assigned
                            (fn [ids]
                                ;; Re-check inside the swap: another thread may
                                ;; have assigned k since the read above, and
                                ;; swap! may retry this fn.
                                (if (contains? ids k)
                                    ids
                                    (assoc ids k (mod (inc (count ids)) group-count)))))
                     k))))

    (defn split-by-key
        "Distributes all items into n lazy sequences according to (f x).
        Returns a lazy seq of n lazy seqs; group number id holds the items
        whose key (f x) was assigned id by get-id-for, in their original order.
        Items with equal keys always end up in the same group. Which group a
        key gets depends on the order in which keys are first looked up."
        [f items]
        (for [id (range n)]
            ;; #{id} acts as a predicate: returns id when the assigned group
            ;; matches (0 is truthy in Clojure), nil otherwise.
            (filter (comp #{id} get-id-for f) items)))

    ;; example (fresh session, n = 8):
    ;; (split-by-key int (range 16)) => ((7 15) (0 8) (1 9) (2 10) (3 11) (4 12) (5 13) (6 14))

    ;; (future) is used to realize the lazy groups returned by #'split-by-key in parallel.
    (let [total 10000
          xs (range total)
          gs (split-by-key int xs)]
      (doseq [g gs]
        ;; do work here.
        (future (reduce + g)))
      ;; The group sizes must add up to total: get-id-for assigns each key to
      ;; exactly one group even while the groups are being realized by
      ;; concurrent futures. (count blocks on any group still being realized.)
      (reduce + (map count gs)))

)
	(do
	  ;; Number of groups: one per processor reported by the JVM.
	  (def n (.availableProcessors (Runtime/getRuntime)))

	  ;; Round-robin style group number generator for keys.
	  ;;
	  ;; The previous version wrapped a stateful fn in `memoize`. memoize is a
	  ;; plain check-then-compute-then-store cache, so two threads missing on the
	  ;; same key at the same time each advanced the round-robin counter and each
	  ;; got a DIFFERENT id. Here the lookup and the assignment happen inside one
	  ;; `swap!`, so a key is bound to exactly one id no matter how many threads
	  ;; ask for it concurrently.
	  (let [group-count n
	        assigned    (atom {})]   ; key -> group id, grows as new keys are seen
	    (defn get-id-for
	      "Returns the group id (an integer in [0, n)) assigned to key k.
	      The first time a key is seen it receives the next id in round-robin
	      order (1, 2, ..., n-1, 0, 1, ...); every later call with an equal
	      key returns that same id. Safe to call from multiple threads."
	      [k]
	      ;; Fast path: an already-assigned key needs no compare-and-set.
	      (if-let [entry (find @assigned k)]
	        (val entry)
	        (get (swap! assigned
	                    (fn [ids]
	                      ;; Re-check inside the swap: another thread may have
	                      ;; assigned k since the read above, and swap! may
	                      ;; retry this fn.
	                      (if (contains? ids k)
	                        ids
	                        (assoc ids k (mod (inc (count ids)) group-count)))))
	             k))))

	  (defn split-by-key
	    "Distributes all items into n lazy sequences according to (f x).
	    Returns a lazy seq of n lazy seqs; group number id holds the items
	    whose key (f x) was assigned id by get-id-for, in their original order.
	    Items with equal keys always end up in the same group. Which group a
	    key gets depends on the order in which keys are first looked up."
	    [f items]
	    (for [id (range n)]
	      ;; #{id} acts as a predicate: returns id when the assigned group
	      ;; matches (0 is truthy in Clojure), nil otherwise.
	      (filter (comp #{id} get-id-for f) items)))

	  ;; example (fresh session, n = 8):
	  ;; (split-by-key int (range 16)) => ((7 15) (0 8) (1 9) (2 10) (3 11) (4 12) (5 13) (6 14))

	  ;; (future) is used to realize the lazy groups returned by #'split-by-key in parallel.
	  (let [total 10000
	        xs (range total)
	        gs (split-by-key int xs)]
	    (doseq [g gs]
	      ;; do work here.
	      (future (reduce + g)))
	    ;; The group sizes must add up to total: get-id-for assigns each key to
	    ;; exactly one group even while the groups are being realized by
	    ;; concurrent futures. (count blocks on any group still being realized.)
	    (reduce + (map count gs)))

	)