Skip to content

Instantly share code, notes, and snippets.

@joinr
Last active October 14, 2021 17:43
Show Gist options
  • Save joinr/74f6c3dc0d179978616101d9f1954536 to your computer and use it in GitHub Desktop.
Save joinr/74f6c3dc0d179978616101d9f1954536 to your computer and use it in GitHub Desktop.
exploring datasets and transducers in tech.ml.dataset
(ns transducertest
(:require [tech.v3.dataset :as ds]
[clojure.core.reducers :as r]))
(def d
  "Sample dataset used by the examples in this file: three rows with an
  integer column :a and a keyword column :b."
  (ds/->dataset {:a [1 2 3]
                 :b [:foo :bar :baz]}))
(defprotocol ITransposable
  "Conversion between a row-oriented and a column-oriented representation
  of tabular data."
  (-row-major [obj]
    "Returns a row-oriented representation of obj.")
  (-column-major [obj]
    "Returns a column-oriented representation of obj."))
;; Row-oriented, appendable view over a dataset.
;;   ds   - backing dataset; its rows come first when the view is traversed.
;;   rows - map of column key -> vector of values, holding the rows that were
;;          appended with `conj` (one vector entry per appended row).
(deftype row-view [ds rows]
  clojure.lang.IPersistentCollection
  ;; Appends the row map `r`: each value is conj'ed onto the pending column
  ;; vector stored under the same key.
  ;; NOTE(review): rows whose key sets differ yield columns of unequal length;
  ;; this assumes every conj'ed row carries the same keys.
  (cons [this r]
    (row-view. ds
               (reduce-kv (fn [cols k v]
                            (assoc cols k (conj (get cols k []) v)))
                          rows
                          r)))
  (empty [this] (row-view. (empty ds) {}))
  ;; Total number of rows: backing dataset rows plus appended rows.
  (count [this]
    (+ (ds/row-count ds)
       (ds/row-count (ds/->dataset rows))))
  ;; Two row-views are equal when they yield the same sequence of row maps.
  ;; Previously unimplemented, so `=` on a row-view threw AbstractMethodError.
  (equiv [this other]
    (or (identical? this other)
        (and (instance? row-view other)
             (= (seq this) (seq other)))))
  ITransposable
  ;; Materializes the view as a dataset. The backing dataset is kept (it used
  ;; to be dropped, so converting a non-empty dataset to row-major and back
  ;; lost every row). When the backing dataset has no rows the result is built
  ;; from the appended rows alone, exactly as before.
  ;; NOTE(review): ds/concat presumably requires both datasets to share the
  ;; same columns - confirm against the tech.ml.dataset docs.
  (-column-major [this]
    (let [appended (ds/->dataset rows)]
      (cond
        (zero? (ds/row-count ds))       appended
        (zero? (ds/row-count appended)) ds
        :else                           (ds/concat ds appended))))
  (-row-major [this] this)
  clojure.lang.Seqable
  ;; Rows of the backing dataset followed by the appended rows. The outer
  ;; `seq` makes an empty view return nil, as the Seqable contract requires;
  ;; returning the bare lazy seq made empty views look non-empty.
  (seq [this]
    (seq (concat (ds/mapseq-reader ds)
                 (ds/mapseq-reader (ds/->dataset rows))))))
(defn- reduce-in-order
  "Reduces `left` and then `right` with `f`, starting from `init`.
  Early termination is honored across both sources: once `f` returns a
  `reduced` value while walking `left`, `right` is never visited and the
  unwrapped value is returned."
  [f init left right]
  (let [guard (fn [acc x]
                (let [ret (f acc x)]
                  ;; Wrap a second time so the `reduced` marker survives the
                  ;; single unwrap that `reduce` performs when it stops.
                  (if (reduced? ret) (reduced ret) ret)))
        acc   (reduce guard init left)]
    (if (reduced? acc)
      @acc
      (reduce f acc right))))

;; Reduction over a row-view visits the rows of the backing dataset first and
;; the appended rows second. The previous implementation delegated to `r/cat`,
;; which keeps reducing the second source after `f` returned `reduced` on the
;; first one.
(extend-protocol clojure.core.protocols/CollReduce
  row-view
  ;; No-init arity. Keeps the dispatch that `r/cat` performed: when one side
  ;; is empty the other side is reduced directly, otherwise the reduction is
  ;; seeded with (f).
  (coll-reduce [coll f]
    (let [left  (ds/mapseq-reader (.ds coll))
          right (ds/mapseq-reader (ds/->dataset (.rows coll)))]
      (cond
        (zero? (count left))  (reduce f right)
        (zero? (count right)) (reduce f left)
        :else                 (reduce-in-order f (f) left right))))
  (coll-reduce [coll f init]
    (reduce-in-order f init
                     (ds/mapseq-reader (.ds coll))
                     (ds/mapseq-reader (ds/->dataset (.rows coll))))))
;; ITransposable implementations for the values accepted as the source or
;; target of a conversion.
(extend-protocol ITransposable
  ;; A dataset is already column-major; its row-major form is a row-view with
  ;; no appended rows.
  tech.v3.dataset.impl.dataset.Dataset
  (-row-major [this] (row-view. this {}))
  (-column-major [this] this)
  ;; nil stands for an empty dataset.
  nil
  (-row-major [this] (row-view. (ds/->dataset {}) {}))
  (-column-major [this] (ds/->dataset {}))
  ;; A small map literal is handed to ds/->dataset instead of being discarded:
  ;; `{}` still yields an empty dataset, while a non-empty map is no longer
  ;; silently dropped.
  ;; NOTE(review): assumes a non-empty map is meant as dataset input in
  ;; whatever shape ds/->dataset accepts - confirm this is the intent.
  clojure.lang.PersistentArrayMap
  (-row-major [this] (row-view. (ds/->dataset this) {}))
  (-column-major [this] (ds/->dataset this)))
(defn row-major
  "Returns a row-oriented view of coll.
  Values whose type extends ITransposable are converted with -row-major; any
  other non-empty seqable is returned unchanged. Anything else throws an
  ex-info, since that case is not implemented yet."
  [coll]
  (cond
    (extends? ITransposable (type coll)) (-row-major coll)
    (seq coll)                           coll
    :else (throw (ex-info "under construction!" {}))))
(defn column-major
  "Returns a column-oriented view of coll.
  Values whose type extends ITransposable are converted with -column-major;
  any other non-empty seqable is returned unchanged. Anything else throws an
  ex-info, since that case is not implemented yet."
  [coll]
  (cond
    (extends? ITransposable (type coll)) (-column-major coll)
    (seq coll)                           coll
    :else (throw (ex-info "under construction!" {}))))
(defn into-dataset
  "Dataset-flavoured `into`.
  With one argument, returns the column-major form of `to`. With `to` and
  `from`, pours the row-major form of `from` into the row-major form of `to`
  (through the transducer `xform` when supplied) and returns the column-major
  form of the result."
  ([to]
   (column-major to))
  ([to from]
   (let [target (row-major to)
         source (row-major from)]
     (column-major (into target source))))
  ([to xform from]
   (let [target (row-major to)
         source (row-major from)]
     (column-major (into target xform source)))))
(def +empty-records+
  "Row-major view of an empty dataset; a convenient target for `into`."
  (row-major (ds/->dataset {})))
;; REPL examples. The tables in the comments are the results printed for the
;; form directly above them.

;; The long way: transduce over the rows of d, conj'ing each transformed row
;; into an empty row-view, then convert the result back to a dataset.
(->> (row-major d)
(transduce (map (fn [{:keys [a] :as r}]
(assoc r :c (* a 3))))
conj
(empty (row-major d)))
(column-major))
;; | :b | :a | :c |
;; |------|---:|---:|
;; | :foo | 1 | 3 |
;; | :bar | 2 | 6 |
;; | :baz | 3 | 9 |
;; The same transformation as a reusable transducer: adds column :c = 3 * :a.
(def xf (map (fn [{:keys [a] :as r}]
(assoc r :c (* a 3)))))
;; Same result via `into` with an empty row-view as the target.
(->> d
row-major
(into +empty-records+ xf)
column-major)
;; | :b | :a | :c |
;; |------|---:|---:|
;; | :foo | 1 | 3 |
;; | :bar | 2 | 6 |
;; | :baz | 3 | 9 |
;; into-dataset wraps the row-major / into / column-major pipeline above, so
;; the next form is equivalent to the previous one.
(into-dataset +empty-records+ xf d)
;; | :b | :a | :c |
;; |------|---:|---:|
;; | :foo | 1 | 3 |
;; | :bar | 2 | 6 |
;; | :baz | 3 | 9 |
;; An empty map literal works as the target as well.
(into-dataset {} xf d)
;; | :b | :a | :c |
;; |------|---:|---:|
;; | :foo | 1 | 3 |
;; | :bar | 2 | 6 |
;; | :baz | 3 | 9 |
;; Build a dataset from a plain sequence by mapping each number to a row map.
(->> (range 10)
(into (empty (row-major d)) (map (fn [i] {:a i :b i :c (* i i)})))
column-major)
;; _unnamed [10 3]:
;; | :a | :b | :c |
;; |---:|---:|---:|
;; | 0 | 0 | 0 |
;; | 1 | 1 | 1 |
;; | 2 | 2 | 4 |
;; ...
;; | 7 | 7 | 49 |
;; | 8 | 8 | 64 |
;; | 9 | 9 | 81 |
;; A row-view is reducible, so it can be transduced directly: sum a + b + c
;; over every row.
(->> (range 10)
(into (empty (row-major d)) (map (fn [i] {:a i :b i :c (* i i)})))
(transduce (map (fn [{:keys [a b c]}] (+ a b c))) +))
;;375
;; The same per-row sums, collected into a vector instead of being added up.
(->> (range 10)
(into (empty (row-major d)) (map (fn [i] {:a i :b i :c (* i i)})))
(into [] (map (fn [{:keys [a b c]}] (+ a b c))) ))
;; expected: [0 3 8 15 24 35 48 63 80 99]
;; Spelled-out form of the into-dataset call below (kept commented out):
;; (column-major (into (row-major (empty d))
;; (comp (filter #(= (:b %) :baz))
;; (map (fn [r] (assoc r :c "some-value"))))
;; (row-major d)))
;; Equivalent, using into-dataset with a composed transducer.
(into-dataset {} (comp (filter #(= (:b %) :baz))
(map (fn [r] (assoc r :c "some-value")))) d)
;; | :b | :a | :c |
;; |------|---:|------------|
;; | :baz | 3 | some-value |
;; Same result with the lazy seq functions instead of transducers.
(->> (row-major d) ;;semantically equivalent to ds/mapseq-reader here
(filter #(= (:b %) :baz))
(map (fn [r] (assoc r :c "some-value")))
(into-dataset {}))
;; | :b | :a | :c |
;; |------|---:|------------|
;; | :baz | 3 | some-value |
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment