Last active
March 11, 2020 11:34
-
-
Save joinr/62591119cc1fb7d5bef988617a8f2cd7 to your computer and use it in GitHub Desktop.
An alternate implementation that respects the 1x-only reader principle, and instead uses filthy mutation to avoid holding on to the head of the iterator-seq.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(defn csv->columns
  "Parse the rows produced by `(raw-row-iterable input)` into one column per
  field, reading the underlying iterator exactly once.

  Options (keyword arguments):
    :header-row?     - when true (default), the first row supplies the column
                       names and is not parsed as data. When false, the first
                       row is parsed as data and columns are named 0..n-1.
    :parser-fn       - optional function called as
                       (parser-fn first-row-value sample-values) once per
                       column; it must return the parser for that column.
                       When absent, `default-column-parser` is called once per
                       column.
    :parser-scan-len - maximum number of data rows sampled for `parser-fn`
                       (default 100).

  Each field that is nil, empty, or equal to \"na\" (case-insensitive) is
  reported to its parser via `missing!`; every other field goes through
  `parse!`.

  Returns a vector with one entry per column: the value of
  `(column-data parser)` with :name associated onto it.

  NOTE(review): rows are assumed to have at least as many fields as the first
  row; a shorter row will make `aget` throw."
  [input & {:keys [header-row? parser-fn
                   parser-scan-len]
            :or {header-row? true
                 parser-scan-len 100}}]
  (let [;; Read-and-clear: hands back the atom's value and nils the atom so the
        ;; atom itself no longer references the head of the row sequence while
        ;; the caller walks it.
        uncache! (fn [atm]
                   (let [res @atm
                         _ (reset! atm nil)]
                     res))
        rows (raw-row-iterable input)
        ;; The iterator is requested once; everything below works off this seq.
        data (atom (iterator-seq (.iterator rows)))
        initial-row (first @data)
        _ (reset! data (if header-row?
                         (rest @data)
                         @data))
        n-cols (count initial-row)
        ^List column-parsers
        (vec (if parser-fn
               (let [scan-rows (take parser-scan-len @data)
                     ;; The input may hold fewer rows than parser-scan-len, so
                     ;; the transposition below must use the number of rows
                     ;; actually sampled.
                     n-scanned (count scan-rows)
                     scan-cols (if (pos? n-scanned)
                                 ;; Interleaving k rows yields k consecutive
                                 ;; values per column; chunking by k recovers
                                 ;; the per-column samples.
                                 (->> (apply interleave scan-rows)
                                      (partition n-scanned))
                                 ;; No data rows: still build one parser per
                                 ;; column, each from an empty sample.
                                 (repeat n-cols ()))]
                 (map parser-fn initial-row scan-cols))
               (repeatedly n-cols default-column-parser)))]
    (doseq [^"[Ljava.lang.String;" row (uncache! data)]
      (loop [col-idx 0]
        (when (< col-idx n-cols)
          (let [^String row-data (aget row col-idx)
                parser (.get column-parsers col-idx)]
            (if (and row-data
                     (> (.length row-data) 0)
                     (not (.equalsIgnoreCase "na" row-data)))
              (parse! parser row-data)
              (missing! parser))
            (recur (unchecked-inc col-idx))))))
    (mapv (fn [init-row-data parser]
            (assoc (column-data parser)
                   :name init-row-data))
          (if header-row?
            initial-row
            (range n-cols))
          column-parsers)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment