Skip to content

Instantly share code, notes, and snippets.

@laczoka
Last active May 19, 2020 10:26
Show Gist options
  • Save laczoka/3814bc2b7c1c1f86e5256cd620ed4387 to your computer and use it in GitHub Desktop.
Save laczoka/3814bc2b7c1c1f86e5256cd620ed4387 to your computer and use it in GitHub Desktop.
A straight port of a cljs csv library to clojure@beam
;;
;; Straight port of https://raw.githubusercontent.com/testdouble/clojurescript.csv/master/src/testdouble/cljs/csv.cljs
;; by LT
(ns clojure.data.csv
(:require [clojure.string :as str]))
(defn- escape-quotes [s]
(str/replace s "\"" "\"\""))
(defn- wrap-in-quotes [s]
(str "\"" (escape-quotes s) "\""))
(defn- separate [data separator quote?]
(str/join separator
(cond->> data
:always (map str)
quote? (map wrap-in-quotes))))
(defn- write-data [data separator newline quote?]
(str/join newline (map #(separate % separator quote?) data)))
(defn- conj-in [coll index x]
(assoc coll index (conj (nth coll index) x)))
(def ^:private newlines
{:lf "\n" :cr+lf "\r\n"})
(def ^:private newline-error-message
(str ":newline must be one of [" (str/join "," (keys newlines)) "]"))
(defn write-csv
"Writes data to String in CSV-format.
Accepts the following options:
:separator - field separator
(default ,)
:newline - line separator
(accepts :lf or :cr+lf)
(default :lf)
:quote? - wrap in quotes
(default false)"
{:arglists '([data] [data & options]) :added "0.1.0"}
[data & options]
(let [{:keys [separator newline quote?] :or {separator "," newline :lf quote? false}} options]
(if-let [newline-char (get newlines newline)]
(write-data data
separator
newline-char
quote?)
(throw (clojerl.Error. newline-error-message)))))
(defn read-csv
"Reads data from String in CSV-format."
{:arglists '([data] [data & options]) :added "0.3.0"}
[data & options]
(let [{:keys [separator newline] :or {separator "," newline :lf}} options]
(if-let [newline-char (get newlines newline)]
(let [data-length (count data)]
(loop [index 0
state :in-field
in-quoted-field false
field-buffer nil
rows []]
(let [last-row-index (- (count rows) 1)]
(if (< index data-length)
(let [char (nth data index)
next-char (if (< index (- data-length 1))
(nth data (inc index))
nil)
str-char (str char)]
(case state
:in-field
(condp = str-char
"\""
(if in-quoted-field
(if (= (str next-char) "\"")
(recur (+ index 2)
:in-field
true
(str field-buffer char)
rows)
(recur (inc index) :in-field false field-buffer rows))
(recur (inc index)
:in-field
true
field-buffer
(if (> (count rows) 0)
rows
(conj rows []))))
separator
(if in-quoted-field
(recur (inc index)
:in-field
in-quoted-field
(str field-buffer char)
rows)
(recur (inc index)
:end-field
in-quoted-field
""
(conj-in rows last-row-index field-buffer)))
"\r"
(if (and (= newline :cr+lf) (not in-quoted-field))
(recur (inc index)
:in-field
in-quoted-field
field-buffer
rows)
(recur (inc index)
:in-field
in-quoted-field
(str field-buffer char)
rows))
"\n"
(if in-quoted-field
(recur (inc index)
:in-field
in-quoted-field
(str field-buffer char)
rows)
(recur (inc index)
:end-line
in-quoted-field
""
(conj-in rows last-row-index field-buffer)))
(recur (inc index)
:in-field
in-quoted-field
(str field-buffer char)
(if (> (count rows) 0)
rows
(conj rows []))))
:end-field
(condp = str-char
"\""
(recur (inc index) :in-field true field-buffer rows)
separator
(recur (inc index)
:end-field
in-quoted-field
""
(conj-in rows last-row-index ""))
"\n"
(recur (inc index)
:end-line
in-quoted-field
""
(conj-in rows last-row-index field-buffer))
(recur (inc index) :in-field in-quoted-field str-char rows))
:end-line
(case str-char
"\""
(recur (inc index)
:in-field
true
field-buffer
(conj (or rows []) []))
(recur (inc index)
:in-field
in-quoted-field
(str field-buffer char)
(conj (or rows []) [])))))
(conj-in rows last-row-index field-buffer)))))
(throw (clojerl.Error. newline-error-message)))))
;;
;; Straight port of https://github.com/testdouble/clojurescript.csv/blob/master/test/testdouble/cljs/csv_test.cljs
;; by LT
(ns clojure.data.csv-test
(:require [clojure.data.csv :as csv]
[clojure.test :refer [deftest testing is run-tests] :as t]))
(deftest write-csv-test
(let [data [[1 2 3] [4 5 6]]]
(testing "default separator ','"
(is (= "1,2,3\n4,5,6" (csv/write-csv data))))
(testing "user defined separator '|'"
(is (= "1|2|3\n4|5|6" (csv/write-csv data :separator "|"))))
(testing "user defined newline ':cr+lf'"
(is (= "1,2,3\r\n4,5,6" (csv/write-csv data :newline :cr+lf))))
(testing "user defined separator '|' and newline ':cr+lf"
(is (= "1|2|3\r\n4|5|6" (csv/write-csv data :separator "|" :newline :cr+lf))))
(testing "quote each field"
(is (= "\"1,000\",\"2\",\"3\"\n\"4\",\"5,000\",\"6\"" (csv/write-csv [["1,000" "2" "3"] ["4" "5,000" "6"]] :quote? true))))
(testing "str non-string fields"
(is (= "100,2,\n4,500,false"
(csv/write-csv [["100" 2 nil] ["4" "500" false]]))))
(testing "str and quote non-string fields"
(is (= "\"1,000\",\"2\",\"\"\n\"4\",\"5,000\",\"false\""
(csv/write-csv [["1,000" 2 nil] ["4" "5,000" false]] :quote? true))))
(testing "valid characters in quoted fields"
(is (= "\"a\nb\",\"c\rd\"\n\"e,f\",\"g\"\"h\""
(csv/write-csv [["a\nb" "c\rd"] ["e,f" "g\"h"]] :quote? true))))
(testing "fields with spaces"
(is (= "a b,c d\ne f,g h"
(csv/write-csv [["a b" "c d"] ["e f" "g h"]])))
(is (= "\"a b\",\"c d\"\n\"e f\",\"g h\""
(csv/write-csv [["a b" "c d"] ["e f" "g h"]] :quote? true))))
(testing "blank fields at end of row"
(is (= "a,b,c\n1,1,1\n2,,\n3,,"
(csv/write-csv [["a" "b" "c"]
["1" "1" "1"]
["2" "" ""]
["3" "" ""]]))))
(testing "error when newline is not one of :lf OR :cr+lf"
(is (thrown-with-msg? clojerl.Error #":newline" (csv/write-csv data :newline "foo"))))))
(deftest read-csv-test
(let [data [["1" "2" "3"] ["4" "5" "6"]]]
(testing "default separator ','"
(is (= data (csv/read-csv "1,2,3\n4,5,6"))))
(testing "user defined separator '|'"
(is (= data (csv/read-csv "1|2|3\n4|5|6" :separator "|"))))
(testing "user defined newline ':cr+lf'"
(is (= data (csv/read-csv "1,2,3\r\n4,5,6" :newline :cr+lf))))
(testing "user defined separator '|' and newline ':cr+lf'"
(is (= data (csv/read-csv "1|2|3\r\n4|5|6" :separator "|" :newline :cr+lf))))
(testing "valid characters in quoted fields"
(is (= [["a\nb" "c\rd"] ["e,f" "g\"h"]]
(csv/read-csv "\"a\nb\",\"c\rd\"\n\"e,f\",\"g\"\"h\""))))
(testing "quoted fields containing only quotes"
(is (= [["\"" "\"\""] ["\"\"\"" "\"\"\"\""]]
(csv/read-csv "\"\"\"\",\"\"\"\"\"\"\n\"\"\"\"\"\"\"\",\"\"\"\"\"\"\"\"\"\""))))
(testing "fields with spaces"
(is (= [["a b" "c d"] ["e f" "g h"]]
(csv/read-csv "\"a b\",c d\ne f,\"g h\""))))
(testing "empty fields"
(is (= [["a" "b" "c" "d"] ["1" "" "" "d"]]
(csv/read-csv "a,b,c,d\n1,\"\",,d"))))
(testing "blank fields at end of row"
(is (= [["a" "b" "c"]
["1" "1" "1"]
["2" "" ""]
["3" "" ""]]
(csv/read-csv "a,b,c\n1,1,1\n2,,\n3,,"))))))
(defn run []
(run-tests))
(comment
(run-tests))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment