Skip to content

Instantly share code, notes, and snippets.

@sbelak
Last active April 12, 2018 09:12
Show Gist options
  • Save sbelak/f47e8530770abb956ba7bf489b3d9408 to your computer and use it in GitHub Desktop.
Save sbelak/f47e8530770abb956ba7bf489b3d9408 to your computer and use it in GitHub Desktop.
(defn- sanitize-key
  "Turns an option key (keyword, symbol or string) into the text used for a
  Python keyword argument name by replacing every `-` with `_`,
  e.g. `:n-components` -> \"n_components\"."
  [k]
  (-> k
      name
      (s/replace "-" "_")))
(defn- sanitize-val
  "Renders an option value as the text of a Python literal.

  - keywords are rendered like the string of their name;
  - strings are wrapped in double quotes, with backslashes, double quotes
    and line breaks escaped so the value cannot break out of the generated
    Python string literal;
  - `true` / `false` / `nil` become Python's `True` / `False` / `None`
    (their Clojure spellings are not valid Python);
  - anything else (numbers etc.) is returned unchanged and is stringified
    by the caller."
  [v]
  (cond
    (keyword? v) (sanitize-val (name v))
    (string? v)  (str "\""
                      (s/escape v {\\       "\\\\"
                                   \"       "\\\""
                                   \newline "\\n"
                                   \return  "\\r"})
                      "\"")
    (true? v)    "True"
    (false? v)   "False"
    (nil? v)     "None"
    :else        v))
(defn- ->options-list
  "Renders a map of options as the text of a Python keyword-argument list:
  each entry becomes `key=value` (via `sanitize-key` / `sanitize-val`) and
  the entries are joined with \", \". An empty or nil map gives \"\"."
  [opts]
  (let [render-entry (fn [[k v]]
                       (str (sanitize-key k) "=" (sanitize-val v)))]
    (s/join ", " (map render-entry opts))))
(def tsne-template
  "Function of (out-path, options-text, in-path) returning the source of a
  Python script. The script loads the CSV at in-path (skipping its first
  row), runs sklearn.manifold.TSNE(options-text).fit_transform on it and
  writes the result as CSV to out-path."
  (let [script "import numpy as np
import sklearn.manifold
np.savetxt(\"%s\", sklearn.manifold.TSNE(%s).fit_transform(np.loadtxt(\"%s\", delimiter=\",\",skiprows=1)), delimiter=\",\")"]
    (fn [& args]
      (apply format script args))))
(def hdbscan-template
  "Function of (out-path, options-text, in-path) returning the source of a
  Python script. The script loads the CSV at in-path (skipping its first
  row), runs hdbscan.HDBSCAN(options-text).fit_predict on it and writes
  the result as CSV to out-path."
  (let [script "import numpy as np
import hdbscan
np.savetxt(\"%s\", hdbscan.HDBSCAN(%s).fit_predict(np.loadtxt(\"%s\", delimiter=\",\",skiprows=1)), delimiter=\",\")"]
    (fn [& args]
      (apply format script args))))
(defn- runpy
  "Runs a generated Python script over `df` and reads its output back.

  `template` is a function of (out-path, options-text, in-path) returning
  Python source (see `tsne-template` / `hdbscan-template`). `opts` may hold
  the following keys, which are consumed here and not forwarded to Python:
    :columns  - passed to `preprocess` together with `df`
    :header   - passed to `csv/slurp-csv` when reading the result
    :cast-fns - passed to `csv/slurp-csv` when reading the result
  Every other entry of `opts` is rendered by `->options-list` and spliced
  into the script as keyword arguments.

  The data travels through two temporary CSV files. Both are deleted in
  `finally` blocks, so they are removed even when writing the input,
  running Python or reading the output throws.

  Returns whatever `csv/slurp-csv` returns for the output file.
  NOTE(review): like the original, this deletes the output file right
  after `csv/slurp-csv` returns, so it assumes that call reads the file
  eagerly - confirm against the csv library in use."
  [template opts df]
  (let [{:keys [columns header cast-fns]} opts
        in-file (File/createTempFile "in-" ".csv")]
    (try
      (let [out-file (File/createTempFile "out-" ".csv")]
        (try
          (let [in  (.getAbsolutePath in-file)
                out (.getAbsolutePath out-file)]
            (csv/spit-csv in (preprocess columns df))
            ;; Script text -> echo -> python's stdin.
            (with-programs [python echo]
              (-> (template out
                            (->options-list (dissoc opts :columns
                                                    :header
                                                    :cast-fns))
                            in)
                  (echo {:seq true})
                  python))
            (csv/slurp-csv out :header header :cast-fns cast-fns))
          (finally
            (.delete out-file))))
      (finally
        (.delete in-file)))))
(defn tsne
  "Runs the t-SNE script (`tsne-template`) over `df` through `runpy` and
  merges each result row, keyed `:tsne-0` and `:tsne-1` and cast with
  `csv/->double`, into the row of `df` at the same position.

  `opts` (default `{}`) is handed to `runpy`; any `:header` or `:cast-fns`
  in it is overridden here. Returns a lazy seq from `(map merge ...)`.
  NOTE(review): presumably `df` is a sequence of maps - confirm with callers."
  ([df]
   (tsne {} df))
  ([opts df]
   (let [casts     {:tsne-0 csv/->double
                    :tsne-1 csv/->double}
         run-opts  (assoc opts
                          :header [:tsne-0 :tsne-1]
                          :cast-fns casts)
         embedding (runpy tsne-template run-opts df)]
     (map merge df embedding))))
(defn hdbscan
  "Runs the HDBSCAN script (`hdbscan-template`) over `df` through `runpy`
  and merges each result row, keyed `:label`, into the row of `df` at the
  same position. The label is cast with `csv/->int`, then turned into a
  keyword via `str`.

  `opts` (default `{}`) is handed to `runpy`; any `:header` or `:cast-fns`
  in it is overridden here. Returns a lazy seq from `(map merge ...)`.
  NOTE(review): presumably `df` is a sequence of maps - confirm with callers."
  ([df]
   (hdbscan {} df))
  ([opts df]
   (let [->label  (comp keyword str csv/->int)
         run-opts (assoc opts
                         :header [:label]
                         :cast-fns {:label ->label})
         labels   (runpy hdbscan-template run-opts df)]
     (map merge df labels))))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment