Skip to content

Instantly share code, notes, and snippets.

@jaen

jaen/lel.cljs Secret

Last active December 29, 2015 10:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jaen/12d598fae13fcdc14a08 to your computer and use it in GitHub Desktop.
Save jaen/12d598fae13fcdc14a08 to your computer and use it in GitHub Desktop.
(def confidence-threshold
  "Confidence value a detected charset must strictly exceed for
  `is-confident?` to let a file through. NOTE(review): presumably on the
  detector's 0-100 scale -- confirm against the charset detector in use."
  90)

(def target-charset
  "Charset name that `convert-to-utf8` compares the detected charset against;
  files already reported as this charset are passed through unconverted."
  "UTF-8")
(defn open-file
  "Synchronously reads the file at `path` and wraps the outcome in an Either.

  Returns `(either/right {:path path :content content})` on success, where
  `content` is whatever `fs.readFileSync` returned for `path`.

  If the read throws (missing file, insufficient permissions, ...), returns
  `(either/left {:error message})` instead of letting the exception escape, so
  the failure travels down the Either pipeline like every other error."
  [path]
  (try
    (let [content (.readFileSync fs path)]
      (either/right {:path path :content content}))
    (catch js/Error e
      ;; Keep the error shape identical to the other pipeline stages:
      ;; a map with a human-readable :error string.
      (either/left {:error (str "Could not read " path ": " (.-message e))}))))
(defn detect-charset
  "Annotates a file descriptor with the charset detected for its content.

  `file-descriptor` is an Either. When it holds a descriptor map, the map's
  `:content` is handed to `charsetDetector.detectCharset` and the result is
  merged back in as:
    :charset    - the detection result's `toString` value
    :confidence - the detection result's `confidence` property
  A Left value is returned untouched."
  [file-descriptor]
  (m/fmap (fn [{:keys [content] :as descriptor}]
            (let [detected (.detectCharset charsetDetector content)]
              (merge descriptor
                     {:charset    (.toString detected)
                      :confidence (.-confidence detected)})))
          file-descriptor))
(defn is-confident?
  "Gate that only lets through descriptors whose `:confidence` is strictly
  greater than `confidence-threshold`.

  `file-descriptor` is an Either. If the wrapped descriptor passes the check,
  the same Either is returned; otherwise an `either/left` carrying an `:error`
  message that names the file path, its confidence and the threshold. A Left
  input short-circuits and is returned as-is."
  [file-descriptor]
  (m/mlet [descriptor file-descriptor]
    (let [{:keys [path confidence]} descriptor]
      ;; Deliberately phrased as (not (> ...)) so a nil/NaN confidence is
      ;; still rejected rather than slipping through an inverted comparison.
      (if-not (> confidence confidence-threshold)
        (either/left
          {:error (str "The charset confidence of " path " is " confidence
                       ", which is below the threshold of " confidence-threshold ".")})
        file-descriptor))))
(defn convert-to-utf8
  "Decodes the descriptor's `:content` from its detected `:charset` unless the
  charset already equals `target-charset`.

  `file-descriptor` is an Either. Outcomes:
    - charset equals `target-charset`: the input Either is returned unchanged
      (no `:converted-content` / `:converted?` keys are added);
    - otherwise the content is decoded with `iconv.decode`, passed through
      `cr`, and merged into the descriptor as `:converted-content` together
      with `:converted? true`;
    - if decoding throws (e.g. the detected charset is not one the iconv
      binding recognises), an `either/left {:error message}` is returned so
      the failure is reported through the pipeline instead of escaping as an
      exception.
  A Left input short-circuits and is returned as-is.

  NOTE(review): `cr` is not defined in the visible source -- confirm it is
  provided elsewhere in the namespace."
  [file-descriptor]
  (m/mlet [{:keys [path content charset]} file-descriptor]
    (if (not= target-charset charset)
      (try
        (let [converted-content (cr (.decode iconv content charset))]
          (m/fmap #(merge % {:converted-content converted-content
                             :converted? true})
                  file-descriptor))
        (catch js/Error e
          (either/left {:error (str "Could not decode " path " from " charset
                                    ": " (.-message e))})))
      file-descriptor)))
(defn print-error
  "Prints the `:error` entry of `failure` to standard output, prefixed with
  \"There was an error: \". Used as the Left handler of the pipeline."
  [failure]
  (let [{message :error} failure]
    (println "There was an error: " message)))
(defn write-file!
  "Placeholder for writing the processed file back out: currently it only
  prints the descriptor's `:path`.

  The destructured key is `:converted-content`, matching what
  `convert-to-utf8` actually puts on the descriptor (the previous
  `:decoded-content` key is never produced by any stage and would always have
  been nil). The binding is not used yet; it is kept for the real write.
  Note that `:converted-content` is absent when the file was already in the
  target charset."
  [{:keys [path converted-content]}]
  (println "I would write the converted file out here: " path))
(defn clean-file!
  "Runs the whole pipeline for the file at `path`: open it, detect its
  charset, check the detection confidence, and convert the content.

  Each stage takes and returns an Either. The final value is dispatched with
  `either/branch`: a Left goes to `print-error`, a Right goes to
  `write-file!`."
  [path]
  (let [opened    (open-file path)
        detected  (detect-charset opened)
        vetted    (is-confident? detected)
        converted (convert-to-utf8 vetted)]
    (either/branch converted print-error write-file!)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment