Skip to content

Instantly share code, notes, and snippets.

@fredyr
Last active August 9, 2019 16:15
Show Gist options
  • Save fredyr/27b2b476858bda4c4302 to your computer and use it in GitHub Desktop.
Save fredyr/27b2b476858bda4c4302 to your computer and use it in GitHub Desktop.
Parsing flat files in Clojure
;; Code for blog article at
;; http://www.lexicallyscoped.com/2015/01/05/parsing-flat-files-in-clojure.html
(ns flatfile.core
(:use [clojure.java.io])
(:import [java.io PushbackReader]))
(defn load-rulebook
  "Reads the first Clojure form found in `file` and returns it as data.
  `file` is anything `clojure.java.io/reader` accepts. The underlying
  stream is closed before this function returns."
  [file]
  ;; NOTE(review): `read` honours *read-eval*, so a rulebook containing
  ;; #=(...) forms can execute code. Only load rulebooks you trust.
  (with-open [in (PushbackReader. (reader file))]
    (read in)))
;; Plain-string version of extract-str: use this one when the rulebook does
;; not declare data types. The data-type-aware version is defined further below.
(comment
;; Returns the substring of s covered by segment. :start-pos is 1-based and
;; :end-pos is inclusive, hence the (dec start) passed to subs.
(defn extract-str [s segment]
(let [start (:start-pos segment)
end (:end-pos segment)]
(subs s (dec start) end)))
)
;; The only live definition of `extract-str` appears later in this file, so it
;; must be declared before its first use here; without this the namespace fails
;; to load with "Unable to resolve symbol: extract-str". A bare declare does not
;; overwrite an existing binding, so it is harmless if `extract-str` is already
;; defined.
(declare extract-str)

(defn match-identifier?
  "Returns true when the part of `row` described by `ident` (:start-pos and
  :end-pos) equals the identifier's expected :id-string."
  [row ident]
  (= (extract-str row ident)
     (:id-string ident)))
(defn match-rule?
  "Returns true when every identifier in `idents` matches `row`.
  An empty or nil `idents` therefore matches any row."
  [row idents]
  (every? (partial match-identifier? row) idents))
(defn find-rule
  "Returns the first rule in `rules` whose :identifiers all match `row`,
  or nil when no rule matches."
  [row rules]
  (->> rules
       (filter (fn [rule] (match-rule? row (:identifiers rule))))
       first))
(defn parse-with-rule
  "Builds a map for `row` with one entry per segment of `rule`: the key is the
  segment's :id as a keyword, the value is what `extract-str` returns for that
  segment. The rule's own :id is stored last under :id, so it overrides any
  segment that is itself named \"id\"."
  [row rule]
  (let [field (fn [seg] [(keyword (:id seg)) (extract-str row seg)])
        fields (into {} (map field (:segments rule)))]
    (assoc fields :id (:id rule))))
(defn parse-file
  "Parses every line of `file` with the first rule in `rules` that matches it
  and returns the parsed rows as a fully realised sequence."
  [file rules]
  (let [parse-row (fn [row] (parse-with-rule row (find-rule row rules)))]
    (with-open [lines-in (reader file)]
      ;; Realise the lazy seq before with-open closes the reader.
      (->> (line-seq lines-in)
           (map parse-row)
           doall))))
;; REPL scratchpad (the comment form is never evaluated when the file loads):
;; load a rulebook and parse a data file with it.
(comment
(def rb (load-rulebook "rules/dd.clj"))
(def result (parse-file "data/dd.txt" rb))
)
(defn date-formatter
  "Returns a new java.text.SimpleDateFormat for the pattern `format`, with its
  time zone set to the zone named by `timezone`.
  Note: java.util.TimeZone/getTimeZone falls back to GMT when it does not
  recognise the ID, so pass a valid zone ID."
  [format timezone]
  (doto (java.text.SimpleDateFormat. format)
    (.setTimeZone (java.util.TimeZone/getTimeZone timezone))))
;; Example ("Sweden" is not a valid time zone ID and would fall back to GMT):
;;(.parse (date-formatter "yyyyMMdd" "Europe/Stockholm") "20141230")
;; Time zone used for :date and :datetime fields. This was previously "Sweden",
;; which is not a valid java.util.TimeZone ID: getTimeZone silently returns GMT
;; for unknown IDs, so values were parsed in the wrong zone.
(def ^:private swedish-timezone "Europe/Stockholm")

(defmulti data-type
  "Converts the raw text stored under :value of a segment map into a typed
  value, dispatching on the segment's :data-type."
  :data-type)

;; "yyyyMMdd" text -> java.util.Date at midnight Swedish time.
(defmethod data-type :date [segment]
  (.parse (date-formatter "yyyyMMdd" swedish-timezone) (:value segment)))

;; "yyyyMMddHHmmss" text -> java.util.Date in Swedish time.
(defmethod data-type :datetime [segment]
  (.parse (date-formatter "yyyyMMddHHmmss" swedish-timezone) (:value segment)))

;; Decimal digits -> Long. Throws NumberFormatException on anything else.
(defmethod data-type :long [segment]
  (Long/parseLong (:value segment)))

;; Segments with no (or an unknown) :data-type keep their raw string.
(defmethod data-type :default [segment]
  (:value segment))

;; Amount split into :kr (characters 1-9) and :ore (characters 10 to the end).
;; Character 0 is skipped (presumably a sign or filler; confirm against the
;; file format specification).
(defmethod data-type :amount [segment]
  (let [v (:value segment)]
    {:kr  (Long/parseLong (subs v 1 10))
     :ore (Long/parseLong (subs v 10))}))
(defn extract-str
  "Cuts the text of `segment` out of the line `s` and converts it with
  `data-type`. :start-pos is 1-based and :end-pos is inclusive. The raw
  substring is passed to `data-type` under :value, together with the rest of
  the segment map."
  [s segment]
  (->> (subs s (dec (:start-pos segment)) (:end-pos segment))
       (assoc segment :value)
       data-type))
;; REPL scratchpad (the comment form is never evaluated when the file loads):
;; parse the sample data file with the sample rulebook and print the result.
(comment
(prn
(parse-file "data/dd.txt" (load-rulebook "rules/dd.clj")))
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment