Skip to content

Instantly share code, notes, and snippets.

Last active August 9, 2019 16:15
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save fredyr/27b2b476858bda4c4302 to your computer and use it in GitHub Desktop.
Parsing flat files in Clojure
;; Code for blog article at
;; Namespace for the flat-file parser.
;; `reader` (used by load-rulebook and parse-file) comes from clojure.java.io;
;; PushbackReader (required by clojure.core/read) lives in java.io.
(ns flatfile.core
  (:use [clojure.java.io])
  (:import [java.io PushbackReader]))
(defn load-rulebook
  "Opens `file` with `reader`, reads a single Clojure form from it and returns
  that form. The reader is closed before returning.

  NOTE(review): this uses clojure.core/read, which honours *read-eval*; only
  point it at rulebook files you trust."
  [file]
  (with-open [rdr (reader file)]
    (-> rdr
        PushbackReader.
        read)))
;; Use this version when segments do not carry data types; a data-type-aware version is defined below.
(defn extract-str
  "Returns the substring of `s` described by `segment`.

  `segment` supplies :start-pos (1-based, inclusive) and :end-pos (inclusive);
  the start is decremented to obtain the 0-based index `subs` expects."
  [s segment]
  (subs s
        (dec (:start-pos segment))
        (:end-pos segment)))
(defn match-identifier?
  "True when the value `extract-str` yields for `ident` within `row` equals the
  :id-string of `ident`."
  [row ident]
  (= (extract-str row ident)
     (:id-string ident)))
(defn match-rule?
  "True when every identifier in `idents` matches `row` (see match-identifier?).
  An empty or nil `idents` therefore matches any row."
  [row idents]
  (every? (fn [ident] (match-identifier? row ident))
          idents))
(defn find-rule
  "Returns the first rule in `rules` whose :identifiers all match `row`, or nil
  when no rule matches."
  [row rules]
  (->> rules
       (filter (fn [rule] (match-rule? row (:identifiers rule))))
       first))
(defn parse-with-rule
  "Builds a map from `row` using the :segments of `rule`.

  Each segment contributes one entry: the key is the segment's :id turned into
  a keyword, the value is whatever `extract-str` returns for that segment.
  Finally :id is set to the rule's own :id, overwriting any segment whose key
  is also :id."
  [row rule]
  (letfn [(add-field [acc seg]
            (assoc acc (keyword (:id seg)) (extract-str row seg)))]
    (-> (reduce add-field {} (:segments rule))
        (assoc :id (:id rule)))))
(defn parse-file
  "Parses every line of `file` and returns a fully realized seq of maps.

  For each line the first matching rule in `rules` is looked up with
  `find-rule` and applied with `parse-with-rule`. The reader is closed before
  this function returns."
  [file rules]
  (with-open [r (reader file)]
    ;; `map` and `line-seq` are lazy. Without `doall` the seq would be handed
    ;; back unrealized and later fail when it tries to read from the reader
    ;; that `with-open` has already closed.
    (doall
      (map #(parse-with-rule % (find-rule % rules)) (line-seq r)))))
;; Load the rulebook from the relative path rules/dd.clj. This runs when the
;; namespace is loaded, not on demand.
(def rb (load-rulebook "rules/dd.clj"))
;; Parse data/dd.txt with that rulebook, also at load time. Must stay after
;; `rb`, which it reads.
(def result (parse-file "data/dd.txt" rb))
;; Time zone used for the :date and :datetime fields. "Sweden" is not a valid
;; java.util.TimeZone ID (getTimeZone silently falls back to GMT for unknown
;; IDs), so the tz database name is used instead.
(def ^:private swedish-timezone "Europe/Stockholm")

(defn date-formatter
  "Returns a new java.text.SimpleDateFormat for the pattern `format`, with its
  time zone set to `timezone` (a java.util.TimeZone ID such as
  Europe/Stockholm).

  Note: TimeZone/getTimeZone does not reject unknown IDs; it returns GMT, so a
  misspelt ID shifts every parsed instant instead of failing."
  [format timezone]
  ;; `doto` returns the formatter itself; .setTimeZone returns void, so using
  ;; it as the last expression would make this function return nil.
  (doto (java.text.SimpleDateFormat. format)
    (.setTimeZone (java.util.TimeZone/getTimeZone timezone))))

;;(.parse (date-formatter "yyyyMMdd" "Europe/Stockholm") "20141230")

;; Converts the raw string under :value into a typed value, dispatching on the
;; :data-type key of the map it is given.
(defmulti data-type :data-type)

;; :date — :value is parsed with the pattern yyyyMMdd into a java.util.Date.
(defmethod data-type :date [s]
  (.parse (date-formatter "yyyyMMdd" swedish-timezone) (:value s)))

;; :datetime — :value is parsed with the pattern yyyyMMddHHmmss.
(defmethod data-type :datetime [s]
  (.parse (date-formatter "yyyyMMddHHmmss" swedish-timezone) (:value s)))

;; :long — :value is parsed as a base-10 long.
(defmethod data-type :long [s]
  (Long/parseLong (:value s)))

;; Any other (or missing) :data-type — :value is returned unchanged.
(defmethod data-type :default [s]
  (:value s))

;; :amount — characters at indices 1..9 become :kr and everything from index
;; 10 onwards becomes :ore. The character at index 0 is skipped.
;; NOTE(review): presumably index 0 is a sign or filler column; if it is a
;; sign, negative amounts currently lose it — confirm against the file spec.
(defmethod data-type :amount [s]
  (let [v (:value s)]
    {:kr  (Long/parseLong (subs v 1 10))
     :ore (Long/parseLong (subs v 10))}))
(defn extract-str
  "Data-type-aware replacement for the earlier extract-str.

  Cuts the substring of `s` between the segment's :start-pos (1-based) and
  :end-pos, stores it under :value on the segment map and returns the result
  of passing that map to the `data-type` multimethod."
  [s segment]
  (let [raw (subs s (dec (:start-pos segment)) (:end-pos segment))]
    (data-type (assoc segment :value raw))))
;; Re-run the parse now that extract-str has been redefined to convert values
;; through the data-type multimethod. (A stray trailing paren was removed.)
(parse-file "data/dd.txt" (load-rulebook "rules/dd.clj"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment