Skip to content

Instantly share code, notes, and snippets.

@Kah0ona
Created October 17, 2018 14:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Kah0ona/1c93cec7d7b0d87073f5225ac47980e7 to your computer and use it in GitHub Desktop.
Save Kah0ona/1c93cec7d7b0d87073f5225ac47980e7 to your computer and use it in GitHub Desktop.
hiccup to clj-pjdf datastructure
(ns my.beautiful.ns
(:require [clj-pdf.core :as pdf]
[clojure.string :refer [replace]]
[hickory.core :as hickory]
[taoensso.timbre :refer [debug spy]]))
(defn parse-classes
"parses keyword of shape :a#b.c.d or :a.b.c.d into
{:tag :a :id b :classes [c d]} or {:tab :a :id nil :classes [b c d]}"
[k]
(let [tag (some-> k
name
(clojure.string/split #"\.|#")
first
clojure.string/lower-case
keyword)
classes-or-ids (some->> k
name
(re-seq #"(\.[a-zA-Z\-_0-9]+)|(#[a-zA-Z\-_0-9]+)")
(map first))
r (reduce
(fn [acc v]
(if (clojure.string/starts-with? v ".")
(update acc :classes conj (subs v 1))
(assoc acc :id (subs v 1))))
{:id nil
:classes []}
classes-or-ids)]
(assoc r :tag tag)))
(defmulti parse
"A collection of parse function to recursive-descent parse the html structure,
and produce a clj-pdf compatible output"
(fn [arg]
(if (string? arg) ;;if its a pure string, dispatch on :string, otherwise dispatch on the first element in the structure
:string
(-> arg first parse-classes :tag))))
(defn parse-css-style-attr
[style]
(if (nil? style)
{}
(->>
(clojure.string/split style #";")
(map #(clojure.string/split % #":"))
(map (fn [[k v]]
[(keyword (clojure.string/trim k)) (clojure.string/trim (or v ""))]))
(into {}))))
(defn hex->int
[h]
(Integer/parseInt h 16))
(defn parse-font-family
[s]
(let [parts (clojure.string/split s #",")]
(->>
parts
(map (fn [p]
(case (-> p clojure.string/trim
clojure.string/lower-case
(clojure.string/replace #" " "-")
keyword)
:courier :courier
:helvetica :helvetica
:times-new-roman :times-roman
:symbol :symbol
nil)))
(remove nil?)
first)))
(defn parse-css-color
"Parses one of these formats: rgba(123,123,123,0), rgb(123,123,123), #EDEDED into integer tuple (vector), ie. [r g b]."
[c]
(if-let [res (re-matches #"#([a-fA-F0-9]{2})([a-fA-F0-9]{2})([a-fA-F0-9]{2})" c)]
(->> res rest (map hex->int) vec)
(if-let [res (re-matches #"rgba\(\s*([0-9]{1,3})\s*,\s*([0-9]{1,3})\s*,\s*([0-9]{1,3})\s*,\s*[0-9]+\.*[0-9]*\s*\)" c)]
(->> res rest (map read-string) vec)
(if-let [res (re-matches #"rgb\(\s*([0-9]{1,3})\s*,\s*([0-9]{1,3})\s*,\s*([0-9]{1,3})\s*\)" c)]
(->> res rest (map read-string) vec)
[0 0 0]))))
(defn parse-font-size
[v]
(if-let [[_ g1] (re-matches #"([0-9]+)[a-zA-Z]")]
(read-string g1)
nil))
(defmulti css-rule->clj-style
(fn [k v]
k))
(defmethod css-rule->clj-style
:font-color
[k v]
(let [rgb-vec (parse-css-color v)]
[:color rgb-vec]))
(defmethod css-rule->clj-style
:font-size
[k v]
(when-let [s (parse-font-size v)]
[:size s]))
(defmethod css-rule->clj-style
:font-family
[k v]
[:family (parse-font-family v)])
(defmethod css-rule->clj-style
:default
[k v]
nil)
(defn css-map->clj-styles
[m]
(into {}
(comp
(map (fn [[k v]]
(css-rule->clj-style k v)))
(remove nil?))
m))
(defn parse-attributes
[{:keys [style colspan rowspan] :as attrs}]
(debug style)
(debug (-> style
parse-css-style-attr
css-map->clj-styles))
(merge
(cond-> {}
colspan (assoc :colspan colspan)
rowspan (assoc :rowspan rowspan))
(-> style
parse-css-style-attr
css-map->clj-styles)))
(defn emit
"Generates a clj-pdf element, based on the current element, and children"
[elt child-elts]
(vec (concat elt (map parse child-elts))))
(defn basic-html-unescape
"unescape html output for rendering"
[data]
(let [out (-> data
(.replaceAll """ "\"")
(.replaceAll "&lt;" "<")
(.replaceAll "&gt;" ">")
(.replaceAll "&amp;" "&"))]
out))
(defmethod parse ;;parse a pure string, just return it
:string
[t]
(basic-html-unescape t))
(defn generate-tag-name
[s classes]
(if (or (nil? classes) (empty? classes))
(keyword s)
(keyword (str s "." (clojure.string/join "." classes)))))
(defn emit-element
"generates a :<tag-name>, and handles attributes and sub elements"
[tag-name default-attrs [t & [m & elts :as all-elts]]]
(let [elt-to-render (generate-tag-name tag-name (-> t parse-classes :classes))]
(if (map? m)
(let [attrs (merge (parse-attributes m) default-attrs)]
(emit [elt-to-render attrs] elts))
(emit [elt-to-render default-attrs] all-elts))))
(defmethod parse
:p
[all-args]
(emit-element "paragraph" {} all-args))
(defmethod parse
:b
[all-args]
(emit-element "phrase" {:style :bold} all-args))
(defmethod parse
:strong
[all-args]
(emit-element "phrase" {:style :bold} all-args))
(defmethod parse
:i
[all-args]
(emit-element "phrase" {:style :italic} all-args))
(defmethod parse
:u
[all-args]
(emit-element "phrase" {:style :underline} all-args))
(defmethod parse
:s
[all-args]
(emit-element "phrase" {:style :strikethru} all-args))
(defmethod parse
:strike
[all-args]
(emit-element "phrase" {:style :strikethru} all-args))
(defmethod parse
:td
[all-args]
(emit-element "pdf-cell" {} all-args))
(defmethod parse
:th
[all-args]
(emit-element "pdf-cell" {} all-args))
(defmethod parse
:tr
[[t & [m & elts :as all-elts] :as args]]
(map parse (if (map? m) elts all-elts))) ;;each entry should be a :th or :td
(defmethod parse
:br
[[t]]
"\n")
(defmethod parse
:ul
[all-args]
(emit-element "list" {:numbered false} all-args)
#_[[t & [m & elts :as all-elts] :as args]]
#_(into
[:pdf-table
{:width-percent 100}
[5 95]]
(vec
(map
(fn [li]
[[:pdf-cell {:set-border []} "•"]
[:pdf-cell {:set-border []} (parse li)]])
(if (map? m)
elts
all-elts)))))
(defmethod parse
:h1
[all-args]
(emit-element "heading" {:style {:size 16}} all-args))
(defmethod parse
:h2
[all-args]
(emit-element "heading" {:style {:size 14}} all-args))
(defmethod parse
:h3
[all-args]
(emit-element "heading" {:style {:size 12}} all-args))
(defmethod parse
:h4
[all-args]
(emit-element "heading" {:style {:size 11}} all-args))
(defmethod parse
:h5
[all-args]
(emit-element "heading" {:style {:size 10}} all-args))
(defmethod parse
:hr
[all-args]
(emit-element "line" {} all-args))
(defmethod parse
:ol
[all-args]
(emit-element "list" {:numbered true} all-args))
(defmethod parse
:li
[all-args]
;; (emit-element "phrase" {} all-args)
(parse (last all-args)))
(defmethod parse
:section
[all-args]
[:chapter
(emit-element "section" {:numbered true} all-args)])
(defmethod parse
:div
[all-args]
(emit-element "paragraph" {} all-args))
(defmethod parse
:span
[all-args]
(emit-element "phrase" {} all-args))
(defmethod parse
:img
[[t {:keys [src] :as m}]]
[:image src])
(defn extract-tag
[[t]]
(-> t parse-classes :tag))
(defn parse-table-header
[[t & [m & elts :as all-elts] :as args]]
(if-not (= (extract-tag args) :thead)
{}
{:header (map parse (if (map? m) elts all-elts))}))
(defn parse-table-body
[[tag & [m & rows :as all-elts] :as tbody-elt]]
(map parse (if (map? m) rows all-elts)))
(defn parse-table-contents
([children]
(parse-table-contents nil children))
([m children]
(let [first-tag (-> children first extract-tag)]
(assert (some #{first-tag} #{:thead :tbody :tr}) "Table's first child should be either a [:tr], [:thead] or [:tbody] element")
(if (= (extract-tag (first children)) :thead) ;;ignore, already parsed
(parse-table-body (second children))
;;else, assume direct [:tr]'s
(map parse children)))))
(defn extract-max-column-count
[[t1 & [m & elts :as all-elts] :as args]]
(if (or (= :tbody (extract-tag args))
(= :thead (extract-tag args))) ;;go one level deeper, check out first :tr child
(extract-max-column-count (if (map? m)
(first elts)
m))
;;it must be a [:tr]
(if (= :tr (extract-tag args))
(let [tds (->> args
(filter (fn [t]
(and (not (map? t))
(not (keyword? t))
(or
(= :td (extract-tag t))
(= :th (extract-tag t)))))))]
(count tds))
(throw (ex-info "Not a valid table structure, make sure you have either [:tbody],
[:thead] or [:tr] as a direct child of a [:table],
and make sure [:tr] is a direct child of either [:tbody] [:thead] or [:table]."
{:data args})))))
(def default-table-opts
{:border-color [200 200 200]
:set-border [:bottom]
:padding [0 10 10 10]
:width-percent 100
}
)
(defmethod parse
nil
[_]
[:phrase ""]
)
(defmethod parse
;; currently it uses the first row in the table to generate the column widths vector
:table
[[t & [m & elts :as all-elts] :as args]]
(p ::table-profile
(into [:pdf-table
(merge default-table-opts
(parse-table-header (if (map? m)
(first elts)
m)))
;;todo somehow be able to specify widths? or too advanced?
(vec
(repeat (extract-max-column-count
(if (map? m)
(first elts) ;;either :thead or :tbody or :tr
m))
1))]
(if (map? m)
(concat [(parse-attributes m)]
(parse-table-contents m elts)) ;; => [[:cell ...] [:cell ...] [:cell ...]]
(parse-table-contents all-elts)))))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment