-
-
Save Kah0ona/1c93cec7d7b0d87073f5225ac47980e7 to your computer and use it in GitHub Desktop.
hiccup to clj-pjdf datastructure
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns my.beautiful.ns | |
(:require [clj-pdf.core :as pdf] | |
[clojure.string :refer [replace]] | |
[hickory.core :as hickory] | |
[taoensso.timbre :refer [debug spy]])) | |
(defn parse-classes | |
"parses keyword of shape :a#b.c.d or :a.b.c.d into | |
{:tag :a :id b :classes [c d]} or {:tab :a :id nil :classes [b c d]}" | |
[k] | |
(let [tag (some-> k | |
name | |
(clojure.string/split #"\.|#") | |
first | |
clojure.string/lower-case | |
keyword) | |
classes-or-ids (some->> k | |
name | |
(re-seq #"(\.[a-zA-Z\-_0-9]+)|(#[a-zA-Z\-_0-9]+)") | |
(map first)) | |
r (reduce | |
(fn [acc v] | |
(if (clojure.string/starts-with? v ".") | |
(update acc :classes conj (subs v 1)) | |
(assoc acc :id (subs v 1)))) | |
{:id nil | |
:classes []} | |
classes-or-ids)] | |
(assoc r :tag tag))) | |
(defmulti parse | |
"A collection of parse function to recursive-descent parse the html structure, | |
and produce a clj-pdf compatible output" | |
(fn [arg] | |
(if (string? arg) ;;if its a pure string, dispatch on :string, otherwise dispatch on the first element in the structure | |
:string | |
(-> arg first parse-classes :tag)))) | |
(defn parse-css-style-attr | |
[style] | |
(if (nil? style) | |
{} | |
(->> | |
(clojure.string/split style #";") | |
(map #(clojure.string/split % #":")) | |
(map (fn [[k v]] | |
[(keyword (clojure.string/trim k)) (clojure.string/trim (or v ""))])) | |
(into {})))) | |
(defn hex->int | |
[h] | |
(Integer/parseInt h 16)) | |
(defn parse-font-family | |
[s] | |
(let [parts (clojure.string/split s #",")] | |
(->> | |
parts | |
(map (fn [p] | |
(case (-> p clojure.string/trim | |
clojure.string/lower-case | |
(clojure.string/replace #" " "-") | |
keyword) | |
:courier :courier | |
:helvetica :helvetica | |
:times-new-roman :times-roman | |
:symbol :symbol | |
nil))) | |
(remove nil?) | |
first))) | |
(defn parse-css-color | |
"Parses one of these formats: rgba(123,123,123,0), rgb(123,123,123), #EDEDED into integer tuple (vector), ie. [r g b]." | |
[c] | |
(if-let [res (re-matches #"#([a-fA-F0-9]{2})([a-fA-F0-9]{2})([a-fA-F0-9]{2})" c)] | |
(->> res rest (map hex->int) vec) | |
(if-let [res (re-matches #"rgba\(\s*([0-9]{1,3})\s*,\s*([0-9]{1,3})\s*,\s*([0-9]{1,3})\s*,\s*[0-9]+\.*[0-9]*\s*\)" c)] | |
(->> res rest (map read-string) vec) | |
(if-let [res (re-matches #"rgb\(\s*([0-9]{1,3})\s*,\s*([0-9]{1,3})\s*,\s*([0-9]{1,3})\s*\)" c)] | |
(->> res rest (map read-string) vec) | |
[0 0 0])))) | |
(defn parse-font-size | |
[v] | |
(if-let [[_ g1] (re-matches #"([0-9]+)[a-zA-Z]")] | |
(read-string g1) | |
nil)) | |
(defmulti css-rule->clj-style | |
(fn [k v] | |
k)) | |
(defmethod css-rule->clj-style | |
:font-color | |
[k v] | |
(let [rgb-vec (parse-css-color v)] | |
[:color rgb-vec])) | |
(defmethod css-rule->clj-style | |
:font-size | |
[k v] | |
(when-let [s (parse-font-size v)] | |
[:size s])) | |
(defmethod css-rule->clj-style | |
:font-family | |
[k v] | |
[:family (parse-font-family v)]) | |
(defmethod css-rule->clj-style | |
:default | |
[k v] | |
nil) | |
(defn css-map->clj-styles | |
[m] | |
(into {} | |
(comp | |
(map (fn [[k v]] | |
(css-rule->clj-style k v))) | |
(remove nil?)) | |
m)) | |
(defn parse-attributes | |
[{:keys [style colspan rowspan] :as attrs}] | |
(debug style) | |
(debug (-> style | |
parse-css-style-attr | |
css-map->clj-styles)) | |
(merge | |
(cond-> {} | |
colspan (assoc :colspan colspan) | |
rowspan (assoc :rowspan rowspan)) | |
(-> style | |
parse-css-style-attr | |
css-map->clj-styles))) | |
(defn emit | |
"Generates a clj-pdf element, based on the current element, and children" | |
[elt child-elts] | |
(vec (concat elt (map parse child-elts)))) | |
(defn basic-html-unescape | |
"unescape html output for rendering" | |
[data] | |
(let [out (-> data | |
(.replaceAll """ "\"") | |
(.replaceAll "<" "<") | |
(.replaceAll ">" ">") | |
(.replaceAll "&" "&"))] | |
out)) | |
(defmethod parse ;;parse a pure string, just return it | |
:string | |
[t] | |
(basic-html-unescape t)) | |
(defn generate-tag-name | |
[s classes] | |
(if (or (nil? classes) (empty? classes)) | |
(keyword s) | |
(keyword (str s "." (clojure.string/join "." classes))))) | |
(defn emit-element | |
"generates a :<tag-name>, and handles attributes and sub elements" | |
[tag-name default-attrs [t & [m & elts :as all-elts]]] | |
(let [elt-to-render (generate-tag-name tag-name (-> t parse-classes :classes))] | |
(if (map? m) | |
(let [attrs (merge (parse-attributes m) default-attrs)] | |
(emit [elt-to-render attrs] elts)) | |
(emit [elt-to-render default-attrs] all-elts)))) | |
(defmethod parse | |
:p | |
[all-args] | |
(emit-element "paragraph" {} all-args)) | |
(defmethod parse | |
:b | |
[all-args] | |
(emit-element "phrase" {:style :bold} all-args)) | |
(defmethod parse | |
:strong | |
[all-args] | |
(emit-element "phrase" {:style :bold} all-args)) | |
(defmethod parse | |
:i | |
[all-args] | |
(emit-element "phrase" {:style :italic} all-args)) | |
(defmethod parse | |
:u | |
[all-args] | |
(emit-element "phrase" {:style :underline} all-args)) | |
(defmethod parse | |
:s | |
[all-args] | |
(emit-element "phrase" {:style :strikethru} all-args)) | |
(defmethod parse | |
:strike | |
[all-args] | |
(emit-element "phrase" {:style :strikethru} all-args)) | |
(defmethod parse | |
:td | |
[all-args] | |
(emit-element "pdf-cell" {} all-args)) | |
(defmethod parse | |
:th | |
[all-args] | |
(emit-element "pdf-cell" {} all-args)) | |
(defmethod parse | |
:tr | |
[[t & [m & elts :as all-elts] :as args]] | |
(map parse (if (map? m) elts all-elts))) ;;each entry should be a :th or :td | |
(defmethod parse | |
:br | |
[[t]] | |
"\n") | |
(defmethod parse | |
:ul | |
[all-args] | |
(emit-element "list" {:numbered false} all-args) | |
#_[[t & [m & elts :as all-elts] :as args]] | |
#_(into | |
[:pdf-table | |
{:width-percent 100} | |
[5 95]] | |
(vec | |
(map | |
(fn [li] | |
[[:pdf-cell {:set-border []} "•"] | |
[:pdf-cell {:set-border []} (parse li)]]) | |
(if (map? m) | |
elts | |
all-elts))))) | |
(defmethod parse | |
:h1 | |
[all-args] | |
(emit-element "heading" {:style {:size 16}} all-args)) | |
(defmethod parse | |
:h2 | |
[all-args] | |
(emit-element "heading" {:style {:size 14}} all-args)) | |
(defmethod parse | |
:h3 | |
[all-args] | |
(emit-element "heading" {:style {:size 12}} all-args)) | |
(defmethod parse | |
:h4 | |
[all-args] | |
(emit-element "heading" {:style {:size 11}} all-args)) | |
(defmethod parse | |
:h5 | |
[all-args] | |
(emit-element "heading" {:style {:size 10}} all-args)) | |
(defmethod parse | |
:hr | |
[all-args] | |
(emit-element "line" {} all-args)) | |
(defmethod parse | |
:ol | |
[all-args] | |
(emit-element "list" {:numbered true} all-args)) | |
(defmethod parse | |
:li | |
[all-args] | |
;; (emit-element "phrase" {} all-args) | |
(parse (last all-args))) | |
(defmethod parse | |
:section | |
[all-args] | |
[:chapter | |
(emit-element "section" {:numbered true} all-args)]) | |
(defmethod parse | |
:div | |
[all-args] | |
(emit-element "paragraph" {} all-args)) | |
(defmethod parse | |
:span | |
[all-args] | |
(emit-element "phrase" {} all-args)) | |
(defmethod parse | |
:img | |
[[t {:keys [src] :as m}]] | |
[:image src]) | |
(defn extract-tag | |
[[t]] | |
(-> t parse-classes :tag)) | |
(defn parse-table-header | |
[[t & [m & elts :as all-elts] :as args]] | |
(if-not (= (extract-tag args) :thead) | |
{} | |
{:header (map parse (if (map? m) elts all-elts))})) | |
(defn parse-table-body | |
[[tag & [m & rows :as all-elts] :as tbody-elt]] | |
(map parse (if (map? m) rows all-elts))) | |
(defn parse-table-contents | |
([children] | |
(parse-table-contents nil children)) | |
([m children] | |
(let [first-tag (-> children first extract-tag)] | |
(assert (some #{first-tag} #{:thead :tbody :tr}) "Table's first child should be either a [:tr], [:thead] or [:tbody] element") | |
(if (= (extract-tag (first children)) :thead) ;;ignore, already parsed | |
(parse-table-body (second children)) | |
;;else, assume direct [:tr]'s | |
(map parse children))))) | |
(defn extract-max-column-count | |
[[t1 & [m & elts :as all-elts] :as args]] | |
(if (or (= :tbody (extract-tag args)) | |
(= :thead (extract-tag args))) ;;go one level deeper, check out first :tr child | |
(extract-max-column-count (if (map? m) | |
(first elts) | |
m)) | |
;;it must be a [:tr] | |
(if (= :tr (extract-tag args)) | |
(let [tds (->> args | |
(filter (fn [t] | |
(and (not (map? t)) | |
(not (keyword? t)) | |
(or | |
(= :td (extract-tag t)) | |
(= :th (extract-tag t)))))))] | |
(count tds)) | |
(throw (ex-info "Not a valid table structure, make sure you have either [:tbody], | |
[:thead] or [:tr] as a direct child of a [:table], | |
and make sure [:tr] is a direct child of either [:tbody] [:thead] or [:table]." | |
{:data args}))))) | |
(def default-table-opts | |
{:border-color [200 200 200] | |
:set-border [:bottom] | |
:padding [0 10 10 10] | |
:width-percent 100 | |
} | |
) | |
(defmethod parse | |
nil | |
[_] | |
[:phrase ""] | |
) | |
(defmethod parse | |
;; currently it uses the first row in the table to generate the column widths vector | |
:table | |
[[t & [m & elts :as all-elts] :as args]] | |
(p ::table-profile | |
(into [:pdf-table | |
(merge default-table-opts | |
(parse-table-header (if (map? m) | |
(first elts) | |
m))) | |
;;todo somehow be able to specify widths? or too advanced? | |
(vec | |
(repeat (extract-max-column-count | |
(if (map? m) | |
(first elts) ;;either :thead or :tbody or :tr | |
m)) | |
1))] | |
(if (map? m) | |
(concat [(parse-attributes m)] | |
(parse-table-contents m elts)) ;; => [[:cell ...] [:cell ...] [:cell ...]] | |
(parse-table-contents all-elts))))) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment