Skip to content

Instantly share code, notes, and snippets.

@uwo
Last active July 19, 2018 13:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save uwo/9aa8a9d17b07340cc9f388ca2a91076c to your computer and use it in GitHub Desktop.
Save uwo/9aa8a9d17b07340cc9f388ca2a91076c to your computer and use it in GitHub Desktop.
explore xml

deps.edn

{:deps {gist-uwo/xmlplore {:git/url "https://gist.github.com/uwo/9aa8a9d17b07340cc9f388ca2a91076c"
                           :sha "2aae0a2d1a36cd7b59df6e632eb50188fe313851"}}}
{:paths ["."]
:deps {com.datomic/datomic-free {:mvn/version "0.9.5697"}
org.clojure/data.xml {:mvn/version "0.0.8"}}}
(ns xmlplore
"dom approach"
(:refer-clojure :exclude [load])
(:require [clojure.walk :as walk]
[clojure.data.xml :as dxml]
[datomic.api :as d]))
(def schema
[{:db/ident :xml.node/tag
:db/index true
:db/valueType :db.type/keyword
:db/cardinality :db.cardinality/one}
{:db/ident :xml.node/attr
:db/valueType :db.type/ref
:db/isComponent true
:db/cardinality :db.cardinality/many}
{:db/ident :xml.node/content
:db/index true
:db/valueType :db.type/ref
:db/isComponent true
:db/cardinality :db.cardinality/many}
{:db/ident :xml.node/value
:db/valueType :db.type/string
:db/cardinality :db.cardinality/one}
{:db/ident :xml.node.attr/key
:db/valueType :db.type/keyword
:db/cardinality :db.cardinality/one}
{:db/ident :xml.node.attr/value
:db/valueType :db.type/string
:db/cardinality :db.cardinality/one}])
(defn scratch
[]
(let [uri (str "datomic:mem://" (d/squuid))
_ (d/create-database uri)
conn (d/connect uri)]
(d/transact conn schema)
conn))
(defn value? [content] (string? (first content)))
(defn node? [content] (= :node (::type (meta (first content)))))
(defn transform-attrs
[attrs]
(reduce-kv
(fn [acc k v]
(conj acc {:xml.node.attr/key k
:xml.node.attr/value v}))
[]
attrs))
(defn to-db-attrs
[tree]
(walk/prewalk
(fn [form]
(if (map-entry? form)
(let [[k v] form]
(condp identical? k
:tag [:xml.node/tag v]
:attrs [:xml.node/attr (transform-attrs v)]
:content (cond
(node? v) [:xml.node/content v]
(value? v) [:xml.node/value (first v)])
form))
form))
tree))
(defn pour-into-maps
"Can't walk clojure.data.xml.Element records without first pouring
them into maps."
[tree]
(walk/postwalk
(fn [form]
(if (= clojure.data.xml.Element (type form))
(with-meta (into {} form) {::type :node})
form))
tree))
(defn load
"You might want to narrow xml first if your aim is to stream"
[conn xml]
(let [txdata (-> xml pour-into-maps to-db-attrs vector)]
(d/transact conn txdata)))
(defn load-db
"You might want to narrow xml first if your aim is to stream"
[db xml]
(let [txdata (-> xml pour-into-maps to-db-attrs vector)]
(d/with db txdata)))
(defn all-tags [db] (into #{} (map :v) (d/datoms db :aevt :xml.node/tag)))
(def rules
'[[(ancestors ?node ?parent)
[?parent :xml.node/content ?node]]
[(ancestors ?node ?parent)
(ancestors ?node ?x)
(ancestors ?x ?parent)]])
(defn path-to
[db node]
(let [lookup (->>
(d/q '[:find [?parent ...]
:in $ % ?node
:where
(ancestors ?node ?parent)]
db rules node)
(mapv #(hash-map
:parent (->> % (d/entity db) :xml.node/_content :db/id)
:child %
:tag (->> % (d/entity db) :xml.node/tag)))
(reduce
(completing
(fn [acc rel]
(assoc! acc (:parent rel) (select-keys rel [:child :tag])))
persistent!)
(transient {})))]
(loop [acc (transient []) head nil]
(if-let [{:keys [child tag] :as thing} (get lookup head)]
(recur (conj! acc tag) child)
(persistent! acc)))))
(defn path-to-inclusive
[db e]
(conj (path-to db e) (:xml.node/tag (d/entity db e))))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment