Skip to content

Instantly share code, notes, and snippets.

@jeffh
Last active April 12, 2021 23:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jeffh/a9287af4af0da7cba3e87d6afcf9e3d9 to your computer and use it in GitHub Desktop.
Save jeffh/a9287af4af0da7cba3e87d6afcf9e3d9 to your computer and use it in GitHub Desktop.
Meander RSS Example
(require '[meander.epsilon :as m])
(require '[clojure.data.xml :as xml])
(import 'java.net.URI)
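;; TODO: more imports & requires -- the code below also uses `str-join`, `sha256` and `UUID`, none of which are defined or imported in this snippet.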
(defn- feed-content
  "Pairs a piece of feed content with its declared type.

  Returns a two-entry map: `:type` holds `type` and `:content` holds
  `content`; neither argument is inspected or transformed."
  [type content]
  (zipmap [:type :content] [type content]))
(defn- assoc-hash
  "Returns `m` with `hash-key` set to the Clojure `hash` of the sub-map of `m`
  restricted to `keys`.

  Keys listed in `keys` that are absent from `m` simply do not contribute,
  because `select-keys` skips them."
  [m hash-key keys]
  (let [fingerprint (-> m
                        (select-keys keys)
                        hash)]
    (assoc m hash-key fingerprint)))
(defn- str->inst
  "Parses a feed timestamp string into a `java.time.Instant`.

  Two formats are attempted, in order:
    1. RFC 1123 (e.g. \"Mon, 12 Apr 2021 23:39:00 GMT\"), as used by RSS pubDate.
    2. ISO-8601 date-time with offset (e.g. \"2021-04-12T23:39:00Z\"), as used
       by Atom `updated` and Dublin Core `date`.

  Returns nil when `s` is nil, blank, or matches neither format, so a
  malformed date never aborts conversion of the whole feed."
  [s]
  ;; (str nil) is "", so nil and blank input both fall out through not-empty.
  (when-let [text (not-empty (.trim (str s)))]
    (some (fn [^java.time.format.DateTimeFormatter fmt]
            (try
              (java.time.Instant/from (.parse fmt text))
              ;; DateTimeParseException is a DateTimeException; Instant/from
              ;; can also throw DateTimeException. Either way, try the next format.
              (catch java.time.DateTimeException _ nil)))
          [java.time.format.DateTimeFormatter/RFC_1123_DATE_TIME
           java.time.format.DateTimeFormatter/ISO_OFFSET_DATE_TIME])))
;; Converts a parsed feed document into a map of :feed/* keys.
;;
;; `nodes` is matched with Meander's `m/find` against three map patterns, in
;; order: RSS 2.0, RSS 1.0 (RDF) and Atom 1.0. Every pattern destructures maps
;; with :tag / :attrs / :content keys; the usage example in this file feeds it
;; the result of `xml/parse-str`. The first matching pattern produces a map
;; with :feed/version, :feed/title, :feed/link, :feed/description and
;; :feed/entries (a lazy `for` sequence of :feed.entry/* maps).
;; (m/find presumably yields nil when no pattern matches -- confirm against
;; the Meander documentation.)
;;
;; NOTE(review): `str-join`, `sha256` and `UUID` are used below but are not
;; defined or imported in the visible part of this file.
;; NOTE(review): nested m/find / m/search patterns reuse logic-variable names
;; (?content, ?link, ?title, ...) that the enclosing pattern has already bound;
;; confirm how Meander treats such re-use.
(defn- feed->map [nodes]
;; Keyword namespace used below for the content-module `encoded` tag: the
;; literal prefix "xmlns." followed by the URL-encoded namespace URI.
;; NOTE(review): the one-argument URLEncoder/encode overload is deprecated in
;; the JDK -- confirm whether an explicit charset should be passed.
(let [_content-ns (str "xmlns." (java.net.URLEncoder/encode "http://purl.org/rss/1.0/modules/content/"))]
(m/find
nodes
;; RSS 2.0
;; Pattern: an :rss element with version "2.0" whose content contains a
;; channel that has title, link and description children. ?content is bound
;; to the channel's full child sequence so items can be searched afterwards.
{:tag :rss
:attrs {:version "2.0"}
:content (m/scan
{:tag (m/keyword "channel")
:content (m/and (m/scan {:tag (m/keyword "title")
:content ?title})
(m/scan {:tag (m/keyword "link")
:content ?link})
(m/scan {:tag (m/keyword "description")
:content ?description})
?content)})}
{:feed/version "RSS/2.0"
:feed/title (feed-content "html" ?title)
;; Only this branch guards against an empty link before constructing the URI.
:feed/link (when-let [lnk (not-empty (str-join ?link))] (URI. lnk))
:feed/description (feed-content "html" ?description)
;; One entry map per `item` child of the channel; `item` is that element's
;; :content sequence.
:feed/entries (for [item (m/search ?content (m/scan {:tag (m/keyword "item")
:content ?content}) ?content)
:let [link (m/find item (m/scan {:tag :link
:content ?link})
(str-join ?link))]]
{:feed.entry/title (m/find item (m/scan {:tag :title
:content ?title})
(feed-content "html" ?title))
;; NOTE(review): if the item has no link child, `link` is presumably nil
;; here and (URI. link) would throw -- confirm.
:feed.entry/link (URI. link)
:feed.entry/description (m/find item (m/scan {:tag :description
:content ?d})
(feed-content "html" ?d))
;; guid fallback chain: non-empty <guid> text, then sha256 of the link,
;; then a freshly generated random UUID string.
:feed.entry/guid (or (m/find item (m/scan {:tag :guid
:content ?guid})
(not-empty (str-join ?guid)))
(sha256 link)
(str (UUID/randomUUID)))
:feed.entry/pub-date (m/find item (m/scan {:tag :pubDate
:content ?pub-date})
(str->inst (str-join ?pub-date)))
;; Each category gets a :feed.category/hash computed by assoc-hash over
;; its taxonomy URI and name.
:feed.entry/categories (m/search item (m/scan {:tag :category
:attrs {:domain ?uri}
:content ?name})
(assoc-hash {:feed.category/name (str-join ?name)
:feed.category/taxonomy-uri (when ?uri (URI. ?uri))}
:feed.category/hash [:feed.category/taxonomy-uri :feed.category/name]))
;; Contents are the `encoded` elements (tag keyword namespaced with
;; _content-ns) followed by enclosure attachments; not-empty turns an
;; empty result into nil.
:feed.entry/contents (not-empty
(concat
(m/search item (m/scan {:tag (m/keyword _content-ns "encoded")
:content ?content})
(feed-content "html" ?content))
(m/search item (m/scan {:tag :enclosure
:attrs {:url ?url
:length ?len
:type ?type}})
;; NOTE(review): Integer/parseInt throws on a nil or non-numeric ?len --
;; confirm enclosures always carry a numeric length attribute.
(assoc-hash {:feed.content/type ?type
:feed.content/length (Integer/parseInt ?len)
:feed.content/url (when ?url (URI. ?url))}
:feed.content/hash
[:feed.content/type :feed.content/url :feed.content/length]))))
:feed.entry/comment-url (m/find item (m/scan {:tag (m/keyword "comments")
:content ?url})
(URI. (str-join ?url)))
;; Authors come from two sources: `author` elements and `creator` elements.
;; NOTE(review): the first m/search yields one sequence (from the inner
;; `for`) per author element while the second yields one map per creator
;; element, so the concatenation looks like it mixes nested sequences
;; with maps -- confirm this is intended.
:feed.entry/authors (concat
(m/search item (m/scan {:tag :author
:content ?author})
(for [author ?author]
;; A string child is used directly as the name; an element child is
;; searched for name / email / uri sub-elements.
(-> (if (string? author)
{:feed.person/name author}
{:feed.person/name (m/find author (m/scan {:tag :name
:content ?v}) (str-join ?v))
:feed.person/email (m/find author (m/scan {:tag :email
:content ?v}) (str-join ?v))
:feed.person/uri (m/find author (m/scan {:tag :uri
:content ?v}) (str-join ?v))})
(assoc-hash :feed.person/hash [:feed.person/uri :feed.person/email :feed.person/name]))))
(m/search item
(m/scan {:tag (m/keyword "creator")
:content ?author})
;; Only the name is present here; assoc-hash's select-keys skips the
;; absent uri / email keys.
(assoc-hash
{:feed.person/name (str-join ?author)}
:feed.person/hash [:feed.person/uri :feed.person/email :feed.person/name])))})}
;; RSS 1.0
;; Pattern: an element whose tag name is "RDF", with a channel child that has
;; title, link and description. Here ?content is the RDF element's own
;; children (not the channel's), which is where items are searched.
{:tag (m/keyword "RDF")
:content (m/and (m/scan {:tag (m/keyword "channel")
:content (m/and (m/scan {:tag (m/keyword "title")
:content ?title})
(m/scan {:tag (m/keyword "link")
:content ?link})
(m/scan {:tag (m/keyword "description")
:content ?description}))})
?content)}
{:feed/version "RSS/1.0"
:feed/title (feed-content "html" ?title)
;; Unlike the RSS 2.0 branch, the link is not checked for emptiness here.
:feed/link (URI. (str-join ?link))
:feed/description (feed-content "html" ?description)
:feed/entries (for [item (m/search ?content (m/scan {:tag (m/keyword "item")
:content ?content}) ?content)
:let [link (m/find item (m/scan {:tag (m/keyword "link")
:content ?link})
(str-join ?link))]]
{:feed.entry/title (m/find item (m/scan {:tag (m/keyword "title")
:content ?title})
(feed-content "html" ?title))
;; NOTE(review): as in the RSS 2.0 branch, a missing link presumably
;; makes `link` nil and (URI. link) throw -- confirm.
:feed.entry/link (URI. link)
:feed.entry/description (m/find item (m/scan {:tag (m/keyword "description")
:content ?d})
(feed-content "html" ?d))
:feed.entry/guid (or (m/find item (m/scan {:tag (m/keyword "guid")
:content ?guid})
(not-empty (str-join ?guid)))
(sha256 link)
(str (UUID/randomUUID)))
;; In this branch the publication date is read from a `date` element and
;; categories from `subject` elements.
:feed.entry/pub-date (m/find item (m/scan {:tag (m/keyword "date")
:content ?pub-date})
(str->inst (str-join ?pub-date)))
:feed.entry/categories (concat (m/search item (m/scan {:tag (m/keyword "subject")
:content ?v})
(assoc-hash {:feed.category/name (str-join ?v)}
:feed.category/hash [:feed.category/taxonomy-uri :feed.category/name])))
:feed.entry/authors (m/search item
(m/scan {:tag (m/keyword "creator")
:content ?author})
(-> {:feed.person/name (str-join ?author)}
(assoc-hash :feed.person/hash [:feed.person/uri :feed.person/email :feed.person/name])))})}
;; Atom 1.0
;; Pattern: an element whose tag name is "feed" containing title, id, link,
;; updated and subtitle children; ?content is the feed's full child sequence.
;; NOTE(review): ?id is matched from the id element's :href attribute and
;; ?pub-date from `updated`, but neither appears at the top level of the
;; result map below -- confirm whether they are needed.
{:tag (m/keyword "feed")
:content (m/and (m/scan {:tag (m/keyword "title")
:attrs {:type ?title-type}
:content ?title})
(m/scan {:tag (m/keyword "id")
:attrs {:href ?id}})
(m/scan {:tag (m/keyword "link")
:attrs {:href ?link}})
(m/scan {:tag (m/keyword "updated")
:content ?pub-date})
(m/scan {:tag (m/keyword "subtitle")
:attrs {:type ?description-type}
:content ?description})
?content)}
{:feed/version "Atom/1.0"
:feed/title (feed-content ?title-type ?title)
;; ?link here is the link element's :href attribute value, not a content sequence.
:feed/link (URI. (str-join ?link))
:feed/description (feed-content ?description-type ?description)
:feed/entries (for [item (m/search ?content (m/scan {:tag (m/keyword "entry")
:content ?c})
?c)
;; NOTE(review): `link` is built with m/search (all matches, each already
;; wrapped in URI.), whereas the RSS branches use m/find + str-join to get
;; a single string. It is then passed to (URI. link) and (sha256 link)
;; below, which looks inconsistent -- confirm.
:let [link (m/search item (m/scan {:tag (m/keyword "link")
:attrs {:href ?link}})
(URI. (str-join ?link)))]]
{:feed.entry/title (m/find item (m/scan {:tag (m/keyword "title")
:attrs {:type ?type}
:content ?title})
(feed-content ?type ?title))
:feed.entry/link (URI. link)
:feed.entry/description (m/find item (m/scan {:tag (m/keyword "summary")
:attrs {:type ?type}
:content ?summary})
(feed-content ?type ?summary))
:feed.entry/pub-date (m/find item (m/scan {:tag (m/keyword "updated")
:content ?date})
(str->inst (str-join ?date)))
:feed.entry/guid (or (m/find item (m/scan {:tag (m/keyword "id")
:content ?id})
(not-empty (str-join ?id)))
(sha256 link)
(str (UUID/randomUUID)))
:feed.entry/contents (not-empty
(concat
(m/search item (m/scan {:tag (m/keyword "content")
:attrs {:type ?type}
:content ?content})
(feed-content ?type ?content))))
;; NOTE(review): unlike every other field, this pattern is a bare map (no
;; m/scan) matched against `item`, which is an entry's content sequence;
;; it also reads a :schema attribute, while RFC 4287 names the category
;; attribute `scheme` -- confirm both.
;; The hash here is sha256 of "<uri>!<name>" rather than assoc-hash.
:feed.entry/categories (m/search item {:tag (m/keyword "category")
:attrs {:term ?category-name
:schema ?category-uri}}
{:feed.category/hash (sha256 (str ?category-uri "!" ?category-name))
:feed.category/name ?category-name
:feed.category/taxonomy-uri (when ?category-uri (URI. ?category-uri))})
;; Authors: every `author` element that has a `name` child; ?children is
;; the author's full child sequence, searched again for uri / email.
;; NOTE(review): the inner m/find calls match a bare map pattern (no
;; m/scan) against ?children, and name / uri / email are kept as raw
;; :content values rather than passed through str-join -- confirm.
:feed.entry/authors (m/search item (m/scan {:tag (m/keyword "author")
:content (m/and (m/scan {:tag (m/keyword "name")
:content ?author-name})
?children)})
(assoc-hash
{:feed.person/name ?author-name
:feed.person/uri (m/find ?children {:tag (m/keyword "uri")
:content ?uri}
?uri)
:feed.person/email (m/find ?children {:tag (m/keyword "email")
:content ?email}
?email)}
:feed.person/hash [:feed.person/uri :feed.person/email :feed.person/name]))
;; Contributors are extracted exactly like authors, from `contributor` elements.
:feed.entry/contributors (m/search item (m/scan {:tag (m/keyword "contributor")
:content (m/and (m/scan {:tag (m/keyword "name")
:content ?author-name})
?children)})
(assoc-hash
{:feed.person/name ?author-name
:feed.person/uri (m/find ?children {:tag (m/keyword "uri")
:content ?uri}
?uri)
:feed.person/email (m/find ?children {:tag (m/keyword "email")
:content ?email}
?email)}
:feed.person/hash [:feed.person/uri :feed.person/email :feed.person/name]))})})))
(comment
;; Usage: read a feed document from the file "rss.xml", parse it with
;; clojure.data.xml (`xml/parse-str`), and convert the parsed tree with
;; `feed->map`. Nothing inside this `comment` form is evaluated on load.
(feed->map (xml/parse-str (slurp "rss.xml")))
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment