Last active
April 12, 2021 23:39
-
-
Save jeffh/a9287af4af0da7cba3e87d6afcf9e3d9 to your computer and use it in GitHub Desktop.
Meander RSS Example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(require '[meander.epsilon :as m]) | |
(require '[clojure.data.xml :as xml]) | |
(import 'java.net.URI) | |
;; TODO: more imports & requires | |
(defn- feed-content
  "Wraps a piece of feed text together with its declared content type.

  Returns a map with `:type` set to `type` and `:content` set to `content`;
  neither value is inspected or transformed."
  [type content]
  (-> {}
      (assoc :type type)
      (assoc :content content)))
(defn- assoc-hash
  "Returns `m` with `hash-key` associated to the `hash` of the sub-map of `m`
  restricted to `keys`.

  Keys listed in `keys` that are absent from `m` simply do not contribute to
  the hashed sub-map."
  [m hash-key keys]
  (let [fingerprint (-> m
                        (select-keys keys)
                        hash)]
    (assoc m hash-key fingerprint)))
(defn- str->inst
  "Placeholder for converting a date string `s` into an instant.

  No parsing is implemented here: the function ignores `s` and always
  returns nil."
  [s]
  ;; TODO: some date parsing logic here
  nil)
(defn- feed->map
  "Converts a parsed feed document into a normalized feed map.

  `nodes` is the root element of the XML tree (the usage example in this file
  passes the result of `xml/parse-str`). Three root shapes are recognized and
  tried in order: RSS 2.0 (an `:rss` root with version \"2.0\"), RSS 1.0 (an
  `RDF` root) and Atom 1.0 (a `feed` root). Returns nil when none matches.

  The result has the keys `:feed/version`, `:feed/title`, `:feed/link`,
  `:feed/description` and `:feed/entries`; every entry is a map of
  `:feed.entry/*` keys. Links are `java.net.URI` instances, or nil when the
  source element is missing or empty.

  Relies on `str-join`, `sha256` and `UUID`, none of which is defined in the
  visible part of this file."
  [nodes]
  (let [;; NOTE(review): Meander may treat symbols starting with `_` as
        ;; wildcards inside patterns, in which case this value is never
        ;; compared and any namespace matches for the `encoded` tag below.
        ;; The name is kept so matching behaves as before - confirm intent.
        _content-ns (str "xmlns." (java.net.URLEncoder/encode "http://purl.org/rss/1.0/modules/content/" "UTF-8"))
        ;; Builds a URI from a string; nil or empty input yields nil instead of
        ;; throwing, so entries without a link can still be produced.
        ->uri (fn [s] (when-let [text (not-empty s)] (URI. text)))]
    ;; Nested patterns below use their own logic-variable names so they cannot
    ;; be confused with the bindings of the enclosing feed-level match.
    (m/find
     nodes
     ;; RSS 2.0
     {:tag :rss
      :attrs {:version "2.0"}
      :content (m/scan
                {:tag (m/keyword "channel")
                 :content (m/and (m/scan {:tag (m/keyword "title")
                                          :content ?title})
                                 (m/scan {:tag (m/keyword "link")
                                          :content ?link})
                                 (m/scan {:tag (m/keyword "description")
                                          :content ?description})
                                 ?content)})}
     {:feed/version "RSS/2.0"
      :feed/title (feed-content "html" ?title)
      :feed/link (->uri (str-join ?link))
      :feed/description (feed-content "html" ?description)
      :feed/entries
      (for [item (m/search ?content
                   (m/scan {:tag (m/keyword "item")
                            :content ?item})
                   ?item)
            :let [link (m/find item
                         (m/scan {:tag :link
                                  :content ?item-link})
                         (str-join ?item-link))]]
        {:feed.entry/title (m/find item
                             (m/scan {:tag :title
                                      :content ?item-title})
                             (feed-content "html" ?item-title))
         :feed.entry/link (->uri link)
         :feed.entry/description (m/find item
                                   (m/scan {:tag :description
                                            :content ?d})
                                   (feed-content "html" ?d))
         ;; Preference order: explicit guid, hash of the link, random UUID.
         :feed.entry/guid (or (m/find item
                                (m/scan {:tag :guid
                                         :content ?guid})
                                (not-empty (str-join ?guid)))
                              (some-> link not-empty sha256)
                              (str (UUID/randomUUID)))
         :feed.entry/pub-date (m/find item
                                (m/scan {:tag :pubDate
                                         :content ?pub-date})
                                (str->inst (str-join ?pub-date)))
         :feed.entry/categories (m/search item
                                  (m/scan {:tag :category
                                           :attrs {:domain ?uri}
                                           :content ?name})
                                  (assoc-hash {:feed.category/name (str-join ?name)
                                               :feed.category/taxonomy-uri (when ?uri (URI. ?uri))}
                                              :feed.category/hash
                                              [:feed.category/taxonomy-uri :feed.category/name]))
         :feed.entry/contents (not-empty
                               (concat
                                (m/search item
                                  (m/scan {:tag (m/keyword _content-ns "encoded")
                                           :content ?encoded})
                                  (feed-content "html" ?encoded))
                                (m/search item
                                  (m/scan {:tag :enclosure
                                           :attrs {:url ?url
                                                   :length ?len
                                                   :type ?type}})
                                  (assoc-hash {:feed.content/type ?type
                                               ;; An enclosure without a length attribute
                                               ;; yields nil instead of throwing.
                                               :feed.content/length (when ?len (Integer/parseInt ?len))
                                               :feed.content/url (when ?url (URI. ?url))}
                                              :feed.content/hash
                                              [:feed.content/type :feed.content/url :feed.content/length]))))
         :feed.entry/comment-url (m/find item
                                   (m/scan {:tag (m/keyword "comments")
                                            :content ?comments})
                                   (->uri (str-join ?comments)))
         :feed.entry/authors
         (concat
          ;; Each <author> element produces a seq of person maps; flatten them
          ;; so the result is one flat seq of maps, like the creators below.
          ;; NOTE(review): `author` is a single child of the <author> element,
          ;; so scanning it for :name/:email/:uri children looks suspect -
          ;; confirm against real feeds.
          (apply concat
                 (m/search item
                   (m/scan {:tag :author
                            :content ?author})
                   (for [author ?author]
                     (-> (if (string? author)
                           {:feed.person/name author}
                           {:feed.person/name (m/find author
                                                (m/scan {:tag :name
                                                         :content ?v})
                                                (str-join ?v))
                            :feed.person/email (m/find author
                                                 (m/scan {:tag :email
                                                          :content ?v})
                                                 (str-join ?v))
                            :feed.person/uri (m/find author
                                               (m/scan {:tag :uri
                                                        :content ?v})
                                               (str-join ?v))})
                         (assoc-hash :feed.person/hash [:feed.person/uri :feed.person/email :feed.person/name])))))
          (m/search item
            (m/scan {:tag (m/keyword "creator")
                     :content ?creator})
            (assoc-hash
             {:feed.person/name (str-join ?creator)}
             :feed.person/hash [:feed.person/uri :feed.person/email :feed.person/name])))})}
     ;; RSS 1.0
     {:tag (m/keyword "RDF")
      :content (m/and (m/scan {:tag (m/keyword "channel")
                               :content (m/and (m/scan {:tag (m/keyword "title")
                                                        :content ?title})
                                               (m/scan {:tag (m/keyword "link")
                                                        :content ?link})
                                               (m/scan {:tag (m/keyword "description")
                                                        :content ?description}))})
                      ?content)}
     {:feed/version "RSS/1.0"
      :feed/title (feed-content "html" ?title)
      :feed/link (->uri (str-join ?link))
      :feed/description (feed-content "html" ?description)
      :feed/entries
      (for [item (m/search ?content
                   (m/scan {:tag (m/keyword "item")
                            :content ?item})
                   ?item)
            :let [link (m/find item
                         (m/scan {:tag (m/keyword "link")
                                  :content ?item-link})
                         (str-join ?item-link))]]
        {:feed.entry/title (m/find item
                             (m/scan {:tag (m/keyword "title")
                                      :content ?item-title})
                             (feed-content "html" ?item-title))
         :feed.entry/link (->uri link)
         :feed.entry/description (m/find item
                                   (m/scan {:tag (m/keyword "description")
                                            :content ?d})
                                   (feed-content "html" ?d))
         :feed.entry/guid (or (m/find item
                                (m/scan {:tag (m/keyword "guid")
                                         :content ?guid})
                                (not-empty (str-join ?guid)))
                              (some-> link not-empty sha256)
                              (str (UUID/randomUUID)))
         :feed.entry/pub-date (m/find item
                                (m/scan {:tag (m/keyword "date")
                                         :content ?pub-date})
                                (str->inst (str-join ?pub-date)))
         :feed.entry/categories (m/search item
                                  (m/scan {:tag (m/keyword "subject")
                                           :content ?v})
                                  (assoc-hash {:feed.category/name (str-join ?v)}
                                              :feed.category/hash
                                              [:feed.category/taxonomy-uri :feed.category/name]))
         :feed.entry/authors (m/search item
                               (m/scan {:tag (m/keyword "creator")
                                        :content ?creator})
                               (-> {:feed.person/name (str-join ?creator)}
                                   (assoc-hash :feed.person/hash [:feed.person/uri :feed.person/email :feed.person/name])))})}
     ;; Atom 1.0
     {:tag (m/keyword "feed")
      :content (m/and (m/scan {:tag (m/keyword "title")
                               :attrs {:type ?title-type}
                               :content ?title})
                      ;; Only the presence of <id> is required; its value is unused.
                      (m/scan {:tag (m/keyword "id")})
                      (m/scan {:tag (m/keyword "link")
                               :attrs {:href ?link}})
                      (m/scan {:tag (m/keyword "updated")
                               :content ?pub-date})
                      (m/scan {:tag (m/keyword "subtitle")
                               :attrs {:type ?description-type}
                               :content ?description})
                      ?content)}
     {:feed/version "Atom/1.0"
      :feed/title (feed-content ?title-type ?title)
      :feed/link (->uri (str-join ?link))
      :feed/description (feed-content ?description-type ?description)
      :feed/entries
      (for [item (m/search ?content
                   (m/scan {:tag (m/keyword "entry")
                            :content ?entry})
                   ?entry)
            ;; Take the first <link> href as a string. Collecting every link
            ;; here would hand a seq to the URI constructor and to sha256.
            :let [link (m/find item
                         (m/scan {:tag (m/keyword "link")
                                  :attrs {:href ?entry-link}})
                         (str-join ?entry-link))]]
        {:feed.entry/title (m/find item
                             (m/scan {:tag (m/keyword "title")
                                      :attrs {:type ?type}
                                      :content ?entry-title})
                             (feed-content ?type ?entry-title))
         :feed.entry/link (->uri link)
         :feed.entry/description (m/find item
                                   (m/scan {:tag (m/keyword "summary")
                                            :attrs {:type ?type}
                                            :content ?summary})
                                   (feed-content ?type ?summary))
         :feed.entry/pub-date (m/find item
                                (m/scan {:tag (m/keyword "updated")
                                         :content ?date})
                                (str->inst (str-join ?date)))
         :feed.entry/guid (or (m/find item
                                (m/scan {:tag (m/keyword "id")
                                         :content ?entry-id})
                                (not-empty (str-join ?entry-id)))
                              (some-> link not-empty sha256)
                              (str (UUID/randomUUID)))
         :feed.entry/contents (not-empty
                               (m/search item
                                 (m/scan {:tag (m/keyword "content")
                                          :attrs {:type ?type}
                                          :content ?body})
                                 (feed-content ?type ?body)))
         ;; `item` is the entry's child sequence, so the category pattern has
         ;; to be scanned for; the Atom attribute is named `scheme`.
         :feed.entry/categories (m/search item
                                  (m/scan {:tag (m/keyword "category")
                                           :attrs {:term ?category-name
                                                   :scheme ?category-uri}})
                                  {:feed.category/hash (sha256 (str ?category-uri "!" ?category-name))
                                   :feed.category/name ?category-name
                                   :feed.category/taxonomy-uri (when ?category-uri (URI. ?category-uri))})
         ;; `?children` is the author's child sequence: scan it for the
         ;; optional uri/email elements and join their text like the name.
         :feed.entry/authors (m/search item
                               (m/scan {:tag (m/keyword "author")
                                        :content (m/and (m/scan {:tag (m/keyword "name")
                                                                 :content ?author-name})
                                                        ?children)})
                               (assoc-hash
                                {:feed.person/name (str-join ?author-name)
                                 :feed.person/uri (m/find ?children
                                                    (m/scan {:tag (m/keyword "uri")
                                                             :content ?uri})
                                                    (str-join ?uri))
                                 :feed.person/email (m/find ?children
                                                      (m/scan {:tag (m/keyword "email")
                                                               :content ?email})
                                                      (str-join ?email))}
                                :feed.person/hash [:feed.person/uri :feed.person/email :feed.person/name]))
         :feed.entry/contributors (m/search item
                                    (m/scan {:tag (m/keyword "contributor")
                                             :content (m/and (m/scan {:tag (m/keyword "name")
                                                                      :content ?author-name})
                                                             ?children)})
                                    (assoc-hash
                                     {:feed.person/name (str-join ?author-name)
                                      :feed.person/uri (m/find ?children
                                                         (m/scan {:tag (m/keyword "uri")
                                                                  :content ?uri})
                                                         (str-join ?uri))
                                      :feed.person/email (m/find ?children
                                                           (m/scan {:tag (m/keyword "email")
                                                                    :content ?email})
                                                           (str-join ?email))}
                                     :feed.person/hash [:feed.person/uri :feed.person/email :feed.person/name]))})})))
(comment
  ;; Usage: read a feed from disk, parse the XML, then normalize it.
  (-> "rss.xml"
      slurp
      xml/parse-str
      feed->map)
  )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment