public
Created

Simple webscrape with enlive

  • Download Gist
tutorial1.clj
Clojure
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57
(ns tutorial.scrape1
(:require [net.cgrand.enlive-html :as html]))
 
(def *url* "http://www.belex.rs/trgovanje/prospekt/VZAS/show")
(defn get-page
"Gets the html page from passed url"
[url]
(html/html-resource (java.net.URL. url)))
(defn content->string [content]
(cond
(nil? content) ""
(string? content) content
(map? content) (content->string (:content content))
(coll? content) (apply str (map content->string content))
:else (str content)))
(derive clojure.lang.PersistentStructMap ::Map)
(derive clojure.lang.PersistentArrayMap ::Map)
(derive java.lang.String ::String)
(derive clojure.lang.ISeq ::Collection)
(derive clojure.lang.PersistentList ::Collection)
(derive clojure.lang.LazySeq ::Collection)
(defn tag-type [node]
(case (:tag node)
:tr ::CompoundNode
:table ::CompoundNode
:th ::TerminalNode
:td ::TerminalNode
:h3 ::TerminalNode
:tbody ::IgnoreNode
::IgnoreNode))
(defmulti parse-node
(fn [node]
(let [cls (class node)] [cls (if (isa? cls ::Map) (tag-type node) nil)])))
(defmethod parse-node [::Map ::TerminalNode] [node]
(content->string (:content node)))
(defmethod parse-node [::Map ::CompoundNode] [node]
(map parse-node (:content node)))
(defmethod parse-node [::Map ::IgnoreNode] [node]
(parse-node (:content node)))
(defmethod parse-node [::String nil] [node]
node)
(defmethod parse-node [::Collection nil] [node]
(map parse-node node))
(defn h3+table
"returns sequence of <h3> and <table> tags"
[url]
(let [ws-content (get-page url)
h3s+tables (html/select ws-content #{[:div#prospekt_container :h3]
[:div#prospekt_container :table]})]
(for [node h3s+tables] (parse-node node))))

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.