Skip to content

Instantly share code, notes, and snippets.

@devn
Created April 1, 2012 00:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save devn/bdc2f0729ea6d478ec08 to your computer and use it in GitHub Desktop.
Save devn/bdc2f0729ea6d478ec08 to your computer and use it in GitHub Desktop.
yokogiri-gist
(ns yokogiri.core
(:import [com.gargoylesoftware.htmlunit WebClient BrowserVersion]
[com.gargoylesoftware.htmlunit.html HtmlPage]))
;; Everything inside a `comment` form is read but never evaluated, so the
;; switch below is inactive unless it is evaluated by hand (e.g. at a REPL).
(comment ;; Development Switches
;; When set to true, the compiler prints a warning for every Java interop
;; call it cannot resolve at compile time and must dispatch via reflection.
(set! *warn-on-reflection* true))
(defn make-client
  "Returns a new HtmlUnit WebClient created with its no-argument constructor."
  []
  (WebClient.))
(defn visit
  "Calls `getPage` on the web client `c` with the string `url` and returns
  whatever that call yields."
  [^WebClient c ^String url]
  (.getPage c url))
(defn xpath
  "Calls `getByXPath` on `page` with the XPath expression string `xpath`
  and returns the result of that call."
  [^HtmlPage page ^String xpath]
  ;; Inside this body `xpath` refers to the string argument, not this function.
  (.getByXPath page xpath))
(defn node-xml
  "Invokes the `asXml` member of `node` and returns its result.
  `node` carries no type hint, so the call is resolved at runtime."
  [node]
  (. node asXml))
(defn node-text
  "Invokes the `asText` member of `node` and returns its result.
  `node` carries no type hint, so the call is resolved at runtime."
  [node]
  (. node asText))
(defn attrs
  "Returns the attributes of the DOM node `node` as a map of
  keyword attribute name -> attribute value string.

  Returns an empty map when the node has no attributes, including nodes
  whose `getAttributes` returns nil.

  The attribute collection is walked through the org.w3c.dom.NamedNodeMap
  interface (`getLength` / `item`) rather than with `count`, because `count`
  only works when the concrete NamedNodeMap also implements java.util.Map
  and throws UnsupportedOperationException otherwise."
  [node]
  (let [^org.w3c.dom.NamedNodeMap attr-map (.getAttributes ^org.w3c.dom.Node node)
        ;; A nil attribute map is treated as having zero attributes.
        n (if (nil? attr-map) 0 (.getLength attr-map))]
    (reduce (fn [res i]
              (let [^org.w3c.dom.Attr attr (.item attr-map (int i))]
                (assoc res (keyword (.getName attr)) (.getValue attr))))
            {}
            (range n))))
(comment ;; TODO: Add arity
  ;; Draft of a multi-arity make-client. It lives inside `comment`, so none of
  ;; it is evaluated; the single-arity make-client defined outside this form
  ;; remains the one in effect.

  ;; Lookup table from an upper-case browser name to a BrowserVersion constant.
  (def browser-versions
    {"IE6" BrowserVersion/INTERNET_EXPLORER_6
     "IE7" BrowserVersion/INTERNET_EXPLORER_7
     "IE8" BrowserVersion/INTERNET_EXPLORER_8
     "FF3" BrowserVersion/FIREFOX_3})

  (defn make-client
    "With no arguments, returns a WebClient built with the no-argument
    constructor. With a browser name such as \"ie7\" (case-insensitive),
    returns a WebClient built for the matching entry in `browser-versions`.
    Throws IllegalArgumentException when the name is not in the table."
    ([] (new WebClient))
    ([^String version]
       ;; The map is the lookup target; a String cannot be called as a function.
       (let [vers (get browser-versions (.toUpperCase version))]
         (when (nil? vers)
           (throw (IllegalArgumentException.
                   (str "Unknown browser version: " version))))
         (new WebClient vers))))
  )
(comment ;; Trying something else instead...
  ;; Alternative experiment using a DOMParser instead of HtmlUnit. It lives
  ;; inside `comment`, so none of it is evaluated when the file loads.
  (ns yokogiri.core
    (:import [org.cyberneko.html.parsers DOMParser]
             ;; Same package the type hints below refer to.
             [org.apache.html.dom HTMLDocumentImpl]))

  (def parser
    (new DOMParser))

  ;; Evaluating this form makes the parser fetch and parse the URL.
  ;; NOTE(review): DOMParser.parse appears to return nothing, so this var
  ;; presumably holds nil and matters only for its side effect -- confirm.
  (def parsed-url
    (. parser parse "http://clojure-log.n01se.net/date/2008-02-01.html"))

  ;; The document most recently parsed by `parser`.
  (def document
    (. parser getDocument))

  (defn get-element-by-id
    "Returns the result of `getElementById` on `doc` for the given `id`."
    [^HTMLDocumentImpl doc ^String id]
    (. doc getElementById id))

  (defn get-elements-by-tag-name
    "Returns the result of `getElementsByTagName` on `doc` for the given `tag`."
    [^HTMLDocumentImpl doc ^String tag]
    (. doc getElementsByTagName tag))
  )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment