Last active
August 29, 2015 14:03
-
-
Save Jach/6b82fb57bf0bc13937be to your computer and use it in GitHub Desktop.
REPL commands using clj-webdriver to get my tweets
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; See revision 1 of this gist for the non-PhantomJS version. To set up PhantomJS, see: http://blog.zolotko.me/2012/12/clojure-selenium-webdriver-and-phantomjs.html
(use 'clj-webdriver.taxi) | |
(import 'org.openqa.selenium.phantomjs.PhantomJSDriver | |
'org.openqa.selenium.remote.DesiredCapabilities) | |
(use '[clj-webdriver.driver :only [init-driver]]) | |
; skip this section until 'lawl' comment | |
(use '[clj-webdriver.core :only [execute-script*]]) | |
(use '[clj-webdriver.js.browserbot :only [script]]) | |
(defn browser-bot
  "Calls the browserbot JavaScript function named `fn-name` in the page
  driven by `driver`, forwarding `arguments` to it, and returns whatever
  `execute-script*` returns.

  The JavaScript that is run is the clj-webdriver browserbot source
  (`script`) followed by a `return browserbot.<fn-name>.apply(...)` call."
  [driver fn-name & arguments]
  (let [js-source (str script
                       "return browserbot."
                       fn-name
                       ".apply(browserbot, arguments)")]
    ;; (println js-source) ; uncomment to inspect the generated JavaScript
    (apply execute-script* driver js-source arguments)))
; Same result as clj-webdriver's own (html el).
(defn outer-html
  "Returns the outer HTML of `el` by calling browserbot's `getOuterHTML`
  on the element's underlying webelement, via that webelement's wrapped driver."
  [el]
  (let [web-el (:webelement el)]
    (browser-bot (.getWrappedDriver web-el) "getOuterHTML" web-el)))
; LAWL I was on the way to re-creating the already existing (attribute el attr) function... | |
; Start a PhantomJS-backed driver with default capabilities, register it as
; the taxi API's current driver, then open the profile page.
(let [phantom (PhantomJSDriver. (DesiredCapabilities.))]
  (set-driver! (init-driver {:webdriver phantom})))
(to "https://twitter.com/jachy")
; For some reason (text el) doesn't work under PhantomJS, so read the
; element's innerHTML attribute instead and clean it up by hand.
(def first-tweet
  (-> {:tag :p :class "ProfileTweet-text js-tweet-text u-dir"}
      find-elements
      first
      (attribute "innerHTML")))
(defn strip-html
  "Returns `text` with every HTML tag removed; the text between tags is kept.

  A tag is a `<`, one or more characters that are not `>`, then a `>`.
  Matching on `[^>]` instead of a lazy `.+?` means a single pass removes
  opening, closing and self-closing tags alike, including tags at the very
  end of the string, tags directly before a newline, and tags whose
  attributes span several lines. No recursion or cleanup pass is needed.

  `text` must be a non-nil String."
  [^String text]
  (.replaceAll text "<[^>]+>" ""))
; (strip-html first-tweet) | |
(defn entity-replace
  "Decodes the HTML entities `&nbsp;` and `&amp;` in `text`.

  Not exhaustive: only the two entities noticed in the timeline are handled.
  `&nbsp;` (and a literal U+00A0 no-break space) becomes a regular space,
  and `&amp;` becomes `&`.

  `&amp;` is decoded last so that an escaped entity such as `&amp;nbsp;`
  yields the literal text `&nbsp;` rather than being decoded twice.

  `text` must be a non-nil String."
  [^String text]
  (-> text
      (.replaceAll "&nbsp;|\u00A0" " ")
      (.replaceAll "&amp;" "&")))
(defn inner-html-text
  "Returns the text content of `el`: reads its innerHTML attribute, strips
  the tags with `strip-html`, then decodes entities with `entity-replace`."
  [el]
  (-> el
      (attribute "innerHTML")
      strip-html
      entity-replace))
; Print the text of every tweet currently loaded on the page, each followed
; by a line of ten dashes.
(let [separator (apply str (repeat 10 "-"))
      tweet-els (find-elements {:tag :p :class "ProfileTweet-text js-tweet-text u-dir"})]
  (doseq [tweet (map inner-html-text tweet-els)]
    (println tweet (str "\n" separator))))
; Attempt to dismiss the sign-up modal. Revision 1's approach didn't work, so brute-force it by clicking every close button.
; Update: judging from a screenshot, this didn't work either, so it is left commented out.
;(map #(try (click %) (catch Exception e nil)) (find-elements {:tag :button :class "modal-btn modal-close js-close"})) | |
(quit) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment