Created
April 24, 2009 07:28
-
-
Save al3xandr3/101006 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; coverager: opens a sample of the URLs listed in a sitemap with Selenium RC
;; and writes "url,page-title" lines to a dated CSV file.
(ns coverager
  (:import (com.thoughtworks.selenium DefaultSelenium)
           (org.openqa.selenium.server SeleniumServer)
           java.util.Date
           (java.io FileWriter)
           (java.text SimpleDateFormat))
  (:use clojure.contrib.zip-filter.xml)
  (:require [clojure.zip :as zip]
            [clojure.xml :as xml]))
(defmacro with-selenium
  "Starts a local SeleniumServer and a DefaultSelenium client (Firefox,
  localhost:4444), binds the client to the symbol `browser`, and evaluates
  `body` with that binding.

  The browser and the server are stopped in `finally` clauses, so they are
  shut down even when `body` throws. Returns the value of the last form in
  `body`."
  [browser & body]
  `(let [server# (new SeleniumServer)]
     (.start server#)
     (try
       (let [~browser (new DefaultSelenium "localhost" 4444 "*firefox" "http://www.google.com/")]
         (.start ~browser)
         (try
           (.setTimeout ~browser "100000")
           ~@body
           (finally
             ;; always release the browser session, even on failure
             (.stop ~browser))))
       (finally
         ;; the server is stopped last, after the browser is gone
         (.stop server#)))))
;; JavaScript snippet handed to Selenium's getEval for every visited page;
;; it evaluates to the document title of the current browser window.
(def *js-eval* "this.browserbot.getCurrentWindow().document.title;")
(defn check-a-page
  "Opens a-url in a-browser, waits three seconds, then prints one line of
  the form `url,result` to *out*, where result is the value of evaluating
  *js-eval* in the page. If anything throws an Exception, the line printed
  is `url,exception` instead, and the exception is not propagated."
  [a-browser a-url]
  (try
    (.open a-browser a-url)
    ;; short pause between pages, to avoid overloading the server
    (Thread/sleep 3000)
    (let [page-result (.getEval a-browser *js-eval*)]
      (println (str a-url "," page-result)))
    (catch Exception failure
      (println (str a-url "," failure)))))
(defn check-pages
  "Checks every URL in url-list with check-a-page inside one Selenium
  session, with *out* redirected to output/sc_validate_<yyyy-MM-dd>.csv.

  The report writer is managed by with-open, so it is flushed and closed
  when the run ends, including when the run ends with an exception. The
  file is opened before Selenium starts, so a missing or unwritable output
  directory fails fast without launching a browser."
  [url-list]
  (let [today    (.format (SimpleDateFormat. "yyyy-MM-dd") (Date.))
        csv-path (str "output/sc_validate_" today ".csv")]
    (with-open [report (FileWriter. csv-path)]
      (with-selenium browser
        (binding [*out* report]
          (doseq [a-url url-list]
            (check-a-page browser a-url)))))))
(defn xml-to-zip
  "Parses the XML found at url with clojure.xml/parse and returns a
  clojure.zip zipper positioned at the root element. url may be anything
  clojure.xml/parse accepts."
  [url]
  (zip/xml-zip (xml/parse url)))
(defn pick-a-sample
  "Returns a lazy random sample of a-list in which each element is kept
  independently with probability a-percentage / 100.

  a-percentage of 100 keeps every element and 0 keeps none. Elements are
  selected regardless of their own truthiness, so nil and false entries can
  be sampled too."
  [a-percentage a-list]
  (let [keep-probability (/ a-percentage 100)]
    ;; (rand) is in [0, 1), so the strict < makes 100 always keep and
    ;; 0 never keep.
    (filter (fn [_] (< (rand) keep-probability)) a-list)))
(defn process-sitemap
  "Reads the sitemap at sitemap-url, collects the text of every :url/:loc
  element, draws a 1 percent random sample of those URLs, and passes the
  sample to check-pages."
  [sitemap-url]
  (let [sitemap-zip (xml-to-zip sitemap-url)
        all-urls    (xml-> sitemap-zip :url :loc text)
        sampled     (pick-a-sample 1 all-urls)]
    (check-pages sampled)))
;; Example sitemap location to feed to process-sitemap.
(def *sitemap* "http://www.google.com/sitemap.xml")
;; usage: (process-sitemap *sitemap*)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment