Skip to content

Instantly share code, notes, and snippets.

@rightfold
Created January 12, 2015 20:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rightfold/806b816eeb73b7ecc474 to your computer and use it in GitHub Desktop.
Save rightfold/806b816eeb73b7ecc474 to your computer and use it in GitHub Desktop.
(ns errorcheck.core
(:require [clj-http.client :as client]
[clojure.edn :as edn]
[clojurewerkz.urly.core :as urly])
(:gen-class))
; TODO: Make not global.
; Single cookie store shared by every request this namespace makes: it is
; passed as :cookie-store to the login POST in -main and to each GET in
; get-links.
; NOTE(review): presumably this is what carries the login session over to the
; crawl requests - confirm against clj-http's cookie-store behaviour.
(def cs (clj-http.cookies/cookie-store))
(defn match?
  "Returns true when `regex` matches anywhere inside `str`, false otherwise.
  Always yields a real boolean rather than the match data itself."
  [regex str]
  ;; re-find yields nil on no match and a string/vector otherwise, so a
  ;; nil check is all that is needed to turn it into a boolean.
  (some? (re-find regex str)))
(defn find-urls
  "Scans `body` for src=... and href=... attributes (single- or double-quoted)
  and returns the set of their values, each resolved against `from` with
  urly/resolve."
  [from body]
  (->> (re-seq #"(src|href)=(\"|')(.+?)\2" body)
       ;; Each match is [whole attr-name quote value]; only the value is used.
       (map (fn [[_ _ _ target]] (urly/resolve from target)))
       set))
(defn get-links
  "Fetches `url` with the shared cookie store, prints the response status next
  to the URL, and returns a lazy seq of the URLs found in the response body
  for which `predicate` returns truthy.

  HTTP error statuses do not throw (:throw-exceptions false); they are only
  reported through the printed status line. A response without a body is
  treated as an empty document and therefore yields no links."
  [url predicate]
  (let [response   (client/get url {:throw-exceptions false, :cookie-store cs})
        ;; :body may be nil (e.g. a response that carries no entity); re-seq
        ;; inside find-urls would throw a NullPointerException on nil and abort
        ;; the whole crawl, so fall back to an empty string.
        body       (or (:body response) "")
        found-urls (find-urls url body)]
    (println (:status response) url)
    (filter predicate found-urls)))
(defn crawl
  "Breadth-first crawl. Visits every URL in `queue` in order, and every URL
  discovered along the way that matches the regex `predicate` and has not been
  seen before. Each visit goes through get-links, which prints the response
  status for the URL.

  queue     - seqable of URLs to start from
  had       - set of URLs that must not be enqueued (already visited)
  predicate - java.util.regex.Pattern a discovered URL has to match

  Always returns nil; the function is called for its side effects."
  [queue had predicate]
  ;; A persistent FIFO queue replaces the repeated (concat (rest queue) links):
  ;; that pattern wraps the pending URLs in one more lazy seq per visited page
  ;; and can overflow the stack once the frontier grows large.
  (loop [pending (into clojure.lang.PersistentQueue/EMPTY queue)
         ;; The initial URLs count as seen too, otherwise the seed would be
         ;; fetched a second time as soon as some page links back to it.
         seen    (into (set had) queue)]
    (when (seq pending)
      (let [links (get-links (peek pending)
                             #(and (not (contains? seen %))
                                   (match? predicate %)))]
        (recur (into (pop pending) links)
               (into seen links))))))
(defn -main
  "Command-line entry point.

  seed         - URL the crawl starts from
  predicate    - regex source; only discovered URLs matching it are followed
  login-url    - URL that receives the login form POST
  login-params - EDN text of the form parameters sent to `login-url`

  Posts the login form using the shared cookie store, then crawls from `seed`."
  [seed predicate login-url login-params]
  (let [form-fields (edn/read-string login-params)]
    (client/post login-url {:cookie-store cs, :form-params form-fields}))
  ;; The pattern is compiled only after the login request, as before.
  (crawl [seed] #{} (java.util.regex.Pattern/compile predicate)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment