Skip to content

Instantly share code, notes, and snippets.

@mattak
Created December 4, 2011 04:49
Show Gist options
  • Save mattak/1429207 to your computer and use it in GitHub Desktop.
Save mattak/1429207 to your computer and use it in GitHub Desktop.
Bing image search API client. Insert your own API key before use.
#! /usr/bin/env clojure
(import '(org.apache.http HttpHost)
'(org.apache.http.client HttpClient)
'(org.apache.http.client.methods HttpGet)
'(org.apache.http.impl.client DefaultHttpClient)
'(org.apache.http.conn.params ConnRoutePNames)
'(java.io File
BufferedReader InputStreamReader
InputStream FileInputStream
OutputStreamWriter FileOutputStream))
;(use '[clojure.contrib.duck-streams :only (reader writer read-lines write-lines)])
(use '[clojure.xml :only (parse)])
;; util
;;--------------
(defn rm
  "Delete the file named by the path string `file`.
  Returns the boolean result of java.io.File#delete."
  [file]
  (let [target (File. file)]
    (.delete target)))
(defn filetype
  "Return the lower-cased alphabetic extension at the very end of the
  string `file` (without the dot), or nil when there is none."
  [file]
  (when-let [[_ ext] (re-find #"\.([a-zA-Z]+)$" file)]
    (.toLowerCase ext)))
;; stream
;;--------------
(defn tois
  "Open the file named by the path string `f` and return a
  FileInputStream on it. The caller is responsible for closing it."
  [f]
  (FileInputStream. (File. f)))
(defn toos
  "Open the file named by the path string `f` for writing and return a
  FileOutputStream on it. The caller is responsible for closing it."
  [f]
  (FileOutputStream. (File. f)))
(defn lazy-input
  "Return a lazy sequence of characters read from `input-stream`.
  Each element is one value returned by `.read` converted with `char`,
  so bytes are NOT decoded through a charset. Nothing is read until the
  sequence is realized; the sequence ends when `.read` returns -1."
  [input-stream]
  (letfn [(step []
            (lazy-seq
              (let [c (.read input-stream)]
                (when-not (== c -1)
                  (cons (char c) (step))))))]
    (step)))
(defn lazy-input-line
  "Return a lazy sequence of the text lines of `input-stream`.
  The stream is decoded with the platform default charset and read
  through a BufferedReader; line terminators are not included in the
  returned strings. Nothing is read until the sequence is realized."
  [input-stream]
  (let [rd (BufferedReader. (InputStreamReader. input-stream))]
    ;; line-seq reads the first line as soon as it is called, so it is
    ;; wrapped in lazy-seq to defer all reading until first use.
    (lazy-seq (line-seq rd))))
(defn is2str
  "Read the whole of InputStream `is` as text and return it as a single
  string. The content is read line by line and the lines are joined
  with nothing between them, so line terminators are dropped.
  The stream is not closed here."
  [^InputStream is]
  (apply str (lazy-input-line is)))
(defn is2file
  "Copy every byte of InputStream `is` into the file at path `outpath`,
  creating or overwriting that file. The output file is closed when the
  copy ends; `is` itself is not closed here. Returns nil."
  [^InputStream is outpath]
  (with-open [os (FileOutputStream. (File. outpath))]
    ;; 8 KiB copy buffer (2^13 bytes).
    (let [data (byte-array 8192)]
      (loop []
        (let [readed (.read is data)]
          (when-not (== readed -1)
            (.write os data 0 readed)
            (recur)))))))
;; http
;;--------------
(defn env_proxy_and_port
  "Read the `http_proxy` environment variable and return `[host port]`
  (host string, integer port), or nil when the variable is unset, empty
  or cannot be parsed.

  Accepts values with or without a scheme (`http://proxy:8080/`,
  `proxy:8080`) and ignores any `user:password@` part. When no port is
  given, 80 is assumed (the default port of the http scheme)."
  []
  (when-let [orig (System/getenv "http_proxy")]
    (let [trimmed (.trim orig)
          ;; java.net.URI only recognises the host/port part when a
          ;; scheme is present, so add one for bare `host:port` values.
          with-scheme (if (re-find #"^[a-zA-Z][a-zA-Z0-9+.-]*://" trimmed)
                        trimmed
                        (str "http://" trimmed))
          uri (try
                (java.net.URI. with-scheme)
                (catch java.net.URISyntaxException _ nil))
          host (when uri (.getHost uri))
          port (when uri (.getPort uri))]
      (when (and host (pos? (count host)))
        ;; getPort returns -1 when the URI carries no explicit port.
        [host (if (neg? port) 80 port)]))))
(defn httpget
  "Perform an HTTP GET on `url`.

  Optional trailing map:
    :host, :port  route the request through this proxy (both required)
    :save         path of a file to write the response body to

  Returns the response body as a string (see `is2str`), or nil when
  :save is given, when the request fails (a message is printed to
  *err*), or when the response has no entity. The response stream is
  always closed and the client's connection manager is shut down before
  returning."
  [url & [{:keys [host port save]}]]
  (let [client (DefaultHttpClient.)
        method (HttpGet. url)]
    (when (and host port)
      (doto (.getParams client)
        (.setParameter ConnRoutePNames/DEFAULT_PROXY (HttpHost. host port))))
    (try
      (when-let [response (try
                            (.execute client method)
                            (catch Exception e
                              ;; Best effort: report and carry on with nil.
                              (.println *err* (str "Error: GET " url " failed: " e))
                              nil))]
        ;; getEntity may be null for responses without a body.
        (when-let [entity (.getEntity response)]
          (with-open [body (.getContent entity)]
            (if save
              (is2file body save)
              (is2str body)))))
      (finally
        ;; Release the connections held by this one-shot client.
        (.. client getConnectionManager shutdown)))))
(defn httpget-autoproxy
  "Call `httpget` on `url`, adding :host/:port from the `http_proxy`
  environment variable when `env_proxy_and_port` finds one, and passing
  :save through when it was supplied in the optional trailing map."
  [url & [{:keys [save]}]]
  (let [[proxy-host proxy-port :as prx] (env_proxy_and_port)
        opts (merge {}
                    (when prx {:host proxy-host :port proxy-port})
                    (when save {:save save}))]
    (httpget url opts)))
;; xml
;;--------------
(defn xml-bytag
  "Walk `xml` (an element tree as consumed by `xml-seq`) and return a
  lazy sequence with the :content of every element whose :tag equals
  `tag`, in document order."
  [xml tag]
  (->> (xml-seq xml)
       (filter (fn [node] (= tag (:tag node))))
       (map :content)))
(defn content1
  "Return the first item of the :content of element `elm`, or nil."
  [elm]
  (-> elm :content first))
(defn imgcount
  "Parse the first content item of the first :mms:Total element found
  in `xml` as an integer and return it."
  [xml]
  (let [total-text (ffirst (xml-bytag xml :mms:Total))]
    (Integer/parseInt total-text)))
(defn imglist
  "Return a lazy sequence with one entry per :mms:ImageResult element
  of `xml`: the first content item of that result's first :mms:MediaUrl
  child (nil when the result has no such child)."
  [xml]
  (let [media-url? (fn [node] (= :mms:MediaUrl (:tag node)))]
    (map (fn [children]
           (first (map content1 (filter media-url? children))))
         (xml-bytag xml :mms:ImageResult))))
(defn getsaveall
  "Download every URL of `urllist` to a file, pairing URLs with the base
  names in `namelist` positionally (the shorter list wins). Each
  download runs on its own newly started thread. The saved file name is
  the base name plus the URL's extension as reported by `filetype`, or
  \".jpg\" when the URL has none."
  [urllist namelist]
  (doseq [[url basename] (map vector urllist namelist)]
    (let [download (fn []
                     (let [suffix (if-let [type (filetype url)]
                                    (str "." type)
                                    ".jpg")]
                       (httpget-autoproxy url {:save (str basename suffix)})))]
      (.start (Thread. download)))))
;; query
;;--------------
(defn getnamelist
  "Format every item of `nlst` with the format string `fmt` and return
  the results as a lazy sequence.
  Example: (getnamelist \"%03d.jpg\" '(1 2 3 4))
           => (\"001.jpg\" \"002.jpg\" \"003.jpg\" \"004.jpg\")"
  [fmt nlst]
  (map (fn [n] (format fmt n)) nlst))
(defn queryurl
  "Build the Bing XML API URL for an image search.

    appkey  application id, inserted as-is
    query   search term; URL-encoded as UTF-8 form data here, so pass
            it raw (spaces, `&`, non-ASCII characters are fine)
    offset  index of the first image to request
    count   number of images to request"
  [appkey query offset count]
  (str "http://api.bing.net/xml.aspx?Appid="
       appkey
       ;; Without encoding, a space or `&` in the term corrupts the URL.
       "&query=" (java.net.URLEncoder/encode (str query) "UTF-8")
       "&sources=image"
       "&image.offset=" offset
       "&image.count=" count))
(defn divided-query
  "Split a request for `count` images starting at `offset` into
  consecutive pages of at most 50 images and return the query URL of
  each page, in order, as a lazy sequence. Returns an empty sequence
  when `count` is zero or negative.

  Example: offset 0, count 120 gives pages (0, 50), (50, 50), (100, 20)."
  [appkey query offset count]
  (let [page-size 50
        end (+ offset count)]
    ;; Each page starts exactly one page-size after the previous one, so
    ;; no range of results is skipped or requested twice.
    (for [ofs (range offset end page-size)]
      (queryurl appkey query ofs (min page-size (- end ofs))))))
(defn query2imglist
  "Run the paged image queries for `query` (see `divided-query`) and
  return one flat, fully realized sequence of the image URLs found.

  Each page is downloaded to the temporary file \".tmp.xml\" in the
  working directory, parsed, and the file is deleted again - also when
  parsing fails. A page whose download produced no file is skipped with
  a message on *err*. All network and file work happens before this
  function returns, not lazily when the result is consumed."
  [appkey query offset count]
  (let [tmp ".tmp.xml"
        fetch-page (fn [page-url]
                     (httpget-autoproxy page-url {:save tmp})
                     (if (.exists (File. tmp))
                       (try
                         ;; Realize the URLs before the file is removed.
                         (doall (imglist (parse tmp)))
                         (finally
                           (rm tmp)))
                       (do
                         (.println *err* (str "Error: no response saved for " page-url))
                         ())))]
    (doall
      (flatten
        (for [s (divided-query appkey query offset count)]
          (fetch-page s))))))
;; main
;;--------------
; usage
(defn usage-exit
  "Print the command-line usage text to standard output, then terminate
  the JVM with exit status 0."
  []
  (doseq [line ["usage: search-word number? offset? format?"
                " number: query image number, default 100"
                " offset: query image offset, default 0"
                " format: save file name format except extension, default \"%03d\""
                "ex:"
                " bing cat 100 0 \"%cat03d\""
                " bing cat 200 100"]]
    (println line))
  (System/exit 0))
(defn args
  "Return the command-line argument at index `n`. When fewer than n+1
  arguments were given, return the first of the optional `default`
  values instead, or nil when no default was passed."
  [n & default]
  (let [argv *command-line-args*]
    (if (< n (count argv))
      (nth argv n)
      (first default))))
; Script entry point: everything below runs when the file is loaded.
; Argument check: with no command-line arguments at all, print the
; usage text and exit.
(if (< (count *command-line-args*) 1)
(usage-exit))
; Run configuration, built from the positional command-line arguments:
;   :apikey        Bing application id (replace the placeholder)
;   :tmpxml        not read by any code visible in this file
;   :search-word   argument 0, the search term
;   :query-number  argument 1 parsed as an integer, default 100
;   :query-offset  argument 2 parsed as an integer, default 0
;   :save-format   argument 3, format string for file names, default "%03d"
; Integer/parseInt throws NumberFormatException on a non-numeric value.
(def config {
:apikey "INSERT_YOUR_APIKEY"
:tmpxml "tmp.xml"
:search-word (args 0)
:query-number (Integer/parseInt (args 1 "100"))
:query-offset (Integer/parseInt (args 2 "0"))
:save-format (args 3 "%03d")
})
; Base file names (no extension): one per index from query-offset up to
; but excluding query-offset + query-number, formatted with :save-format.
(def namelist
(getnamelist
(:save-format config)
(range
(:query-offset config)
(+ (:query-offset config) (:query-number config)))))
; Image URLs returned by the search for the configured word and range.
(def urllist
(query2imglist
(:apikey config)
(:search-word config)
(:query-offset config)
(:query-number config)))
; Download every URL, pairing URLs with names positionally.
(getsaveall urllist namelist)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment