Skip to content

Instantly share code, notes, and snippets.

@mopemope
Last active August 29, 2015 14:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mopemope/99246e54d019f349f47b to your computer and use it in GitHub Desktop.
Save mopemope/99246e54d019f349f47b to your computer and use it in GitHub Desktop.
Megami Watcher
(defproject megami.clj "0.1.0-SNAPSHOT"
:description "FIXME: write description"
:url "http://example.com/FIXME"
:license {:name "Eclipse Public License"
:url "http://www.eclipse.org/legal/epl-v10.html"}
:dependencies [
[org.clojure/clojure "1.6.0"]
[clojure-soup "0.0.1"]
[org.clojure/core.async "0.1.303.0-886421-alpha"]
[com.taoensso/timbre "3.2.1"]
[org.clojure/tools.cli "0.2.4"]
[clj-http "0.9.2"]
]
:main megami.watch)
(ns megami.watch
(import
(java.util.regex Pattern)
(org.jsoup.nodes Element))
(:require
[clojure.java.io :as io]
[taoensso.timbre :as timbre]
[clojure.core.async :as async :refer [<! >! <!! >!! timeout chan alt! alts!! go]]
[clj-http.client :as client])
(:use
[jsoup.soup]
[clojure.tools.cli :only [cli]]
))
(timbre/refer-timbre)
(def ^:private url-re (Pattern/compile ".*/(\\d+)/.*" Pattern/DOTALL))
(def ^:dynamic *output-path* "./data")
(def ^:private threads-url "http://kilauea.bbspink.com/megami/subback.html")
(def ^:private encoding "MS932")
(def ^:private title-re (Pattern/compile "\\d:\\s+(.*)\\s*\\((\\d+)\\)" Pattern/DOTALL))
(def ^:private wait (* 1000 60 ))
(def ^:private cmt-re (Pattern/compile "([\\w-:/.]+\\.jpg)|([\\w:/.]+\\.jpeg)|([\\w:/.]*\\.png)|([\\w:/.]*imepita.jp/[\\w./]+)|([\\w:/.]*imepic.jp/[\\w./]+)|([\\w:/.]*i.imgur.com/[\\w./]+)" Pattern/DOTALL))
(defn write-file [file data]
(with-open [w (io/output-stream file)]
(.write w data)))
(defn download [line url file]
(try
(let [res (client/get url {:as :byte-array})
status (:status res)
^String ct (:Content-Type (:headers res))
^String length (:Content-Length (:headers res))
]
(when (and (= status 200) (not (.startsWith ct "text/html")) (not= length "503"))
(let [ext (str "." (last (clojure.string/split ct #"/")))]
(debug (:headers res))
(infof "start download from %s" url)
(infof "download to %s" (str file ext))
(write-file (str file ext) (:body res)))))
(catch Exception e
(do
(errorf "%s:%s" line url)
(error e )
))))
(defn- get-dat-url [url]
(let [[_ index] (re-find (re-matcher url-re url))]
(format "http://kilauea.bbspink.com/megami/dat/%s.dat" index)))
(defn- format-url [root ^String href]
(str root (subs href 0 (- (.length href) 3))))
(defn- parse-int [s]
(Integer. ^String (re-find #"\d+" s)))
(defn- get-thread-info [title]
(let [[_ title res-count] (re-find (re-matcher title-re title))]
{:title title :res-count (parse-int res-count)}))
(defn- parse-thread-list [data]
(let [base-url (first ($ (parse data)
"base"
(attr "href")))]
(when base-url
($ (parse data)
"a[href$=50]"
(map #(assoc (get-thread-info (.text ^Element %))
:url (format-url base-url (.attr ^Element % "href"))))))))
(defn- filename [url]
(last (clojure.string/split url #"/")))
(defn- complement-url [^String url]
(cond
(.startsWith url "http://") url
(.startsWith url "ttp://") (str "h" url)
:else (str "http://" url)))
(defn- search-image [data]
(doseq [line (clojure.string/split (:comment data) #"\n") ]
(let [[m] (re-find (re-matcher cmt-re line))]
(when m
(let [url (complement-url (clojure.string/trim m))
filename (filename url)]
(download line url (str *output-path* "/" filename))))
))
data)
(defn- create-dat-info [thread-info]
(let [title (:title thread-info)
url (:url thread-info)]
(fn [idx line]
(let [[hndl mailto date cmt] (clojure.string/split line #"<>")
cmt (clojure.string/replace cmt #"<br>" "\n")
data {
:title title
:url url
:no (inc idx)
:handle hndl
:mailto mailto
:date date
:comment cmt
}]
(search-image data)))))
(defn- parse-thread [thread-info]
(let [data (slurp (get-dat-url (:url thread-info)) :encoding encoding)
lines (clojure.string/split data #"\r\n|\n")]
(vec (map-indexed (create-dat-info thread-info) lines))))
(defn- get-thread-list []
(let [^String data (slurp threads-url :encoding encoding)]
(parse-thread-list data)))
(defn- watch-megami []
(try
(while true
(let [ts (shuffle (get-thread-list))]
(doseq [thread-info ts]
(infof "start %s" thread-info)
(let [data (parse-thread thread-info)]
(alts!! [(timeout wait)]))
(infof "end %s" thread-info)))
(alts!! [(timeout wait)]))
(catch Exception e
(error e)))
"OK")
(defn start-megami-watch []
(let [c (chan)]
(go (>! c (watch-megami)))
(let [[v ch] (alts!! [c])]
(infof "OK"))))
(defn -main [& args]
(let [[options args banner] (cli args
["-h" "--help" "Show help" :default false :flag true]
["-o" "--output-path" "Set download path" :default "./data"]
)]
(when (:help options)
(println banner)
(System/exit 0))
(let [path (:output-path options)]
(binding [*output-path* path]
(start-megami-watch))
)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment