Instantly share code, notes, and snippets.

Embed
What would you like to do?
Gauche CGI, Scrape site-titles in LAN nodes with threads. LAN内で動いているWebサイトからTitleを抜いてリンク集にするCGIです。 マルチスレッドのおかげで概ね高速に動作しますが、Cygwinのpthreadでは情報が抜ける場合が多いです。
#!/Gauche/bin/gosh
(use text.html-lite)
(use rfc.http)
(use rfc.uri)
(use www.cgi)
(use sxml.ssax)
(use sxml.sxpath)
(load "htmlprag.scm")
(use srfi-27) ; random-integer
(use srfi-43) ; vector-swap!
(use gauche.threads)
;from http://d.hatena.ne.jp/rui314/20070118/p1
(random-source-randomize! default-random-source)
(define (shuffle lis)
(let1 vec (list->vector lis)
(do ((i (- (vector-length vec) 1) (- i 1)))
((= i 0) (vector->list vec))
(vector-swap! vec i (random-integer (+ i 1))))))
(define (intlist n)
(if (= n 0)
()
(cons n (intlist (- n 1)))))
(define (gettitle host path)
(guard (exc (else ())) ;drop error
(let-values (((status head body) (http-get host path)))
(let ((xml (html->shtml body)))
(list
status
(uri-compose :scheme "http" :host host :path path)
((sxpath "/html/head/title/text()") xml))))))
(define (mkthr a)
(if (null? a)
()
(cons
(thread-start! (make-thread (lambda _(gettitle
(string-join (list "192.168.11." (number->string (car a))) "")
"/"))))
(mkthr (cdr a)))))
(define (main args)
(let ((urls (remove null? (map thread-join! (mkthr (shuffle (intlist 255)))))))
(cgi-main
(lambda (params)
`(,(cgi-header)
,(html-doctype)
,(html:html
(html:head (html:title "list"))
(html:body
(html:ul
(map (lambda (v)
(if (equal? (car v) "200")
(html:li
(html:a :href (car (cdr v))
(cdr (cdr v))
))
()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment