Created
June 21, 2012 17:14
-
-
Save nomnel/2967115 to your computer and use it in GitHub Desktop.
Project Eulerの問題をDLして1つのhtmlファイルにまとめるやつ。マルチスレッドじゃないので遅いです
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/gosh | |
(add-load-path "." :relative) | |
(use gauche.parseopt) | |
(use file.util) | |
(use rfc.http) | |
(use sxml.sxpath) | |
(load "htmlprag.scm") | |
(define SERVER "projecteuler.net") | |
(define (problem-uri id) | |
(string-append "/problem=" (number->string id))) | |
(define (page-uri page) | |
(if (= page 1) "/problems" | |
(string-append "/problems;page=" (number->string page)))) | |
(define (http-get->shtml uri xpath) | |
(receive (status head body) (http-get SERVER uri) | |
((sxpath xpath) (html->shtml body)))) | |
(define (last-problem-id) | |
(let1 last-page | |
(apply max (map string->number | |
(http-get->shtml (page-uri 1) | |
"//div[@class='pagination']/a/text()"))) | |
(string->number | |
(last (http-get->shtml (page-uri last-page) | |
"//table[@class='grid']/tr/td//b/text()"))))) | |
(define (download-files ids) | |
(dolist (id ids) | |
(if (check-directory-tree "./html" (string-append (format #f "~3,'0d" id) ".html")) | |
(print "already in the cache : Problem " (number->string id)) | |
(begin | |
(let1 shtml (http-get->shtml (problem-uri id) "//div[@id='content']/*[position()!=1]") | |
(download-images shtml) | |
(call-with-output-file (string-append "./html/" (format #f "~3,'0d" id) ".html") | |
(^o (display (shtml->html shtml) o)))) | |
(print "downloaded : Problem " (number->string id)))))) | |
(define (download-images shtml) | |
(let1 files ((sxpath "//img/@src/text()") shtml) | |
(dolist (f files) | |
(unless (check-directory-tree "." f) | |
(make-directory* (string-join (reverse (cdr (reverse (string-split f "/")))) "/")) | |
(call-with-output-file (string-append "./" f) | |
(^(out) (http-get SERVER (string-append "/" f) | |
:sink out :flusher (^ _ #t)))))))) | |
(define (concat-html-files name) | |
(sys-system | |
(string-append "cat ./html/* > " name))) | |
(define (main args) | |
(let-args (cdr args) ((name "n|name" "problems.html")) | |
(let1 ids (iota (last-problem-id) 1) | |
(make-directory* "./html") | |
(download-files ids) | |
(print "downloaded all problems") | |
(concat-html-files name) | |
(print "concatenated all problems to " name))) | |
0) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment