Skip to content

Instantly share code, notes, and snippets.

@nomnel
Created June 21, 2012 17:14
Show Gist options
  • Save nomnel/2967115 to your computer and use it in GitHub Desktop.
Save nomnel/2967115 to your computer and use it in GitHub Desktop.
Project Eulerの問題をDLして1つのhtmlファイルにまとめるやつ。マルチスレッドじゃないので遅いです
#!/usr/local/bin/gosh
(add-load-path "." :relative)
(use gauche.parseopt)
(use file.util)
(use rfc.http)
(use sxml.sxpath)
(load "htmlprag.scm")
(define SERVER "projecteuler.net")
(define (problem-uri id)
(string-append "/problem=" (number->string id)))
(define (page-uri page)
(if (= page 1) "/problems"
(string-append "/problems;page=" (number->string page))))
(define (http-get->shtml uri xpath)
(receive (status head body) (http-get SERVER uri)
((sxpath xpath) (html->shtml body))))
(define (last-problem-id)
(let1 last-page
(apply max (map string->number
(http-get->shtml (page-uri 1)
"//div[@class='pagination']/a/text()")))
(string->number
(last (http-get->shtml (page-uri last-page)
"//table[@class='grid']/tr/td//b/text()")))))
(define (download-files ids)
(dolist (id ids)
(if (check-directory-tree "./html" (string-append (format #f "~3,'0d" id) ".html"))
(print "already in the cache : Problem " (number->string id))
(begin
(let1 shtml (http-get->shtml (problem-uri id) "//div[@id='content']/*[position()!=1]")
(download-images shtml)
(call-with-output-file (string-append "./html/" (format #f "~3,'0d" id) ".html")
(^o (display (shtml->html shtml) o))))
(print "downloaded : Problem " (number->string id))))))
(define (download-images shtml)
(let1 files ((sxpath "//img/@src/text()") shtml)
(dolist (f files)
(unless (check-directory-tree "." f)
(make-directory* (string-join (reverse (cdr (reverse (string-split f "/")))) "/"))
(call-with-output-file (string-append "./" f)
(^(out) (http-get SERVER (string-append "/" f)
:sink out :flusher (^ _ #t))))))))
(define (concat-html-files name)
(sys-system
(string-append "cat ./html/* > " name)))
(define (main args)
(let-args (cdr args) ((name "n|name" "problems.html"))
(let1 ids (iota (last-problem-id) 1)
(make-directory* "./html")
(download-files ids)
(print "downloaded all problems")
(concat-html-files name)
(print "concatenated all problems to " name)))
0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment