Last active
August 29, 2015 14:06
-
-
Save fukamachi/55f8e9125842bacc298d to your computer and use it in GitHub Desktop.
Twitpic backup script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#| | |
Twitpic Downloader | |
================== | |
This Common Lisp script downloads all image/video files from Twitpic account to the current directory. | |
## Usage | |
;; SBCL | |
$ sbcl --noinform --load twitpic-dl.lisp | |
;; Clozure CL | |
$ ccl --load twitpic-dl.lisp | |
;; GNU CLISP | |
$ clisp -q -i twitpic-dl.lisp | |
;; Allegro CL | |
$ alisp -L ~/.clinit.cl -L twitpic-dl.lisp | |
;; CMUCL | |
$ cmucl -load twitpic-dl.lisp | |
## Features | |
* Can download both of images and videos. | |
* Auto retrying when the Twitpic server responds an HTTP error. | |
## Requirements | |
* Common Lisp implementation (SBCL, Clozure CL, GNU CLISP, Allegro CL or CMUCL) | |
* Quicklisp | |
SBCL and Clozure CL are recommended environments. | |
If you use GNU CLISP, make sure it has compiled with dynamic FFI support. | |
## License | |
Public Domain | |
|# | |
(in-package :cl-user) | |
(defun terminate (&optional (status 0) format-string &rest format-arguments) | |
(declare (ignorable status)) | |
(when format-string | |
(fresh-line *error-output*) | |
(apply #'format *error-output* format-string format-arguments) | |
(fresh-line *error-output*)) | |
#+ccl (ccl:quit status) | |
#+sbcl (sb-ext:exit :code status :abort t) | |
#+allegro (excl:exit status :quiet t) | |
#+clisp (ext:quit status) | |
#+cmucl (unix:unix-exit status) | |
#+ecl (ext:quit status) | |
#+abcl (ext:exit :status status) | |
#-(or ccl sbcl allegro clisp cmucl ecl abcl) (cl-user::quit)) | |
#-quicklisp | |
(terminate 1 "[Error] Quicklisp is required. Go get it from http://www.quicklisp.org/beta/") | |
(let ((*standard-output* (make-broadcast-stream))) | |
(ql:quickload '(:drakma :yason :alexandria :lquery :trivial-signal))) | |
(setf (trivial-signal:signal-handler :int) #'terminate) | |
(defpackage twitpic-dl | |
(:use :cl) | |
(:import-from :alexandria | |
:when-let | |
:copy-stream | |
:with-gensyms | |
:with-input-from-file | |
:with-output-to-file) | |
(:export :download-user-images | |
:user-images)) | |
(in-package :twitpic-dl) | |
(define-condition http-request-error (simple-error) | |
((url :initarg :url :type string))) | |
(define-condition http-request-failed (http-request-error) | |
((status :initarg :status :type integer)) | |
(:report | |
(lambda (condition stream) | |
(with-slots (url status) condition | |
(format stream "HTTP Request to ~S has failed (Code=~D)." | |
url | |
status))))) | |
(define-condition http-request-not-found (http-request-error) ()) | |
(defun retrieve-url (url &key (want-stream t)) | |
(tagbody requesting | |
(multiple-value-bind (body status) | |
(drakma:http-request url | |
:want-stream want-stream | |
:preserve-uri t) | |
(when (= status 404) | |
(error 'http-request-not-found :url url)) | |
(unless (= status 200) | |
(restart-case (error 'http-request-failed | |
:url url | |
:status status) | |
(retry-http-request () | |
:report "Retry the HTTP request." | |
(go requesting)))) | |
(return-from retrieve-url body)))) | |
(defmacro with-retrying-http-request (times &body body) | |
(with-gensyms (retried retry error) | |
`(let ((,retried 0)) | |
(handler-bind ((http-request-failed | |
(lambda (,error) | |
(incf ,retried) | |
(unless (< ,times ,retried) | |
(when-let (,retry (find-restart 'retry-http-request ,error)) | |
(format *error-output* "~&[Error] ~A~%" ,error) | |
(format t "~&Retrying the HTTP request in ~D seconds...~%" | |
(* ,retried 5)) | |
(sleep (* ,retried 5)) | |
(invoke-restart ,retry)))))) | |
,@body)))) | |
(defun download-file (url output &key (element-type '(unsigned-byte 8)) skip-if-exists) | |
(when (and skip-if-exists | |
(probe-file output)) | |
(format t "~&\"~A\" exists. Skipped.~%" output) | |
(return-from download-file)) | |
(format t "~&Downloading ~S to \"~A\"...~%" url output) | |
(let ((body (with-retrying-http-request 3 | |
(retrieve-url url)))) | |
(with-open-file (out output | |
:direction :output :if-exists :supersede | |
:element-type element-type) | |
(copy-stream body out | |
:element-type element-type)))) | |
(defun user-images (username) | |
(flet ((user-show-url (username &optional page) | |
(format nil "http://api.twitpic.com/2/users/show.json?username=~A~:[~;~:*&page=~D~]" | |
username | |
page))) | |
(format t "~&Collecting image informations...This may take a long time...~%") | |
(loop for page from 1 | |
for url = (user-show-url username page) | |
append (handler-case (gethash "images" | |
(yason:parse (with-retrying-http-request 3 | |
(retrieve-url url)))) | |
(http-request-not-found () | |
(return images))) | |
into images))) | |
(defun file-url (image) | |
(flet ((image-file-url (image-id image-type) | |
(format nil | |
"http://d3j5vwomefv46c.cloudfront.net/photos/large/~D.~A" | |
image-id | |
image-type)) | |
(video-file-url (image-short-id) | |
(lquery:$ | |
(initialize (with-retrying-http-request 3 | |
(retrieve-url (format nil "http://twitpic.com/~A" | |
image-short-id) | |
:want-stream nil)))) | |
(aref | |
(lquery:$ "#media #video-display video source" | |
(attr "src")) | |
0))) | |
(if (gethash "video" image) | |
(video-file-url (gethash "short_id" image)) | |
(image-file-url (gethash "id" image) (gethash "type" image))))) | |
(defun download-user-images (username &key (skip-if-exists t)) | |
(format t "~&Start downloading images of ~S from Twitpic...~%" username) | |
(let ((images (user-images username)) | |
(gaveup-count 0)) | |
(format t "~&~D images are found.~%" (length images)) | |
(flet ((output-filename (image) | |
(format nil "~A_~A.~A" | |
(substitute #\. #\: | |
(substitute #\_ #\Space | |
(gethash "timestamp" image))) | |
(gethash "short_id" image) | |
(gethash "type" image)))) | |
(with-output-to-file (out #P"tweets.txt" :if-exists :supersede) | |
(dolist (image images) | |
(format out "~A~%~A~2%" | |
(output-filename image) | |
(gethash "message" image)))) | |
(dolist (image images) | |
(let ((filename (output-filename image))) | |
(handler-case | |
(download-file (file-url image) | |
(pathname filename) | |
:skip-if-exists skip-if-exists) | |
(http-request-error (error) | |
(format *error-output* "~&[Error] ~A~%" error) | |
(format t "~&Gave up to download ~S.~%" | |
filename) | |
(incf gaveup-count)))))) | |
(if (zerop gaveup-count) | |
(format t "~&Successfully downloaded all ~D files. Good for you :)~%" | |
(length images)) | |
(format t "~&Gave up ~D files to download. Retry later.~%" | |
gaveup-count)) | |
(zerop gaveup-count))) | |
#|======================================== | |
Main | |
|# | |
(in-package :cl-user) | |
(defun read-username () | |
(format t "~&Twitpic username: ") | |
(force-output) | |
(loop for username = (read-line *standard-input* nil :eof) | |
if (or (eq username :eof) | |
(string= username "")) | |
do (terminate) | |
else | |
do (return username))) | |
(let ((result (twitpic-dl:download-user-images (read-username)))) | |
(terminate (if result 0 1))) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment