-
-
Save takehiko/38fb7b6173069a142aac to your computer and use it in GitHub Desktop.
Hatena Fotolife Image Downloader
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
# -*- coding: utf-8 -*-
# hatena-fotolife-downloader.rb
# by takehikom (http://d.hatena.ne.jp/takehikom/)
# Ruby older than 1.9 needs $KCODE to treat source and strings as UTF-8;
# the assignment is skipped on every later interpreter.
$KCODE = "u" if RUBY_VERSION < "1.9"
require "fileutils"
require "open-uri"
require "optparse"
require "uri"
# Hatena account used when the caller supplies no :user option.
DEFAULT_USERNAME = "takehikom"

# Pause, in seconds, applied between image downloads (also passed to
# `wget -w` in the simple-wget mode).
SLEEP_TIME = 2
# Collects the image URLs a Hatena Fotolife user has published and downloads
# them with wget.
#
# The work is split into two phases that can be run independently:
#
# 1. listing  - read the user's RSS feed (and the feed of every folder linked
#               from the user's top page) and write the image URLs to
#               "hatena-fotolife-downloader-url-<user>.txt";
# 2. download - read that list file back and fetch every URL with wget.
#
# Recognised option keys (all optional):
#   :user         Hatena user name (falls back to DEFAULT_USERNAME)
#   :dir          local directory for the images (falls back to the user name)
#   :filemax      stop after this many files have been downloaded
#   :downloadonly skip the listing phase
#   :listonly     skip the download phase
#   :force        download even when the target file already exists
#   :simplewget   hand the whole list file to a single `wget -i` call
#   :quiet        print nothing and pass -q to wget
class HatenaFotolifeDownloader
  # Root of the Hatena Fotolife site.
  BASE_URL = "http://f.hatena.ne.jp"

  # A feed page holding fewer image URLs than this is taken to be the last one.
  RSS_PAGE_SIZE = 10

  # h:: option hash (see the class comment); it is copied, never modified.
  def initialize(h = {})
    @opt = h.dup
    @user = @opt[:user] || DEFAULT_USERNAME
    @opt[:dir] ||= @user
    @filename_urllist = "hatena-fotolife-downloader-url-#{@user}.txt"
    # Kept as an argument vector so it can be run without going through a shell.
    @wget_command = ["wget"]
    @wget_command << "-q" if @opt[:quiet]
  end

  # Runs the listing phase and then the download phase, honouring
  # :downloadonly, :listonly and :simplewget.
  def start
    get_image_url unless @opt[:downloadonly]
    return if @opt[:listonly]

    @opt[:simplewget] ? download_by_url_simple : download_by_url
  end

  private

  # Prints +message+ unless :quiet is set. Messages tagged :debug are shown
  # only when Ruby runs with $DEBUG enabled.
  def puts(message, level = nil)
    return if @opt[:quiet]
    return if level == :debug && !$DEBUG

    Kernel.puts message
  end

  # Echoes the command line and runs it. Callers pass the program and each
  # argument separately, so no shell is involved and spaces or shell
  # metacharacters in URLs and file names are passed through literally.
  # Returns what Kernel.system returns.
  def system(*command)
    puts(command.join(" "), :info)
    Kernel.system(*command)
  end

  # Creates +dir+ (including parents). Returns false when the directory
  # already exists and true after creating it; raises a SystemCallError when
  # creation fails.
  def mkdir(dir)
    return false if File.directory?(dir)

    puts("mkdir -p #{dir}", :info)
    FileUtils.mkdir_p(dir)
    true
  end

  # Opens +url+ through open-uri and yields the response body as an IO.
  # Going through URI#open instead of Kernel#open keeps this working on
  # Ruby 3.0+, where Kernel#open no longer accepts URLs.
  # Returns the value of the block.
  def open_url(url, &block)
    URI.parse(url).open(&block)
  end

  # Percent-decodes a folder name taken from a link. "+" is protected first
  # because URI.decode_www_form_component would otherwise turn it into a space.
  # A string that is not valid percent-encoding is returned unchanged.
  def unescape_dir(str)
    # Ruby 1.8 has no decode_www_form_component but still ships URI.decode.
    return URI.decode(str) unless URI.respond_to?(:decode_www_form_component)

    URI.decode_www_form_component(str.gsub("+", "%2B"))
  rescue ArgumentError
    str
  end

  # Listing phase: gathers the image URLs of the user's top-level feed and of
  # every folder, then writes them to the list file. Unless :simplewget is
  # set, the URLs of each folder are preceded by a "# <folder name>" line
  # that the download phase uses as the sub-directory name.
  # Returns a hash mapping folder ("" for the top level) to an array of URLs.
  def get_image_url
    url_h = {} # dir (may be empty) => array of urls

    puts("[rss] #{BASE_URL}/#{@user}", :info)
    url_h[""] = get_image_url_by_rss(@user)
    puts(url_h[""], :debug)

    get_subdir.each do |dir|
      puts("[rss] #{BASE_URL}/#{@user}/#{dir}", :info)
      url_h[dir] = get_image_url_by_rss(File.join(@user, dir))
      puts(url_h[dir], :debug)
    end

    File.open(@filename_urllist, "w") do |f_out|
      url_h.keys.sort.each do |dir|
        f_out.puts "# #{unescape_dir(dir)}" unless @opt[:simplewget] || dir.empty?
        f_out.puts url_h[dir]
      end
    end
    url_h
  end

  # Walks the paged RSS feed below BASE_URL/<str> and returns every image URL
  # found, in feed order.
  # http://mikio.hatenablog.com/entry/2012/11/18/014228
  def get_image_url_by_rss(str = @user)
    url_a = []
    page = 1
    loop do
      rss_url = "#{BASE_URL}/#{str}/rss?page=#{page}"
      puts("[rss] #{rss_url}", :info)
      page_urls = open_url(rss_url) { |file| extract_image_urls(file) }

      # Stop when a page brings nothing new; this also ends the loop should
      # the server keep answering later pages with entries already seen.
      break if (page_urls - url_a).empty?

      url_a.concat(page_urls)
      break if page_urls.size < RSS_PAGE_SIZE

      page += 1
    end
    url_a
  end

  # Returns the contents of every <hatena:imageurl> element found in +io+
  # (one element per line is expected) that is an http or https URL.
  def extract_image_urls(io)
    urls = []
    io.each_line do |line|
      next unless line.include?("<hatena:imageurl>")

      url = line.sub(/.*<hatena:imageurl>/, "").sub(/<.*/, "").strip
      urls << url if url =~ /\Ahttps?:\/\//
    end
    urls
  end

  # Scans the user's top page for folder links and returns the folder names
  # exactly as they appear in the href (still percent-encoded).
  def get_subdir
    dir_a = []
    # The user name is escaped so that it is matched literally.
    folder_href = /href="\/#{Regexp.escape(@user)}\/(.*?)\/"/
    open_url("#{BASE_URL}/#{@user}") do |file|
      file.each_line do |line|
        next unless line.index('alt="folder"')

        dir_a << $1 if folder_href =~ line
      end
    end
    dir_a
  end

  # Download phase: fetches every URL of the list file into :dir, switching
  # to a sub-directory whenever a "# <name>" line is met. Existing files are
  # skipped unless :force is set, and the loop stops once :filemax files
  # have been fetched. A summary line is printed in every case.
  def download_by_url
    download_count = 0
    limit = Numeric === @opt[:filemax] ? @opt[:filemax] : nil
    dir = @opt[:dir]
    mkdir(dir)
    dir2 = ""

    File.open(@filename_urllist) do |f_in|
      f_in.each_line do |line|
        if /^\# (.+)$/ =~ line
          dir2 = $1
          mkdir(File.join(dir, dir2))
        end

        url = line.strip
        next unless url.index("http") == 0

        basename_image = url.split("/")[-1]
        filename_image = File.join(dir, dir2, basename_image)
        puts("[image] #{url} => #{filename_image}", :info)
        if File.file?(filename_image) && !@opt[:force]
          puts(" skipped because the file exists")
          next
        end

        system(*(@wget_command + ["-O", filename_image, url]))
        download_count += 1
        # Leave the loop (not the method) so the summary below is still shown.
        break if limit && download_count >= limit

        sleep SLEEP_TIME
      end
    end
    puts("#{download_count} file(s) downloaded", :info)
  end

  # Download phase, simple variant: lets one wget process fetch the whole
  # list file (-i), waiting SLEEP_TIME seconds between requests (-w) and
  # recreating the remote directory layout locally (-x).
  def download_by_url_simple
    system(*(@wget_command + ["-w", SLEEP_TIME.to_s, "-x", "-i", @filename_urllist]))
  end
end
# Command-line entry point: translate the switches into the option hash
# understood by HatenaFotolifeDownloader and run it.
if __FILE__ == $0
  h = {}
  op = OptionParser.new
  op.on("-u", "--user=VAL", "hatena user name") { |v| h[:user] = v }
  op.on("-d", "--dir=VAL", "directory for image files") { |v| h[:dir] = v }
  # Integer coercion makes OptionParser reject a non-numeric limit instead of
  # silently turning it into 0.
  op.on("-l", "--limit=VAL", Integer, "max of files downloaded") { |v| h[:filemax] = v }
  op.on("-D", "--download-only", "download only") { h[:downloadonly] = true }
  op.on("-L", "--list-only", "make list only") { h[:listonly] = true }
  op.on("-F", "--force-get", "download even if file exists") { h[:force] = true }
  op.on("-S", "--simple-wget", "download by simple wget") { h[:simplewget] = true }
  op.on("-q", "--quiet", "quiet") { h[:quiet] = true }

  begin
    op.parse!(ARGV)
  rescue OptionParser::ParseError => e
    # Report bad switches with the usage text rather than a backtrace.
    abort "#{e.message}\n#{op.help}"
  end

  HatenaFotolifeDownloader.new(h).start
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment