Skip to content

Instantly share code, notes, and snippets.

@takehiko
Created February 22, 2013 20:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save takehiko/38fb7b6173069a142aac to your computer and use it in GitHub Desktop.
Save takehiko/38fb7b6173069a142aac to your computer and use it in GitHub Desktop.
Hatena Fotolife Image Downloader
#!/usr/bin/env ruby
# -*- coding: utf-8 -*-
# hatena-fotolife-downloader.rb
# by takehikom (http://d.hatena.ne.jp/takehikom/)
# Ruby older than 1.9 takes its default string encoding from $KCODE;
# "u" selects UTF-8. Nothing is assigned on 1.9 and later.
$KCODE = "u" if RUBY_VERSION < "1.9"
require "optparse"
require "open-uri"
require "uri"
# Hatena user whose images are fetched when no --user option is given.
# Frozen so the shared default cannot be mutated through a reference to it.
DEFAULT_USERNAME = "takehikom".freeze
# Seconds to wait between two image downloads (also passed to `wget -w`).
SLEEP_TIME = 2
# Downloads the images a Hatena Fotolife user has published.
#
# The work is split into two phases that can be run separately:
#
# 1. Listing: read the user's RSS feeds (top level and every folder found on
#    the user's page) and write all image URLs to a text file named
#    "hatena-fotolife-downloader-url-<user>.txt" in the current directory.
# 2. Downloading: read that text file back and fetch each URL with wget.
#
# Recognised option keys (all optional):
#   :user         - Hatena user name (defaults to DEFAULT_USERNAME)
#   :dir          - directory for downloaded files (defaults to the user name)
#   :filemax      - stop after this many downloads (Numeric)
#   :downloadonly - skip the listing phase
#   :listonly     - skip the downloading phase
#   :force        - download even when the target file already exists
#   :simplewget   - hand the whole URL list to a single `wget -i` call
#   :quiet        - print nothing and pass -q to wget
class HatenaFotolifeDownloader
  # A feed page with fewer image URLs than this is treated as the last page.
  # NOTE(review): presumably the feed's page size -- confirm against the service.
  RSS_PAGE_SIZE = 10

  # Matches one image URL inside a <hatena:imageurl> element. Both http and
  # https are accepted so that feeds serving https URLs are not silently empty.
  IMAGE_URL_PATTERN = %r{<hatena:imageurl>\s*(https?://[^<\s]+)}

  # h - Hash of options (see the class comment). The hash is copied, so the
  #     caller's object is never modified.
  def initialize(h = {})
    @opt = h.dup
    @user = @opt[:user] || DEFAULT_USERNAME
    @opt[:dir] ||= @user
    @filename_urllist = "hatena-fotolife-downloader-url-#{@user}.txt"
    # Kept as an argument vector so it can be run without a shell.
    @wget_command = ["wget"]
    @wget_command << "-q" if @opt[:quiet]
  end

  # Runs the listing phase unless :downloadonly is set, then the downloading
  # phase unless :listonly is set.
  def start
    get_image_url unless @opt[:downloadonly]
    return if @opt[:listonly]
    @opt[:simplewget] ? download_by_url_simple : download_by_url
  end

  private

  # Prints message unless :quiet is set. Messages with level :debug are only
  # printed when Ruby runs with $DEBUG enabled.
  def puts(message, level = nil)
    return if @opt[:quiet]
    return if level == :debug && !$DEBUG
    Kernel.puts message
  end

  # Logs and runs an external command. When several arguments are given they
  # are passed to the program directly, without a shell, so spaces and shell
  # metacharacters inside an argument are harmless.
  # Returns whatever Kernel.system returns.
  def system(*command)
    puts(command.join(" "), :info)
    Kernel.system(*command)
  end

  # Creates dir (and missing parents) via `mkdir -p`.
  # Returns false when the directory already exists, otherwise the result of
  # the mkdir command.
  def mkdir(dir)
    return false if File.directory?(dir)
    # "--" keeps a name starting with "-" from being read as an option.
    system("mkdir", "-p", "--", dir)
  end

  # Opens a remote URL and yields the response body as an IO-like object.
  # Kernel#open stopped opening URLs in Ruby 3.0, so URI.open is preferred
  # whenever open-uri provides it; older Rubies fall back to Kernel#open.
  def open_url(url, &block)
    if URI.respond_to?(:open)
      URI.open(url, &block)
    else
      Kernel.open(url, &block)
    end
  end

  # Replaces every %XX escape in str with the byte it stands for and returns
  # the result tagged as UTF-8. Stands in for URI.decode, which was removed
  # in Ruby 3.0. "+" is left untouched.
  def decode_percent(str)
    bytes = str.dup
    # Work on raw bytes so multi-byte sequences can be rebuilt one byte at a time.
    bytes.force_encoding("ASCII-8BIT") if bytes.respond_to?(:force_encoding)
    decoded = bytes.gsub(/%([0-9A-Fa-f]{2})/) { [$1].pack("H2") }
    decoded.force_encoding("UTF-8") if decoded.respond_to?(:force_encoding)
    decoded
  end

  # Listing phase. Collects image URLs for the top level and for every folder
  # and writes them to the URL list file. Unless :simplewget is set, each
  # folder's URLs are preceded by a "# <decoded folder name>" line that the
  # downloading phase uses to pick the target sub-directory.
  # Returns a Hash mapping folder name ("" for the top level) to its URLs.
  def get_image_url
    url_h = {} # dir (may be empty) => array of urls
    puts("[rss] http://f.hatena.ne.jp/#{@user}", :info)
    url_a = get_image_url_by_rss(@user)
    puts(url_a, :debug)
    url_h[""] = url_a
    get_subdir.each do |dir|
      puts("[rss] http://f.hatena.ne.jp/#{@user}/#{dir}", :info)
      url_a = get_image_url_by_rss(File.join(@user, dir))
      puts(url_a, :debug)
      url_h[dir] = url_a
    end
    File.open(@filename_urllist, "w") do |f_out|
      url_h.keys.sort.each do |dir|
        f_out.puts "# #{decode_percent(dir)}" unless @opt[:simplewget] || dir.empty?
        f_out.puts url_h[dir]
      end
    end
    url_h
  end

  # Reads the paged RSS feed below http://f.hatena.ne.jp/<str>/ and returns
  # all image URLs found, in feed order. Paging stops at the first page that
  # yields fewer than RSS_PAGE_SIZE URLs.
  def get_image_url_by_rss(str = @user)
    url_a = []
    # http://mikio.hatenablog.com/entry/2012/11/18/014228
    base_url = "http://f.hatena.ne.jp/#{str}/rss?page="
    page = 1
    loop do
      rss_url = base_url + page.to_s
      url_count = 0
      puts("[rss] #{rss_url}", :info)
      open_url(rss_url) do |file|
        file.each_line do |line|
          # scan (not a single match) so several entries on one line all count.
          line.scan(IMAGE_URL_PATTERN).flatten.each do |image_url|
            url_count += 1
            url_a << image_url
          end
        end
      end
      break if url_count < RSS_PAGE_SIZE
      page += 1
    end
    url_a
  end

  # Scrapes the user's top page for folder links and returns the folder names
  # exactly as they appear in the href (still percent-encoded).
  def get_subdir
    dir_a = []
    # The user name is escaped so that it is matched literally.
    folder_link = %r{href="/#{Regexp.escape(@user)}/(.*?)/"}
    open_url("http://f.hatena.ne.jp/#{@user}") do |file|
      file.each_line do |line|
        next unless line.index('alt="folder"')
        dir_a << $1 if folder_link =~ line
      end
    end
    dir_a
  end

  # Downloading phase. Fetches every URL in the list file with wget, one call
  # per file, sleeping SLEEP_TIME seconds between downloads. "# name" lines
  # switch the target sub-directory. Existing files are skipped unless :force
  # is set, and the loop ends once :filemax downloads have been made.
  #
  # NOTE(review): folder names originate from remote HTML and are not checked
  # for ".." components before being joined onto the download directory.
  def download_by_url
    download_count = 0
    dir = @opt[:dir]
    mkdir(dir)
    dir2 = ""
    File.open(@filename_urllist) do |f_in|
      f_in.each_line do |line|
        entry = line.strip
        if entry =~ /\A# (.+)\z/
          dir2 = $1
          mkdir(File.join(dir, dir2))
          next
        end
        next unless entry =~ /\Ahttp/
        url = entry
        filename_image = File.join(dir, dir2, url.split("/")[-1])
        puts("[image] #{url} => #{filename_image}", :info)
        if File.file?(filename_image) && !@opt[:force]
          puts(" skipped because the file exists")
          next
        end
        system(*(@wget_command + ["-O", filename_image, url]))
        download_count += 1
        # break (not return) so the summary below is still printed.
        break if Numeric === @opt[:filemax] && download_count >= @opt[:filemax]
        sleep SLEEP_TIME
      end
    end
    puts("#{download_count} file(s) downloaded", :info)
  end

  # Downloading phase, simple variant: lets one wget process read the whole
  # URL list (-i), wait SLEEP_TIME seconds between requests (-w) and recreate
  # the remote directory layout locally (-x).
  def download_by_url_simple
    system(*(@wget_command + ["-w", SLEEP_TIME.to_s, "-x", "-i", @filename_urllist]))
  end
end
# Command-line entry point: translate the options into the hash understood by
# HatenaFotolifeDownloader and run it. Only executed when this file is run
# directly, not when it is loaded as a library.
if __FILE__ == $0
  h = {}
  op = OptionParser.new
  op.on("-u", "--user=VAL", "hatena user name") { |v| h[:user] = v }
  op.on("-d", "--dir=VAL", "directory for image files") { |v| h[:dir] = v }
  # DecimalInteger makes OptionParser reject non-numeric values instead of
  # letting String#to_i silently turn them into 0.
  op.on("-l", "--limit=VAL", OptionParser::DecimalInteger,
        "max of files downloaded") { |v| h[:filemax] = v }
  op.on("-D", "--download-only", "download only") { h[:downloadonly] = true }
  op.on("-L", "--list-only", "make list only") { h[:listonly] = true }
  op.on("-F", "--force-get", "download even if file exists") { h[:force] = true }
  op.on("-S", "--simple-wget", "download by simple wget") { h[:simplewget] = true }
  op.on("-q", "--quiet", "quiet") { h[:quiet] = true }
  begin
    op.parse!(ARGV)
  rescue OptionParser::ParseError => e
    # Report bad options with the usage text rather than a stack trace.
    abort "#{e.message}\n\n#{op.help}"
  end
  HatenaFotolifeDownloader.new(h).start
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment