Skip to content

Instantly share code, notes, and snippets.

@Zhomart
Created June 21, 2013 11:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Zhomart/5830619 to your computer and use it in GitHub Desktop.
Save Zhomart/5830619 to your computer and use it in GitHub Desktop.
# 2ch image saver written in Ruby.
# Create list.txt and put one thread URL on each line.
# Sample:
#
# $ cat list.txt
# http://2ch.hk/aa/res/55017.html
# http://2ch.hk/aa/res/22059.html
# http://2ch.hk/aa/res/40624.html
# http://2ch.hk/aa/res/51504.html
require 'open-uri'
require 'nokogiri'
require 'uri'
# Thread URLs to process: one entry per line of list.txt, with surrounding
# whitespace (including the trailing newline) stripped from each line.
urls = File.readlines("list.txt").map(&:strip)
# File extensions tried, in this order, when guessing an image's full-size URL.
exts = %w(jpg gif png)

# URLs already attempted on earlier runs, loaded from downloaded.txt (one URL
# per line). Starts empty when the file does not exist yet.
# File.exist? is used because File.exists? was removed in Ruby 3.2.
downloaded_urls =
  if File.exist?('downloaded.txt')
    File.readlines('downloaded.txt').map(&:strip)
  else
    []
  end
# Main download loop.
#
# For every thread URL in `urls` (processed last to first) the thread page is
# fetched, an image id is extracted from each thumbnail <img> src, and the
# extensions in `exts` are tried in order until one yields a non-empty body.
# Images are saved into a directory named after the thread. Every URL whose
# download attempt completed (successfully or with an HTTP error) is appended
# to `downloaded_urls`, which is written to downloaded.txt in the `ensure`
# clause so that progress survives CTRL-C (Interrupt) and unexpected errors.
begin
  urls.reverse_each do |base_url|
    # Skip blank lines before printing or parsing anything.
    next if base_url.empty?

    p "Downloading images from: #{base_url}"

    # A malformed line should skip that thread, not abort the whole run.
    uri = begin
      URI(base_url)
    rescue URI::InvalidURIError => e
      warn "Skipping invalid URL #{base_url}: #{e.message}"
      next
    end

    # Directory name: first run of 5+ word chars, else 2+ digits, else any
    # run of word chars found in the URL.
    dir = base_url[/[\w\d]{5,}/] || base_url[/\d{2,}/] || base_url[/[\w\d]+/]
    next unless dir

    Dir.mkdir(dir) unless File.directory?(dir)

    # Fetch and parse the thread page; the block form closes the handle.
    # Any failure here (network, HTTP, unsupported scheme) skips the thread.
    html = begin
      uri.open { |page| Nokogiri::HTML.parse(page) }
    rescue StandardError => e
      warn "Skipping #{base_url}: #{e.message}"
      next
    end

    # Searching from the document (not html.root) also works when the
    # document is empty and has no root element.
    images = html.css("img")
    count = images.size

    images.each_with_index do |img, index|
      thumb_src = img['src']
      next unless thumb_src

      # Only thumbnails whose path contains "thumb/<digits>" identify an image.
      id = thumb_src[/.*thumb\/(\d+).*\.\w+/, 1]
      next unless id

      # Already saved under any of the known extensions? Then nothing to do.
      next if exts.any? { |ext| File.exist?(File.join(dir, "#{id}.#{ext}")) }

      percent = (index * 100.0 / count).to_i

      exts.each do |ext|
        # NOTE(review): the board segment "aa" is hard-coded here, so threads
        # from other boards presumably resolve to the wrong URL - confirm.
        src = "http://#{uri.host}/aa/src/#{id}.#{ext}"
        path = File.join(dir, "#{id}.#{ext}")

        # dir and src are passed as arguments so a '%' in them cannot be
        # misread as a format directive.
        print format("(%03d/%02d%%) -> (%s) %s\n", index + 1, percent, dir, src)
        next if downloaded_urls.include?(src)

        # Download fully into memory before touching the disk so that a
        # failed or interrupted transfer never leaves an empty/partial file.
        # URI#open is used because Kernel#open stopped opening URLs in Ruby 3.0.
        data = begin
          URI(src).open(&:read)
        rescue OpenURI::HTTPError
          nil # most likely a wrong extension guess; try the next one
        end

        # Record the URL only once the attempt has finished, so an interrupted
        # download is retried on the next run.
        downloaded_urls << src
        next if data.nil? || data.empty?

        File.binwrite(path, data)
        break
      end
    end

    print format("(%03d/100%%) -> Done!\n", count)
  end

  # Reached only when every thread was processed without interruption.
  print "\nAll images from sources downloaded! Check the dirs please.\n"
rescue SystemExit, Interrupt
  print "Stopped!\n"
ensure
  # Persist the attempt history no matter how the loop ended.
  File.write('downloaded.txt', downloaded_urls.join("\n"))
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment