@zellux
Created October 3, 2011 16:45
chncode book downloader
require 'thread'
require 'logger'
require 'fileutils'

dir_name = ARGV[0] || '707-kindle'
NTHREADS = 4
$logger = Logger.new STDERR

puts "Resizing all images..."

# Collect every downloaded page image (skip files whose names contain an
# underscore, i.e. slices produced by an earlier run).
$task_queue = []
Dir.glob("#{dir_name}/**/*").each do |f|
  next unless f[/^[^_]*\.gif$/]
  $task_queue << f
end

# Run the given block over every queued file, using NTHREADS worker threads
# that pull file names from a shared queue guarded by a mutex.
def work
  threads = []
  task_queue = $task_queue.clone
  mutex = Mutex.new
  1.upto(NTHREADS).each do
    threads << Thread.new do
      loop do
        file_name = nil
        mutex.synchronize { file_name = task_queue.shift }
        break if file_name.nil?
        yield file_name
      end
    end
  end
  threads.each { |t| t.join }
end

# Optional first pass: resize every image to 800px wide.
# work do |file_name|
#   $logger.debug file_name
#   %x( convert #{file_name} -resize 800x #{file_name} )
# end

# Split each tall page image into slices at most max_height pixels high,
# overlapping consecutive slices by backward_line_height pixels so a line of
# text cut at the bottom of one slice reappears at the top of the next.
max_height = 600
backward_line_height = 20

work do |file_name|
  / (\d+)x(\d+) / =~ %x( identify #{file_name} )
  width = Regexp.last_match(1).to_i
  height = Regexp.last_match(2).to_i
  $logger.debug "#{width}, #{height}"

  crop_at = 0
  count = 0
  loop do
    FileUtils.mkdir_p "output/#{file_name[/.*\//]}"
    cmd = %( convert #{file_name} -crop "#{width}x#{max_height}+0+#{crop_at}" +repage output/#{file_name}_#{'%02d' % count}.gif )
    $logger.debug cmd
    %x( #{cmd} )
    crop_at = crop_at + max_height - backward_line_height
    break if crop_at > height
    count += 1
  end
end
require 'mechanize'
require 'logger'

# Crawl a book on chncode.com chapter by chapter and save every page image
# into <bookid>/<chapter>/.
class Downloader
  def initialize(bookid, pageid)
    @agent = Mechanize.new { |agent| agent.user_agent_alias = 'Windows Mozilla' }
    @log = Logger.new(STDERR)
    @bookid = bookid
    @pageid = pageid
    Dir.exists?(bookid.to_s) or Dir.mkdir(bookid.to_s)
  end

  # Fetch one page, download its images, and return the relative URL of the
  # next page (nil when there is none).
  def parse_and_download(url)
    body = @agent.get(url).body
    @log.debug "Chapter #{url}"
    chapter = body[/\<title\>\D*(\d+).*\<\/title\>/, 1]
    images = body.scan(/\<img src=\"(.*?)\".*?class="imagecontent">/).flatten
    images.each do |image_url|
      @log.debug "Downloading #{image_url}"
      file_name = image_url[/\d+\.gif/]
      file_path = "#{@bookid}/#{chapter}/#{file_name}"
      Dir.exists?("#{@bookid}/#{chapter}") or Dir.mkdir("#{@bookid}/#{chapter}")
      # Skip images that were already downloaded successfully.
      unless File.exists?(file_path) and File.stat(file_path).size > 10
        @agent.get(image_url).save_as(file_path)
      end
    end
    body[/next_page = \"(.*)\"/, 1]
  end

  # Follow next_page links until the book's index page is reached.
  def download_all
    url = "http://www.chncode.com/bs/0/#{@bookid}/#{@pageid}.html"
    while true
      next_page = parse_and_download(url)
      break if next_page.nil? or next_page[/index\.html/]
      url = "http://www.chncode.com/bs/0/#{@bookid}/#{next_page}"
    end
  end
end

bookid = 707
pageid = 1627866
Downloader.new(bookid, pageid).download_all
zellux commented Oct 3, 2011

To make a zip archive for every chapter, enter output/ and run

ls -1 | xargs -I{} zip -0 -r '{}'.zip '{}'
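
The same packing can also be scripted; here is a minimal Ruby sketch, assuming the zip command-line tool is on PATH and that output/ contains one directory per chapter:

require 'fileutils'

# Create one uncompressed (-0) zip archive per chapter directory under
# output/, mirroring the `ls -1 | xargs ... zip` one-liner above.
Dir.chdir('output') do
  Dir.glob('*').select { |entry| File.directory?(entry) }.each do |chapter|
    system('zip', '-0', '-r', "#{chapter}.zip", chapter)
  end
end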
