Skip to content

Instantly share code, notes, and snippets.

@ndrluis
Forked from akitaonrails/Gemfile
Last active August 29, 2015 14:11
Show Gist options
  • Save ndrluis/06c35d032176908c1862 to your computer and use it in GitHub Desktop.
Save ndrluis/06c35d032176908c1862 to your computer and use it in GitHub Desktop.
require 'rubygems'
require 'bundler/setup'
require 'mechanize'
require 'typhoeus'
require 'fileutils'
require 'rmagick'
require 'prawn'
require 'fastimage'
# HTTP client used for all page scraping; it sends a Googlebot user-agent string.
agent = Mechanize.new do |client|
  client.user_agent = 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'
end

# Customize these 2 variables to download the mangas you want.
manga_root_url    = "http://www.mangareader.net/206/hikaru-no-go.html"
manga_root_folder = "/vagrant/tmp/mangareader"

page = agent.get(manga_root_url)

# TODO: scanning always starts at the 1st chapter and runs to the end; a
# parameter to skip chapters still has to be added.
first_chapter_link = page.at("#listing//a")['href']
manga_title        = page.at("h1").text

# Per-title bookkeeping files kept under /tmp.
download_list_backup_file = "/tmp/#{manga_title}.bkp"
downloaded_status_file    = "/tmp/#{manga_title}.downloaded"

# Landing page of the first chapter; link scanning starts from here.
chapter = agent.get("http://www.mangareader.net#{first_chapter_link}")
# Serial link scanning.
#
# Fills `download_links` with one [chapter_name, file_name, image_url] triple
# per manga page, walking the reader page by page through its "Next" link.
# The finished list is cached in `download_list_backup_file` and reloaded from
# there on later runs instead of crawling again.
download_links = []
if File.exist?(download_list_backup_file)
  # NOTE(review): Marshal.load will instantiate whatever the file contains and
  # the cache lives under /tmp; only run this where that directory is trusted.
  download_links = Marshal.load(File.binread(download_list_backup_file))
end

if download_links.empty?
  loop do
    # Record the page currently loaded *before* following "Next"; otherwise the
    # very first page of the first chapter is never added to the list.
    begin
      image = chapter.at("#img")
      # The alt text is expected to look like "<chapter name> - <page name>".
      tokens = image && image['alt'].to_s.match(/^(.*?)\s-\s(.*?)$/)
      if tokens
        extension = File.extname(URI.parse(image['src']).path)
        download_links << [tokens[1], "#{tokens[2]}#{extension}", image['src']]
        puts download_links.last.join(' - ')
      else
        puts "Skipping #{chapter.uri}: no #img element with a \"chapter - page\" alt text"
      end
    rescue => e
      # Best effort: a page that cannot be parsed is reported and skipped.
      puts e
    end

    # Move on to the next page. A missing link or a failed request both end the
    # scan, as before; the failure reason is now printed instead of discarded.
    next_page = begin
      next_link = chapter.link_with(text: 'Next')
      next_link && next_link.click
    rescue => e
      puts e
      nil
    end

    unless next_page
      puts "Finished scanning links"
      break
    end
    chapter = next_page
  end

  # Marshal output is binary, so write it in binary mode.
  File.binwrite(download_list_backup_file, Marshal.dump(download_links))
end
# Parallel downloads.
#
# Fetches every image in `download_links` (up to 50 requests at a time), stores
# it as <manga_root_folder>/<chapter_name>/<file_name> and shrinks it in place
# to fit 600x800 at quality 50. The whole phase is skipped when
# `downloaded_status_file` already exists; that marker is created once the
# queue has been drained.
unless File.exist?(downloaded_status_file)
  hydra = Typhoeus::Hydra.new(max_concurrency: 50)
  download_links.each do |chapter_name, file_name, image_url|
    begin
      request = Typhoeus::Request.new(image_url)
      request.on_complete do |response|
        # This callback runs later, from inside hydra.run, so the rescue that
        # guards queueing below cannot catch its errors. Handle them here so a
        # single bad image does not abort every other download.
        begin
          if response.success?
            chapter_folder = File.join(manga_root_folder, chapter_name)
            FileUtils.mkdir_p(chapter_folder)
            downloaded_filename = File.join(chapter_folder, file_name)
            File.open(downloaded_filename, "wb+") { |f| f.write response.body }
            image = Magick::Image.read(downloaded_filename).first
            resized = image.resize_to_fit(600, 800)
            # Block-parameter form: works whether RMagick yields the options
            # object or instance_evals the block.
            resized.write(downloaded_filename) { |info| info.quality = 50 }
            puts "File #{downloaded_filename} downloaded and resized."
          else
            # Do not write an error body to disk and feed it to ImageMagick.
            puts "Failed to download #{image_url}: HTTP #{response.code} #{response.return_message}"
          end
        rescue => e
          puts "Failed to process #{image_url}: #{e}"
        end
        GC.start # to avoid a leak too big
      end
      hydra.queue request
    rescue => e
      puts e
    end
  end
  hydra.run
  FileUtils.touch(downloaded_status_file)
end
# Concatenating PDF files (250 pages per volume).
#
# Consumes `download_links` in batches and writes each batch to
# "<manga_title> <n>.pdf" inside `manga_root_folder`, one image per
# `pdf.image` call on 600x800 pages. `download_links` is empty afterwards.
pages_per_volume = 250
chapter_number = 0 # counts generated PDF volumes, not manga chapters
until download_links.empty?
  chapter_number += 1
  pdf_file = File.join(manga_root_folder, "#{manga_title} #{chapter_number}.pdf")
  # slice!(start, length) removes exactly `pages_per_volume` entries; the
  # inclusive range 0..250 used previously took 251.
  list = download_links.slice!(0, pages_per_volume)
  Prawn::Document.generate(pdf_file, page_size: [600, 800]) do |pdf|
    # Each entry is [chapter_name, file_name, image_url]; the image was saved
    # under its own chapter folder, so the path is built from the entry itself.
    list.each do |chapter_name, file_name, _image_url|
      image_file = File.join(manga_root_folder, chapter_name, file_name)
      unless File.exist?(image_file)
        # A page whose download failed should not abort the whole volume.
        puts "Skipping missing page #{image_file}"
        next
      end
      pdf.image image_file, position: :center, vposition: :center
    end
  end
  puts "#{pdf_file} compiled."
  GC.start
end

# Cleanup: drop the "downloads finished" marker.
FileUtils.rm downloaded_status_file
# Gemfile for the downloader script.
# Fetch gems over HTTPS so they cannot be tampered with in transit.
source 'https://rubygems.org'

gem 'mechanize'
gem 'typhoeus'
gem 'rmagick'
gem 'prawn'
gem 'fastimage'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment