paranoidxc/Db_Album.rb

## Db_Album.rb
#encoding: UTF-8
# Author: XiaochuanHuang
# Email : emohuang [at] gmail [dot] com

require 'open-uri'
require "net/http"
require "uri"
require 'hpricot'

# Downloads the photos of a Douban photo album into an `images/` directory
# located next to this source file.
#
# Constructing an instance starts the download immediately (see #initialize).
class Db_Album

  # Performs an HTTP GET on +url+ and returns the response body.
  #
  # The request carries a fixed browser-like User-Agent and an
  # X-Forwarded-For header. Redirects are not followed: the body of whatever
  # response the server sends is returned as-is.
  #
  # @param url [String] absolute http:// or https:// URL
  # @return [String] response body
  def url_get_content(url)
    uri = URI.parse(url)
    # request_uri yields "/" for an empty path and appends "?query" only when
    # a query is present, so the request target is always well-formed.
    req = Net::HTTP::Get.new(uri.request_uri)
    req.add_field('X-Forwarded-For', '0.0.0.0')
    req.add_field('User-Agent', 'Mozilla/5.0 (Windows NT 5.1; rv:18.0) Gecko/20100101 Firefox/18.0')

    http = Net::HTTP.new(uri.host, uri.port)
    # Without this an https:// URL would be spoken to in plain HTTP.
    http.use_ssl = true if uri.scheme == 'https'
    http.start { |conn| conn.request(req) }.body
  end

  # Fetches +url+ and writes the body to `album.txt` in the current working
  # directory (debugging aid).
  #
  # @param url [String] URL to fetch
  # @return [Integer] number of bytes written
  def url_to_file(url)
    # Fetch first so a failed request does not truncate an existing file.
    content = url_get_content(url)
    File.open('album.txt', 'wb') { |file| file.write(content) }
  end

  # Prepares the `images/` directory, then downloads the whole album.
  #
  # @param url [String] URL of the first page of the album
  def initialize(url = "http://www.douban.com/photos/album/44503288/")
    @dir = File.join(File.dirname(__FILE__), 'images/')
    # File.directory? works on every Ruby version (Dir.exists? was removed in 3.2).
    Dir.mkdir(@dir, 0777) unless File.directory?(@dir)

    @page   = 0      # zero-based index of the album page being processed
    @offset = 18     # step added to the `start` query parameter per page
    @url    = url
    @sleep_photo      = 0.2  # pause (seconds) after each photo
    @sleep_album_page = 4    # pause (seconds) after each album page
    album_page
    puts ' ablum download done :) '
  end

  # Walks the album pages starting at the current @page, downloading every
  # photo linked from each page, and stops at the first page that lists no
  # photos. @page is advanced by one for every page that had photos.
  #
  # @return [nil]
  def album_page
    loop do
      start = @page * @offset
      url = start.zero? ? @url : "#{@url}?start=#{start}"

      puts "start album page #{url}"
      doc = Hpricot(url_get_content(url))
      photos = doc.search('.photolst>.photo_wrap>a.photolst_photo')
      break if photos.empty?

      photos.each do |photo|
        album_photo_dl(photo.attributes['href'])
        sleep @sleep_photo
      end
      sleep @sleep_album_page
      @page += 1
    end
  end

  # Downloads the image shown on a single photo page into @dir, unless a file
  # with the same name is already there. A page on which no usable image can
  # be found is reported and skipped instead of raising.
  #
  # @param url [String] URL of the photo page
  # @return [nil]
  def album_photo_dl(url)
    print "Photo Url #{url} "
    src = photo_image_src(Hpricot(url_get_content(url)))
    filename = src ? File.basename(URI.parse(src).path.to_s) : ''
    if filename.empty? || filename == '/'
      puts ' No image found Skip '
      return
    end

    save_file_path = File.join(@dir, filename)
    if File.file?(save_file_path)
      puts ' Photo File exist Skip '
    else
      print ' Start Download... !'
      # Fetch before opening the target so a failed request does not leave an
      # empty file behind that later runs would treat as already downloaded.
      data = url_get_content(src)
      File.open(save_file_path, 'wb') { |file| file << data }
      puts ' done !'
    end
  end

  private

  # Returns the `src` attribute of the image to download for a parsed photo
  # page, or nil when the page has no matching image.
  #
  # When the page contains a `.report-link a` link, the page behind that link
  # is fetched and its `#pic-viewer a img` image is preferred (the original
  # author treats this as the large-size photo); otherwise, or when that page
  # has no such image, the `.mainphoto img` image of the photo page is used.
  def photo_image_src(photo_page)
    images = []
    # The last matching link wins, as it did in the original block-based loop.
    large_link = photo_page.search('.report-link a').last
    if large_link
      large_page = Hpricot(url_get_content(large_link.attributes['href']))
      images = large_page.search('#pic-viewer a img')
    end
    images = photo_page.search('.mainphoto img') if images.empty?

    image = images.first
    image && image.attributes['src']
  end

end

# Notes:
# - Photos are saved to the `images` directory located next to this Db_Album.rb file.
# - The `images` directory is created if it does not exist yet.
# - A photo is downloaded only if no file with the same name already exists in `images`.

# Usage:  $ ruby Db_Album.rb
# Constructing an instance starts the download right away:
#   Db_Album.new( douban_album_url )
#   Db_Album.new("http://www.douban.com/photos/album/87115884/") # large photo
Db_Album.new("http://www.douban.com/photos/album/46907826/")   # default size photo
	#encoding: UTF-8
	# Author: XiaochuanHuang
	# Email : emohuang [at] gmail [dot] com

	require 'open-uri'
	require "net/http"
	require "uri"
	require 'hpricot'

	# Downloads the photos of a Douban photo album into an `images/` directory
	# located next to this source file.
	#
	# Constructing an instance starts the download immediately (see #initialize).
	class Db_Album

	  # Performs an HTTP GET on +url+ and returns the response body.
	  #
	  # The request carries a fixed browser-like User-Agent and an
	  # X-Forwarded-For header. Redirects are not followed: the body of whatever
	  # response the server sends is returned as-is.
	  #
	  # @param url [String] absolute http:// or https:// URL
	  # @return [String] response body
	  def url_get_content(url)
	    uri = URI.parse(url)
	    # request_uri yields "/" for an empty path and appends "?query" only when
	    # a query is present, so the request target is always well-formed.
	    req = Net::HTTP::Get.new(uri.request_uri)
	    req.add_field('X-Forwarded-For', '0.0.0.0')
	    req.add_field('User-Agent', 'Mozilla/5.0 (Windows NT 5.1; rv:18.0) Gecko/20100101 Firefox/18.0')

	    http = Net::HTTP.new(uri.host, uri.port)
	    # Without this an https:// URL would be spoken to in plain HTTP.
	    http.use_ssl = true if uri.scheme == 'https'
	    http.start { |conn| conn.request(req) }.body
	  end

	  # Fetches +url+ and writes the body to `album.txt` in the current working
	  # directory (debugging aid).
	  #
	  # @param url [String] URL to fetch
	  # @return [Integer] number of bytes written
	  def url_to_file(url)
	    # Fetch first so a failed request does not truncate an existing file.
	    content = url_get_content(url)
	    File.open('album.txt', 'wb') { |file| file.write(content) }
	  end

	  # Prepares the `images/` directory, then downloads the whole album.
	  #
	  # @param url [String] URL of the first page of the album
	  def initialize(url = "http://www.douban.com/photos/album/44503288/")
	    @dir = File.join(File.dirname(__FILE__), 'images/')
	    # File.directory? works on every Ruby version (Dir.exists? was removed in 3.2).
	    Dir.mkdir(@dir, 0777) unless File.directory?(@dir)

	    @page   = 0      # zero-based index of the album page being processed
	    @offset = 18     # step added to the `start` query parameter per page
	    @url    = url
	    @sleep_photo      = 0.2  # pause (seconds) after each photo
	    @sleep_album_page = 4    # pause (seconds) after each album page
	    album_page
	    puts ' ablum download done :) '
	  end

	  # Walks the album pages starting at the current @page, downloading every
	  # photo linked from each page, and stops at the first page that lists no
	  # photos. @page is advanced by one for every page that had photos.
	  #
	  # @return [nil]
	  def album_page
	    loop do
	      start = @page * @offset
	      url = start.zero? ? @url : "#{@url}?start=#{start}"

	      puts "start album page #{url}"
	      doc = Hpricot(url_get_content(url))
	      photos = doc.search('.photolst>.photo_wrap>a.photolst_photo')
	      break if photos.empty?

	      photos.each do |photo|
	        album_photo_dl(photo.attributes['href'])
	        sleep @sleep_photo
	      end
	      sleep @sleep_album_page
	      @page += 1
	    end
	  end

	  # Downloads the image shown on a single photo page into @dir, unless a file
	  # with the same name is already there. A page on which no usable image can
	  # be found is reported and skipped instead of raising.
	  #
	  # @param url [String] URL of the photo page
	  # @return [nil]
	  def album_photo_dl(url)
	    print "Photo Url #{url} "
	    src = photo_image_src(Hpricot(url_get_content(url)))
	    filename = src ? File.basename(URI.parse(src).path.to_s) : ''
	    if filename.empty? || filename == '/'
	      puts ' No image found Skip '
	      return
	    end

	    save_file_path = File.join(@dir, filename)
	    if File.file?(save_file_path)
	      puts ' Photo File exist Skip '
	    else
	      print ' Start Download... !'
	      # Fetch before opening the target so a failed request does not leave an
	      # empty file behind that later runs would treat as already downloaded.
	      data = url_get_content(src)
	      File.open(save_file_path, 'wb') { |file| file << data }
	      puts ' done !'
	    end
	  end

	  private

	  # Returns the `src` attribute of the image to download for a parsed photo
	  # page, or nil when the page has no matching image.
	  #
	  # When the page contains a `.report-link a` link, the page behind that link
	  # is fetched and its `#pic-viewer a img` image is preferred (the original
	  # author treats this as the large-size photo); otherwise, or when that page
	  # has no such image, the `.mainphoto img` image of the photo page is used.
	  def photo_image_src(photo_page)
	    images = []
	    # The last matching link wins, as it did in the original block-based loop.
	    large_link = photo_page.search('.report-link a').last
	    if large_link
	      large_page = Hpricot(url_get_content(large_link.attributes['href']))
	      images = large_page.search('#pic-viewer a img')
	    end
	    images = photo_page.search('.mainphoto img') if images.empty?

	    image = images.first
	    image && image.attributes['src']
	  end

	end

	# Notes:
	# - Photos are saved to the `images` directory located next to this Db_Album.rb file.
	# - The `images` directory is created if it does not exist yet.
	# - A photo is downloaded only if no file with the same name already exists in `images`.

	# Usage: $ ruby Db_Album.rb
	# Constructing an instance starts the download right away:
	#   Db_Album.new( douban_album_url )
	#   Db_Album.new("http://www.douban.com/photos/album/87115884/") # large photo
	Db_Album.new("http://www.douban.com/photos/album/46907826/") # default size photo