Created
September 25, 2010 11:59
-
-
Save deepflame/596774 to your computer and use it in GitHub Desktop.
Grabs all public photos from a Friendster account (www.friendster.com).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'rubygems' | |
require 'nokogiri' | |
require 'open-uri' | |
require 'json' | |
require "cgi" | |
require 'fileutils' | |
# Fetches a page from www.friendster.com and parses it as HTML.
#
# @param rel_url [#to_s] path (plus optional query string) appended to
#   "http://www.friendster.com"
# @return the document produced by Nokogiri::HTML for the fetched page
def fdoc(rel_url)
  # Kernel#open stopped handling URLs in Ruby 3.0; URI.open is the open-uri
  # entry point. The block form closes the underlying handle once parsed.
  URI.open("http://www.friendster.com#{rel_url}") { |io| Nokogiri::HTML(io) }
end
# Looks up the display name shown on a user's profile page.
#
# The text of the first ".username" element is split on whitespace and every
# word is capitalized. Results are cached per id, so each profile page is
# fetched at most once and different ids never share a cached name.
#
# @param id [Integer, String] user id (defaults to USER_ID)
# @return [String] the capitalized user name
# @raise [RuntimeError] if the profile page has no ".username" element
def user_name(id = USER_ID)
  @user_names ||= {}
  @user_names[id] ||= begin
    node = fdoc("/#{id}").css('.username').first
    raise "No .username element found on profile page of user #{id}" if node.nil?

    node.content.to_s.split(' ').map(&:capitalize).join(' ')
  end
end
#
# configuration
#
# Friendster user id whose albums are downloaded. It can be passed as the
# first command-line argument; without arguments the built-in default is used.
# Base 10 is forced so that an id with leading zeros is not read as octal.
USER_ID = ARGV.empty? ? 1234567 : Integer(ARGV.first, 10)
# Root directory for all downloaded albums: "<id> - <user name>".
DEST_DIR = "#{USER_ID} - #{user_name}"
puts "User: #{USER_ID} (#{user_name})"
# Collects the album thumbnail links of a user across all album listing pages.
#
# Pages of /viewalbums.php are requested starting at page 0 until a page
# contains no "a.albumThumb" element. Prints the number of albums found.
#
# @param user_id [Integer, String] user id (defaults to USER_ID)
# @return [Array] the "a.albumThumb" nodes of every page, in page order
def album_links(user_id = USER_ID)
  links = []
  page_nr = 0
  loop do
    page = fdoc("/viewalbums.php?page=#{page_nr}&uid=#{user_id}")
    new_links = page.css('a.albumThumb')
    break if new_links.empty? # no albums on this page: we are past the last one

    # Append in place instead of building a new array on every page.
    links.concat(new_links.to_a)
    page_nr += 1
  end
  puts "User has #{links.length} Album(s)"
  links
end
# Extracts the raw photo-list JSON for every page of an album.
#
# For each page of /viewphotos.php (starting at page 0) the first link inside
# "div#photoGallery" is followed. On that gallery page the first script inside
# "div#twoColContainer" is searched for the line containing "_pl", and the
# right-hand side of that assignment is collected. Iteration stops at the first
# page without gallery links. Prints the number of pages found.
#
# @param album_id [Integer, String] album id used for the "a" query parameter
# @param user_id [Integer, String] user id (defaults to USER_ID)
# @return [Array<String>] one JSON string per album page, in page order
# @raise [RuntimeError] if a gallery page has no script line containing "_pl"
def picture_json_strings(album_id, user_id = USER_ID)
  json_strings = []
  page_nr = 0
  loop do
    page = fdoc("/viewphotos.php?page=#{page_nr}&a=#{album_id}&uid=#{user_id}")
    gallery_link = page.css("div#photoGallery a").first
    break if gallery_link.nil?

    gallery_site = fdoc(gallery_link['href'])
    script_tag = gallery_site.css("div#twoColContainer script").first
    js_line = script_tag && script_tag.content.split("\n").find { |l| l.include?('_pl') }
    # Fail loudly rather than skip: callers derive photo numbers from the
    # position of each string in the returned array.
    raise "No '_pl' photo list found on gallery page #{page_nr} of album #{album_id}" if js_line.nil?

    # Keep everything after the first '=' and drop only a trailing ';'.
    # A blind chop would cut the closing brace when the ';' is absent.
    json_strings << js_line.split('=', 2).last.strip.chomp(';')
    page_nr += 1
  end
  puts "Album has #{json_strings.length} page(s)"
  json_strings
end
# Turns arbitrary text into a string that is safe to use as a file or
# directory name.
#
# Every character other than ASCII letters, digits, "." and "-" becomes a
# space; runs of spaces are collapsed and the result is trimmed. nil is treated
# as an empty string. A result made only of dots (".", "..") is rejected,
# because it would refer to the current or parent directory.
#
# @param string [String, nil] text to sanitize
# @return [String] the sanitized name, possibly empty
def sanitize_file_name(string)
  # After the gsub the only whitespace left is the plain space, so squeeze
  # collapses exactly the runs the substitution created.
  cleaned = string.to_s.gsub(/[^0-9A-Za-z.\-]/, ' ').squeeze(' ').strip
  cleaned.delete('.').empty? ? '' : cleaned
end
# Go through every album page
album_links.each do |album_link|
  album_title = album_link.attributes['title'].to_s
  album_url = album_link.attributes['href'].to_s
  puts ""

  # Skip private albums
  if album_url.include?("private")
    puts "Skipping Album: #{album_title} (private)"
    next
  end
  puts "Album: '#{album_title}'"

  # Get album ID from URL. CGI.parse maps every key to an ARRAY of values, so
  # take the first one; interpolating the array itself would put its inspect
  # form (e.g. ["123"]) into the request URL.
  album_id = CGI.parse(album_url.split('?').last)['a'].first

  dir_name = sanitize_file_name(album_title)
  dir_path = File.join(DEST_DIR, dir_name)
  FileUtils.mkdir_p(dir_path) # no-op when the directory already exists
  # Snapshot of the files present before this run, used for rename detection.
  dir_entries = Dir.entries(dir_path)

  picture_json_strings(album_id).each_with_index do |json, photo_page_nr|
    puts "Downloading pictures from page #{photo_page_nr}"
    photos = Array(JSON.parse(json)['photos'])
    photos.each_with_index do |photo, photo_nr|
      photo_url = photo['url']
      # A photo without a caption yields nil; treat it as an empty caption.
      photo_caption = sanitize_file_name(photo['caption'].to_s)
      # Running number within the album.
      # NOTE(review): assumes 20 photos per gallery page - value kept from the
      # original code, TODO confirm.
      photo_number = '%03d' % (photo_page_nr * 20 + photo_nr)
      photo_name = photo_caption.empty? ? photo_number : "#{photo_number} #{photo_caption}"
      file_base_name = "#{photo_name}.jpg"
      file_name = File.join(dir_path, file_base_name)

      # Skip if file exists
      if File.exist?(file_name)
        puts "Skip: #{file_base_name} exists"
        next
      end

      # Rename file with updated caption (only based on self generated photo
      # number). The number must be followed by " " or "." so that "100" does
      # not match a file that belongs to photo "1000".
      number_prefix = /\A#{Regexp.escape(photo_number)}[ .]/
      existing = dir_entries.find { |entry| entry =~ number_prefix }
      if existing && existing != file_base_name
        puts "Rename: #{existing} to #{file_base_name}"
        FileUtils.mv(File.join(dir_path, existing), file_name)
        next
      end

      # Download file
      begin
        puts "Download: #{File.basename(photo_url)} as #{File.basename(file_name)}"
        # Fetch first and write afterwards: a failed download must not leave an
        # empty file behind, because the next run would skip it as existing.
        data = URI.open(photo_url, &:read)
        File.binwrite(file_name, data)
        sleep 0.5 # pause between downloads
      rescue StandardError => e
        puts "ERROR saving #{photo_url}: #{e.message}"
      end
    end
  end
end
puts "FINISHED!"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment