Skip to content

Instantly share code, notes, and snippets.

@deepflame
Created September 25, 2010 11:59
Show Gist options
  • Save deepflame/596774 to your computer and use it in GitHub Desktop.
Save deepflame/596774 to your computer and use it in GitHub Desktop.
Grabs all public photos from a Friendster account (www.friendster.com).
require 'rubygems'
require 'nokogiri'
require 'open-uri'
require 'json'
require "cgi"
require 'fileutils'
# Fetches a page from www.friendster.com and parses it as HTML.
#
# @param rel_url [#to_s] site-relative URL; it is appended directly after the
#   host name, so it is expected to start with "/"
# @return the Nokogiri document parsed from the response body
def fdoc(rel_url)
  # Kernel#open no longer opens URLs (open-uri dropped that in Ruby 3.0), so
  # go through URI.open. The block form closes the stream once it is parsed.
  URI.open("http://www.friendster.com#{rel_url}") { |response| Nokogiri::HTML(response) }
end
# Looks up the display name shown on a user's profile page.
#
# The text of the first ".username" element is split on whitespace, every word
# is capitalized and the words are re-joined with single spaces.
# Names are cached per id, so asking for a second user no longer returns the
# name that was fetched for the first one.
#
# @param id the user id whose profile page ("/<id>") is fetched
# @return [String] the capitalized user name
# @raise [NoMethodError] if the page has no ".username" element
def user_name(id = USER_ID)
  @user_names ||= {}
  @user_names[id] ||= fdoc("/#{id}").css(".username").first.content.to_s.split(' ').map(&:capitalize).join(' ')
end
#
# Configuration: the account to download and the folder its pictures go into
#
USER_ID = 1234567
account_label = user_name
DEST_DIR = "#{USER_ID} - #{account_label}"
puts "User: #{USER_ID} (#{account_label})"
# Collects the album thumbnail links ('a.albumThumb') of a user.
#
# Album overview pages are requested one after another, starting at page 0,
# until a page contains no album links.
#
# @param user_id the account whose albums are listed
# @return [Array] every album link found, in page order
def album_links(user_id = USER_ID)
  collected = []
  page_index = 0
  until (thumbs = fdoc("/viewalbums.php?page=#{page_index}&uid=#{user_id}").css('a.albumThumb')).empty?
    collected.concat(thumbs)
    page_index += 1
  end
  puts "User has #{collected.length} Album(s)"
  collected
end
# Collects the raw photo-list JSON for every page of an album.
#
# For each album page (starting at page 0) the first link inside
# "div#photoGallery" is followed. On that gallery page the first script inside
# "div#twoColContainer" is searched for the line mentioning `_pl`; everything
# after the first "=" on that line is taken as the JSON text. Paging stops at
# the first album page without a gallery link.
#
# @param album_id the album to read
# @param user_id the account that owns the album
# @return [Array<String>] one JSON string per album page, in page order
# @raise [RuntimeError] if a gallery page has no script line containing `_pl`
def picture_json_strings(album_id, user_id = USER_ID)
  json_strings = []
  page_nr = 0
  loop do
    page = fdoc("/viewphotos.php?page=#{page_nr}&a=#{album_id}&uid=#{user_id}")
    gallery_link = page.css("div#photoGallery a").first
    break if gallery_link.nil?

    gallery_url = gallery_link.attributes['href']
    script_tag = fdoc(gallery_url).css("div#twoColContainer script").first
    js_line = script_tag && script_tag.content.split("\n").find { |line| line.include?('_pl') }
    raise "No '_pl' photo list found on #{gallery_url}" if js_line.nil?

    # Drop only a trailing statement terminator; a blind chop would cut off the
    # closing brace of the JSON whenever the line does not end in ";".
    json_strings << js_line.split('=', 2).last.strip.sub(/;\z/, '')
    page_nr += 1
  end
  puts "Album has #{json_strings.length} page(s)"
  json_strings
end
# Turns arbitrary text into a conservative file-name fragment.
#
# Every character other than ASCII letters, digits, "." and "-" becomes a
# space; runs of spaces are then collapsed to one and the ends are trimmed.
#
# @param string [String] text such as an album title or photo caption
# @return [String] the cleaned-up text (may be empty)
def sanitize_file_name(string)
  spaced = string.tr('^0-9A-Za-z.\-', ' ')
  spaced.squeeze(' ').strip
end
# Go through every album of the configured user and mirror its photos into
# DEST_DIR/<album title>/, one numbered JPEG per photo.
album_links.each do |album_link|
  album_title = album_link.attributes['title'].to_s
  album_url = album_link.attributes['href'].to_s
  puts ""

  # Skip private albums
  if album_url.include?("private")
    puts "Skipping Album: #{album_title} (private)"
    next
  end
  puts "Album: '#{album_title}'"

  # Get the album ID from the URL. CGI.parse maps every key to an Array of
  # values, so take the first one; interpolating the Array itself would put
  # its inspect form (["123"]) into the request URL.
  album_id = CGI.parse(album_url.split('?').last)['a'].first
  if album_id.nil?
    puts "Skipping Album: #{album_title} (no album id in URL)"
    next
  end

  dir_name = sanitize_file_name(album_title)
  # A title without any usable character (or made of dots only) would put the
  # pictures into DEST_DIR itself or into a parent directory, so fall back to
  # a name derived from the album id.
  dir_name = "Album #{album_id}" if dir_name.delete('.').empty?
  dir_path = File.join(DEST_DIR, dir_name)
  FileUtils.mkdir_p(dir_path) # no-op when the directory already exists
  dir_entries = Dir.entries(dir_path)

  picture_json_strings(album_id).each_with_index do |json, photo_page_nr|
    puts "Downloading pictures from page #{photo_page_nr}"
    photos = JSON.parse(json)['photos'] || []
    photos.each_with_index do |photo, photo_nr|
      photo_url = photo['url']
      photo_caption = sanitize_file_name(photo['caption'].to_s)
      # Running number across pages; the factor 20 is the page size this
      # script assumes for the photo listing.
      photo_number = '%03d' % (photo_page_nr * 20 + photo_nr)
      photo_name = photo_caption.empty? ? photo_number : "#{photo_number} #{photo_caption}"
      file_base_name = "#{photo_name}.jpg"
      file_name = File.join(dir_path, file_base_name)

      # Skip if file exists
      if File.exist?(file_name)
        puts "Skip: #{file_base_name} exists"
        next
      end

      # Rename a file saved earlier under the same photo number but with a
      # different caption. Only names this script generates are matched
      # ("NNN.jpg" or "NNN caption.jpg"), so number 100 never picks up
      # "1000 ...".
      previous_name = dir_entries.find do |entry|
        entry != file_base_name &&
          (entry == "#{photo_number}.jpg" || entry.start_with?("#{photo_number} "))
      end
      if previous_name
        puts "Rename: #{previous_name} to #{file_base_name}"
        FileUtils.mv(File.join(dir_path, previous_name), file_name)
        next
      end

      # Download file
      begin
        puts "Download: #{File.basename(photo_url)} as #{file_base_name}"
        # Fetch the complete picture before creating the local file, so a
        # failed download cannot leave an empty file behind that later runs
        # would skip as "exists".
        data = URI.open(photo_url) { |remote| remote.read }
        File.open(file_name, 'wb') { |file| file.write(data) }
        sleep 0.5 # short pause between downloads
      rescue StandardError => e
        puts "ERROR saving #{photo_url}: #{e.message}"
      end
    end
  end
end
puts "FINISHED!"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment