Skip to content

Instantly share code, notes, and snippets.

@fzero
Created December 20, 2019 01:26
Show Gist options
  • Save fzero/377e0719e94706f4a9aad81b18e6bf07 to your computer and use it in GitHub Desktop.
Save fzero/377e0719e94706f4a9aad81b18e6bf07 to your computer and use it in GitHub Desktop.
Threaded download from Flickr (not a complete script, but the main part)
require 'fileutils'

require 'flickr_fu'
require 'httparty'
require 'pry'
# Directory that downloaded photos and their companion text files are written to.
DIR = './photos'.freeze
# Maximum number of download threads the main loop keeps in its pool.
MAXTHREADS = 30

# Prepare the downloadin': create the target directory (and any missing
# parents) without shelling out, so this also works where `mkdir -p` is
# unavailable and never passes DIR through a shell.
FileUtils.mkdir_p(DIR)

# Load flickr. The client is configured from 'flickr.yml'
# (presumably API key/secret -- confirm against the flickr_fu docs).
flickr = Flickr.new('flickr.yml')
# Fetches +url+ over HTTP and stores the response body at +filename+.
#
# Nothing is fetched when +filename+ already exists, so re-running the script
# skips photos that were downloaded earlier. The file is only written for a
# 200 response that carries a body.
#
# @param url [String] address of the resource to fetch
# @param filename [String] path the body is written to (binary mode)
# @return [Integer, nil] number of bytes written, or nil when the download
#   was skipped or the response was not usable
def download(url, filename)
  # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
  return if File.exist?(filename)

  response = HTTParty.get(url)
  return unless response.code == 200 && response.body

  # binwrite opens, writes and closes the file in one call, so the handle is
  # never leaked and the data is flushed before we return.
  File.binwrite(filename, response.body)
end
# Renders a timestamp as a filename-friendly string of the form
# "YYYY-MM-DD_HH-MM-SS". Every field except the year is zero-padded to two
# digits.
#
# @param taken_at [#year, #month, #day, #hour, #min, #sec] timestamp to render
# @return [String] the formatted timestamp
def pretty_taken_at(taken_at)
  date_part = format('%s-%02d-%02d', taken_at.year, taken_at.month, taken_at.day)
  time_part = format('%02d-%02d-%02d', taken_at.hour, taken_at.min, taken_at.sec)
  "#{date_part}_#{time_part}"
end
# Walk through the user's photo pages, downloading every photo (plus a text
# file holding its title and description) on a bounded pool of threads.
page = 1
pages = nil
threads = []

while (photos = flickr.photos.search(user_id: '76764884@N00', page: page))
  # The page count is remembered from the first response; the loop is ended
  # by hand below once it has been exceeded (the search call itself is not
  # relied on to signal the last page).
  pages ||= photos.pages
  puts "\nPage #{page}/#{pages}\n\n"

  photos.each do |photo|
    taken_at = pretty_taken_at(photo.taken_at)
    slugged_title = photo.title.gsub(/\W/, '_')
    base_filename = "#{DIR}/#{taken_at}_#{photo.id}_#{slugged_title}"
    photo_filename = "#{base_filename}.jpg"
    text_filename = "#{base_filename}.txt"
    photo_url = photo.url(:original)

    # Pool is full: wait for the oldest download to finish before starting
    # another one. The current photo is always scheduled afterwards, so no
    # photo is dropped, and the array is never mutated while being iterated.
    threads.shift.join while threads.size >= MAXTHREADS

    threads << Thread.new do
      puts "Downloading \"#{photo.title}\" to #{photo_filename}"
      download(photo_url, photo_filename)
      # Write a text file with the original photo title and description
      File.open(text_filename, 'w') do |f|
        f.puts "#{photo.title}\n\n#{photo.description}"
      end
    end
  end

  # Advance manually and stop after the last known page.
  page += 1
  break if page > pages
end

# Block until every remaining download has finished. Joining (rather than
# polling the array size) guarantees termination and also covers the case
# where the search call ended the loop by returning a falsy value.
threads.each(&:join)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment