require 'rubygems'
require 'fileutils'
require 'open-uri'
require 'nokogiri'
require 'httparty'
# Both command-line arguments (the terms file and the destination folder)
# are mandatory; print the usage line and stop with status 1 otherwise.
if ARGV.length < 2
  puts "Usage: ruby sketchup_downloader.rb path/to/terms.csv path/to/folder"
  exit 1
end
# Search terms: the comma-separated contents of the file named by the first
# argument. File.read (instead of Kernel#open) closes the file again and never
# interprets a path beginning with "|" as a command to run. Each term is
# stripped so that spaces after commas and the file's trailing newline do not
# leak into the search query or the folder name; blank entries are dropped.
SEARCH_TERMS = File.read(ARGV[0]).split(",").map(&:strip).reject(&:empty?)
# Folder (second argument) that receives one sub-folder per search term.
TARGET_FOLDER = ARGV[1]
puts "Search terms: #{SEARCH_TERMS.inspect}"
puts "Target folder: #{TARGET_FOLDER}"
ROOT_URL = "http://sketchup.google.com"
# collect all known file names (any .skp one level below TARGET_FOLDER) so
# models that were downloaded earlier can be skipped
@known_models = Dir.glob(File.join(TARGET_FOLDER, "*", "*.skp")).map { |path| File.basename(path) }
puts "Known models: #{@known_models.join(",")}"
# Downloads every model the 3D Warehouse search lists for +term+, walking the
# result pages 12 entries at a time beginning at offset +start+.
#
# Uses names defined elsewhere in this script: ROOT_URL, TARGET_FOLDER and
# @known_models (file names already present, used to skip re-downloads; every
# newly saved file name is appended to it). Files are written into
# "#{TARGET_FOLDER}/#{term}/", which must already exist.
#
# term  - search term (String); also the name of the sub-folder written to.
# start - zero-based offset of the first result to request (default 0).
def download_all_pages_for(term, start=0)
  page_size = 12 # the search pager advances in steps of 12 results
  # URI.escape was removed in Ruby 3.0; form-encode the query value instead.
  query = URI.encode_www_form_component(term)

  # Iterate over the pages instead of recursing so a long result list cannot
  # exhaust the stack.
  loop do
    fetch_url = "#{ROOT_URL}/3dwarehouse/search?q=#{query}&start=#{start}&scoring=t"
    puts "fetching URL #{fetch_url}"
    # URI.open (open-uri) replaces Kernel#open, which no longer accepts URLs
    # in Ruby 3.0+; the block form closes the handle once the page is read.
    doc = Nokogiri::HTML(URI.open(fetch_url, &:read))
    results = doc.css(".dwnld")
    puts "#{results.length} results (#{start}-#{start + page_size - 1})"

    results.each do |link|
      download_url = ROOT_URL + link["href"]
      # parse the model number out of the url
      model_number = download_url.split(/\&|\?|=/)[2]
      # if we already have a file for that model, skip it
      if @known_models.include?("#{model_number}.skp")
        puts "skipping #{model_number}.skp. we already have it"
        next
      end

      puts "downloading #{download_url}"
      result = HTTParty.get(download_url)
      # Prefer the id reported by the server, but fall back to the id parsed
      # from the URL rather than crashing when the header is absent.
      model_id = result.headers["x-3dwarehouse-modelid"] || model_number
      unless model_id
        warn "no model id available for #{download_url}, not saving it"
        next
      end

      filename = "#{model_id}.skp"
      # "wb": the payload is binary, so it must not go through newline
      # translation on platforms that distinguish text and binary mode.
      File.open(File.join(TARGET_FOLDER, term, filename), "wb") { |f| f << result.parsed_response }
      # add this new filename to the list of known models
      @known_models << filename
    end

    # No "next" pager element on the page means this was the last page.
    break if doc.css(".pager_next").empty?

    start += page_size
  end
end
# For each search term: print a banner, make sure the term's sub-folder exists
# under TARGET_FOLDER, then download every result page for that term.
SEARCH_TERMS.each do |term|
  puts
  puts "Searching for #{term}..."
  puts "========================"
  puts "mkdir #{TARGET_FOLDER}/#{term}"
  # Create the folder through FileUtils rather than a backtick `mkdir`: the
  # term comes from a file and must not be interpolated into a shell command.
  # mkdir_p also succeeds quietly when the folder is already there.
  FileUtils.mkdir_p(File.join(TARGET_FOLDER, term))
  puts
  download_all_pages_for(term)
end
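# NOTE: the text below is GitHub page footer that was captured together with
# the script; it is not part of the program.
#   Sign up for free to join this conversation on GitHub.
#   Already have an account? Sign in to comment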