
@kylemcdonald
Forked from atduskgreg/sketchup_downloader.rb
Created January 16, 2012 09:45
# Bulk-downloads SketchUp models (.skp files) from the Google 3D Warehouse:
# for every search term in a comma-separated terms file, it pages through the
# search results and saves each model it does not already have locally.
require 'rubygems'
require 'nokogiri'
require 'open-uri'
require 'httparty'
require 'fileutils'
unless ARGV[0] && ARGV[1]
  puts "Usage: ruby sketchup_downloader.rb path/to/terms.csv path/to/folder"
  exit 1
end
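# NOTE: the terms file itself is not included in this gist; it is assumed to
# be a single line of comma-separated search terms, for example:
#
#   chair,table,streetlamp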
# strip whitespace (e.g. a trailing newline) so terms make clean folder names and URLs
SEARCH_TERMS = File.read(ARGV[0]).split(",").map(&:strip).reject(&:empty?)
TARGET_FOLDER = ARGV[1]

puts "Search terms: #{SEARCH_TERMS.inspect}"
puts "Target folder: #{TARGET_FOLDER}"

ROOT_URL = "http://sketchup.google.com"

# collect the file names of all models already downloaded so reruns can skip them
@known_models = Dir.glob("#{TARGET_FOLDER}/*/*.skp").collect { |path| path.split("/").last }
puts "Known models: #{@known_models.join(",")}"
# Fetches one page (12 results) of 3D Warehouse search results for `term`,
# downloads every model that is not already on disk, then recurses onto the
# next page for as long as a "next" pager link is present.
def download_all_pages_for(term, start = 0)
  fetch_url = "#{ROOT_URL}/3dwarehouse/search?q=#{URI.escape(term)}&start=#{start}&scoring=t"
  puts "fetching URL #{fetch_url}"
  page = open(fetch_url).read
  doc = Nokogiri::HTML(page)
  results = doc.css(".dwnld")
  puts "#{results.length} results (#{start}-#{start + 11})"
  results.each do |link|
    download_url = ROOT_URL + link["href"]
    # parse the model number out of the URL's query string
    model_number = download_url.split(/\&|\?|=/)[2]
    # if we already have a file for that model, skip it
    if @known_models.include?("#{model_number}.skp")
      puts "skipping #{model_number}.skp, we already have it"
    else
      puts "downloading #{download_url}"
      result = HTTParty.get download_url
      filename = result.headers["x-3dwarehouse-modelid"] + ".skp"
      # write in binary mode so the .skp data is not mangled on Windows
      File.open("#{TARGET_FOLDER}/#{term}/#{filename}", "wb") { |f| f << result.body }
      # add this new filename to the list of known models
      @known_models << filename
    end
  end
  download_all_pages_for(term, start + 12) unless doc.css(".pager_next").empty?
end
SEARCH_TERMS.each do |term|
  puts
  puts "Searching for #{term}..."
  puts "========================"
  # one subfolder per search term; mkdir_p is a no-op if the folder already exists
  puts "creating folder #{TARGET_FOLDER}/#{term}"
  FileUtils.mkdir_p("#{TARGET_FOLDER}/#{term}")
  puts
  download_all_pages_for(term)
end
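For reference, a typical invocation might look like this (the file and folder names are illustrative, not part of the original gist):

    ruby sketchup_downloader.rb terms.csv models

The script creates one subfolder per search term under the target folder and skips any .skp file it has already saved, so it is safe to rerun.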