Skip to content

@bdcravens /grab-tapas.rb
Created

Embed URL

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Ruby Tapas scraper
require "mechanize"
# Log in to the Ruby Tapas subscriber area and keep the page that results
# from submitting the login form in `content_page`.
#
# Credentials are read from the environment so that the script runs without
# editing and no secret is stored in the source:
#   TAPAS_USERNAME - account user name
#   TAPAS_PASSWORD - account password
# ENV.fetch raises KeyError with the missing variable name if either is unset.
tapas_username = ENV.fetch("TAPAS_USERNAME")
tapas_password = ENV.fetch("TAPAS_PASSWORD")

agent = Mechanize.new
# Send a desktop Safari user agent string instead of Mechanize's default.
agent.user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) " \
                   "AppleWebKit/536.30.1 (KHTML, like Gecko) Version/6.0.5" \
                   " Safari/536.30.1"

page = agent.get "https://rubytapas.dpdcart.com/subscriber/content"

# form_with returns nil when no form matches; fail with a clear message
# instead of a NoMethodError on nil further down.
login_form = page.form_with(id: "login-form")
raise "Login form (id=login-form) not found on #{page.uri}" unless login_form

login_form.field_with(id: "username").value = tapas_username
login_form.field_with(id: "password").value = tapas_password
content_page = agent.submit login_form
# Convert an arbitrary title into a string usable as a file or directory name.
#
# @param filename [String] the raw name, e.g. an episode title
# @return [String] the name with punctuation removed and whitespace runs
#   replaced by underscores
def friendly_filename(filename)
  # Keep only word characters, whitespace, "_" and "-".
  without_punctuation = filename.gsub(/[^\w\s_-]+/, '')

  # Drop whitespace at the start of a line and shrink whitespace runs that
  # follow a word, so removed punctuation does not leave extra gaps behind.
  tidied = without_punctuation.gsub(/(^|\b\s)\s+($|\s?\b)/, '\\1\\2')

  # Whatever whitespace remains becomes a single underscore per run.
  tidied.gsub(/\s+/, '_')
end
# For every "File Attachment" link on the content page, open the linked page
# and mirror it into a directory named after the page title: the text of
# div.blog-content goes to description.txt and every link whose href matches
# /download/ is saved under its link text. Existing files are left untouched,
# so the script can be re-run to pick up where it stopped.
content_page.links_with(text: /File Attachment/).each do |link|
  files_page = link.click

  # Use the part of the <title> before the first "|" as the directory name.
  # `.first` is nil when the title is empty, hence the to_s.
  dir_name_pre = files_page.parser.css('title').text.strip.split('|').first
  dir_name = friendly_filename(dir_name_pre.to_s.strip)
  if dir_name.empty?
    # Dir.mkdir("") would raise; skip this page rather than abort the run.
    warn "Skipping attachment page without a usable title: #{link.href}"
    next
  end
  Dir.mkdir(dir_name) unless File.directory?(dir_name)

  description = files_page.parser.css('div.blog-content').text.strip
  description_path = File.join(dir_name, 'description.txt')
  # File.exist? is used because File.exists? was removed in Ruby 3.2.
  unless File.exist?(description_path)
    File.open(description_path, "w") { |f| f.puts description }
  end

  puts "Process tapa: #{dir_name}"
  files_page.links_with(href: /download/).each do |file|
    target_path = File.join(dir_name, file.text)
    if File.exist?(target_path)
      # Report skips separately so the log does not claim a download that
      # never happened.
      puts "#{file.text} already exists, skipped"
    else
      file.click.save(target_path)
      puts "#{file.text} downloaded"
    end
  end
  puts '******************************************************************'
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.