Skip to content

Instantly share code, notes, and snippets.

@hmatringe
Created May 19, 2022 11:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hmatringe/a33b6ddd2522fffdf07aa82eccba4aa2 to your computer and use it in GitHub Desktop.
Save hmatringe/a33b6ddd2522fffdf07aa82eccba4aa2 to your computer and use it in GitHub Desktop.
require 'ferrum'
require 'open-uri'
require 'pry'
require 'csv'
# --- Interactive setup -------------------------------------------------------
# Ask for the search term first, then for the number of result pages to walk.
# Both answers are read from standard input, one line each.
puts "What recipes are we looking for?"
search_answer = gets.chomp
query = search_answer

puts "How many pages of results do we need? (sorted by relevance)"
pages_answer = gets.chomp
# String#to_i yields 0 for non-numeric input, in which case no page is scraped.
max_pages = pages_answer.to_i

# Status line followed by three blank lines of visual padding.
puts 'loading the magic', '', '', ''
# Clicks the first `button[mode="primary"]` element on the current page, which
# is how this script gets rid of the GDPR popup.
#
# @param browser [#execute] object able to run a JavaScript snippet in the page
# @return [Object] whatever `browser.execute` returns
def click_away_gdpr_popup(browser)
  # The JavaScript optional chaining (`?.`) turns the click into a no-op when
  # no matching button is present.
  browser.execute("document.querySelector('button[mode=\"primary\"]')?.click()\n")
end
# Pauses for a random 0.5–1.5 seconds and then scrolls the page to a random
# vertical offset between 400 and 1200.
#
# @param browser [#execute] object able to run a JavaScript snippet in the page
# @return [Object] whatever `browser.execute` returns
def move_like_a_human(browser)
  pause = rand(0.5..1.5)
  sleep(pause)

  scroll_y = rand(400..1200)
  browser.execute("window.scrollTo(0, #{scroll_y})\n")
end
# Options handed to every Ferrum::Browser.new call below.
browser_options = {
  timeout: 10,
  process_timeout: 10,
  headless: false
}
# CSS class that marks one recipe card in the search results.
card_selector = '.standard-card-new--skinny'
# Encode the user's text so spaces, '&', '#', etc. cannot corrupt the query
# string. URI is available because the file requires 'open-uri'.
encoded_query = URI.encode_www_form_component(query)
# Accumulates one [title, description, image url] row per recipe card.
scraped_cards = []

# Walk the result pages in order; a fresh browser is started for each page.
1.upto(max_pages) do |page|
  puts "handling page #{page}"
  browser = Ferrum::Browser.new browser_options
  begin
    browser.goto "https://www.bbcgoodfood.com/search/recipes/page/#{page}/?q=#{encoded_query}&sort=-relevance"
    click_away_gdpr_popup(browser)
    move_like_a_human(browser)

    # The retry budget is reset for every page so an unlucky early page cannot
    # use up the retries of all later pages.
    retries = 0
    begin
      puts "trying to find recipes => retries #{retries}"
      # NOTE(review): per Ferrum's documentation at_css returns nil instead of
      # raising when nothing matches, so this retry presumably only fires on
      # errors such as timeouts — confirm that this is the intent.
      browser.at_css(card_selector)
    rescue StandardError
      browser.refresh
      move_like_a_human(browser)
      sleep 5
      retry if (retries += 1) < 3
    end

    # Query the cards once and reuse the result for both the emptiness check
    # and the extraction.
    cards = browser.css(card_selector)
    if cards.empty?
      puts 'no more recipes! Bye now'
      break
    end

    cards.each do |card|
      # Safe navigation: a card that lacks one of the elements contributes nil
      # for that column instead of aborting the run and losing earlier rows.
      title = card.at_css('.standard-card-new__display-title')&.inner_text
      descr = card.at_css('.standard-card-new__description')&.inner_text
      img_url = card.at_css('.img-container__image')&.attribute('src')
      scraped_cards << [title, descr, img_url]
    end

    2.times { puts '' }
    puts 'scraped_cards.count'
    puts scraped_cards.count
  ensure
    # Runs on normal completion, on `break`, and when an error propagates, so
    # the browser is always shut down.
    browser.quit
  end
end
# Write every scraped row to a CSV file; the relative file name means it lands
# in the current working directory.
puts 'making that csv now'
# The query is user input: replace path separators and characters that are
# illegal in file names on common platforms, so that a query such as
# "mac/cheese" cannot make CSV.open fail after all the scraping is done.
safe_query = query.gsub(%r{[/\\:*?"<>|]}, '_')
# The timestamp uses '_' between hours and minutes to stay file-name friendly.
csv_title = "recipes for #{safe_query} #{Time.now.strftime('%F %H_%M')}.csv"
CSV.open(csv_title, 'w') do |csv|
  csv << ['Title', 'Description', 'Image Url']
  scraped_cards.each { |scraped_card| csv << scraped_card }
end
puts "#{csv_title} created! Look for it in the same folder!"
3.times { puts '' }
puts 'magic happened'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment