Skip to content

Instantly share code, notes, and snippets.

@328
Last active September 13, 2020 13:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save 328/67b2b4dff620adc32d1a66a9a3433094 to your computer and use it in GitHub Desktop.
Save 328/67b2b4dff620adc32d1a66a9a3433094 to your computer and use it in GitHub Desktop.
lambda-ruby-selenium-activerecord
require "active_record"
require "selenium-webdriver"
# Connect ActiveRecord to MySQL once, when the file is loaded. Every
# credential is read from the environment so nothing secret lives in the code.
db_config = {
  adapter: "mysql2",
  host: ENV["DB_HOST"],
  username: ENV["DB_USER"],
  password: ENV["DB_PASSWORD"],
  database: ENV["DB_NAME"]
}
ActiveRecord::Base.establish_connection(db_config)
# ActiveRecord model for rows written by the Lambda handler.
#
# The table name is set explicitly to the singular `crawler`, overriding
# ActiveRecord's default pluralised table-name inference.
class Crawler < ActiveRecord::Base
  self.table_name = 'crawler'
end
# AWS Lambda entry point: opens https://google.com in a browser session,
# reads the page title and stores it as a new Crawler row.
#
# @param event [Object] invocation payload (not used by this handler)
# @param context [Object] Lambda runtime context (not used by this handler)
# @return [nil]
def lambda_handler(event:, context:)
  driver = setup_driver

  begin
    driver.navigate.to 'https://google.com'

    record = Crawler.new
    # Ask the driver for the document title. Looking up the <title> element
    # and calling #text yields an empty string, because #text only returns
    # rendered (visible) text and <title> is never rendered.
    record.title = driver.title
    record.save
  ensure
    # Always shut the browser down, even when navigation or the database
    # write raises; otherwise the browser session is left running.
    driver.quit
  end

  nil
end
# Builds the Chrome WebDriver session used by the handler.
#
# The chromedriver executable is taken from /opt/bin/chromedriver and the
# browser options come from #driver_options. The HTTP client that talks to
# chromedriver is given a read timeout of 20.
#
# @return [Object] whatever Selenium::WebDriver.for returns for :chrome
def setup_driver
  chromedriver = Selenium::WebDriver::Service.chrome(path: '/opt/bin/chromedriver')
  http_client = Selenium::WebDriver::Remote::Http::Default.new.tap do |http|
    http.read_timeout = 20 # seconds
  end

  Selenium::WebDriver.for(
    :chrome,
    service: chromedriver,
    options: driver_options,
    http_client: http_client
  )
end
# Builds the Chrome options for the browser binary at
# /opt/bin/headless-chromium, adding each command-line switch below.
#
# @return [Selenium::WebDriver::Chrome::Options]
def driver_options
  options = Selenium::WebDriver::Chrome::Options.new(binary: '/opt/bin/headless-chromium')

  # One switch per entry. Using %w avoids Ruby's implicit concatenation of
  # adjacent string literals, which would silently merge two switches into
  # one if a separator were ever left out.
  arguments = %w[
    --headless
    --disable-gpu
    --window-size=1280x1696
    --disable-application-cache
    --disable-infobars
    --no-sandbox
    --hide-scrollbars
    --enable-logging
    --log-level=0
    --single-process
    --ignore-certificate-errors
    --homedir=/tmp
  ]
  arguments.each { |argument| options.add_argument(argument) }

  options
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment