Skip to content

Instantly share code, notes, and snippets.

@NaelsonDouglas
Created June 17, 2023 06:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save NaelsonDouglas/01d97018c6deee8c1faa1e4b9fd0faea to your computer and use it in GitHub Desktop.
Save NaelsonDouglas/01d97018c6deee8c1faa1e4b9fd0faea to your computer and use it in GitHub Desktop.
Here is boilerplate for a Python Selenium web scraper that can download the HTML source code of any page.
import ssl
from time import sleep
from selenium import webdriver
from selenium.webdriver import firefox
# WARNING: replaces the stdlib factory for default HTTPS contexts with the
# unverified one, so certificate verification is disabled process-wide for
# any code relying on ssl's default HTTPS context.
# NOTE(review): presumably added to work around a certificate error in some
# download step -- confirm it is still needed.
ssl._create_default_https_context = ssl._create_unverified_context
class SeleniumConnector():
    '''
    Singleton wrapper around a headless Firefox WebDriver.

    Calling the instance with a URL loads the page and returns its HTML
    source. The browser is started lazily on the first call and reused by
    every later call; use close() to shut it down.
    '''
    # Lazily created WebDriver; None until the first page is requested.
    driver = None

    def __call__(self, search: str, wait: int = 2) -> str:
        '''
        Gets the html from the website

        search: URL handed to the driver's get().
        wait: seconds to sleep after get() returns and before the page
            source is read.
        Returns the driver's page_source for the loaded page.
        '''
        driver = self._get_selenium_driver()
        driver.get(search)
        sleep(wait)
        return driver.page_source

    def __new__(cls):
        ''' Defines the class as a singleton, thus preventing multiple browser instances'''
        if not hasattr(cls, 'instance'):
            cls.instance = super(SeleniumConnector, cls).__new__(cls)
        return cls.instance

    def _get_selenium_driver(self) -> 'webdriver.Firefox':
        '''
        Returns the shared Firefox driver, creating it on first use.
        '''
        if self.driver is None:
            options = firefox.options.Options()
            options.add_argument('--headless')
            # 'javascript.enabled' is a Firefox preference name, not a
            # command-line flag, so it is set through set_preference.
            options.set_preference('javascript.enabled', True)
            # NOTE(review): '--no-sandbox' looks like a Chromium flag; kept
            # as in the original -- confirm Firefox needs or accepts it.
            options.add_argument('--no-sandbox')
            service = firefox.service.Service('geckodriver')
            self.driver = webdriver.Firefox(service=service, options=options)
        return self.driver

    def close(self) -> None:
        '''
        Quits the browser if one was started and forgets the driver, so a
        later call starts a fresh one.
        '''
        if self.driver is not None:
            try:
                self.driver.quit()
            finally:
                # Drop the reference even if quit() raised, so a broken
                # driver is not reused.
                self.driver = None
if __name__ == '__main__':
    # Example usage: fetch the HTML of one page with the shared connector.
    selenium = SeleniumConnector()
    try:
        source = selenium('https://www.google.com/')
    finally:
        # Quit the headless browser so no Firefox/geckodriver process is
        # left running after the script exits.
        if selenium.driver is not None:
            selenium.driver.quit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment