Skip to content

Instantly share code, notes, and snippets.

@thatbudakguy
Created July 8, 2021 14:04
Show Gist options
  • Save thatbudakguy/e35e72835f1766450998a9612404bb57 to your computer and use it in GitHub Desktop.
Save thatbudakguy/e35e72835f1766450998a9612404bb57 to your computer and use it in GitHub Desktop.
percy scrapy crawler
from percy import percy_snapshot
from scrapy.spiders import SitemapSpider
from selenium import webdriver
class PercySpider(SitemapSpider):
    """Sitemap crawler that uploads DOM snapshots to Percy.

    Every URL listed in the local sitemap is loaded in a headless Chrome
    instance driven by Selenium, and the resulting page is handed to
    ``percy_snapshot`` once per URL with all of ``device_widths``.
    """

    name = "cdhweb"
    sitemap_urls = ["http://localhost:8000/sitemap.xml"]
    allowed_domains = ["localhost"]  # don't follow external links
    device_widths = [375, 768, 1280]  # breakpoints for this project

    def __init__(self, *args, **kwargs):
        """Initialise the spider and start the browser used for snapshots."""
        super().__init__(*args, **kwargs)
        self.get_browser()

    def get_browser(self):
        """Create a browser driver to use for taking snapshots.

        The driver is stored on ``self.browser``; nothing is returned.
        """
        # NOTE using Chrome here is arbitrary; we're just sending the DOM
        # snapshot so Percy can render it in the cloud (in parallel).
        options = webdriver.ChromeOptions()
        for flag in (
            "--no-sandbox",
            "--disable-extensions",
            "--disable-dev-shm-usage",
            "--disable-setuid-sandbox",
            "--headless",
        ):
            options.add_argument(flag)

        # The chromedriver path is relative, so it is resolved against the
        # current working directory of the crawl process.
        # NOTE(review): passing the driver path positionally looks like the
        # older Selenium call form -- confirm against the pinned version.
        self.browser = webdriver.Chrome(
            "node_modules/chromedriver/bin/chromedriver", options=options
        )

    def parse(self, response):
        """Take a snapshot of a single URL and upload to Percy.

        Args:
            response: the Scrapy response for a sitemap entry; only its
                ``url`` is used, the page itself is re-fetched by the browser.
        """
        self.browser.get(response.url)
        # NOTE(review): the page <title> doubles as the snapshot name, so
        # pages sharing a title share a name -- confirm titles are unique.
        percy_snapshot(
            self.browser,  # use configured browser for snapshots
            self.browser.title,  # use page <title> for snapshot title
            # The Percy option is ``widths`` (plural); the previous ``width``
            # keyword did not match it, so the breakpoints were not applied.
            widths=self.device_widths,  # take snapshots at all screen sizes
        )

    def closed(self, reason):
        """Shut down the browser used for taking snapshots.

        Args:
            reason: why the spider was closed; not used here.
        """
        self.browser.quit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment