Skip to content

Instantly share code, notes, and snippets.

@msguner
Forked from rengler33/scrape_with_logs.py
Created December 27, 2021 00:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save msguner/a10c96aababb5e87faec56e9253fae40 to your computer and use it in GitHub Desktop.
Save msguner/a10c96aababb5e87faec56e9253fae40 to your computer and use it in GitHub Desktop.
How to Capture Network Traffic When Scraping with Selenium & Python
# see rkengler.com for related blog post
# https://www.rkengler.com/how-to-capture-network-traffic-when-scraping-with-selenium-and-python/
import json
import pprint
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
# Configure Chrome so that chromedriver records DevTools "performance" log
# entries, which is where the network events are reported.
#
# Work on a copy: DesiredCapabilities.CHROME is a class-level dict shared by
# everything that imports it, so mutating it in place would leak the logging
# preference into any other driver created in this process.
capabilities = DesiredCapabilities.CHROME.copy()
# capabilities["loggingPrefs"] = {"performance": "ALL"}  # chromedriver < ~75
capabilities["goog:loggingPrefs"] = {"performance": "ALL"}  # chromedriver 75+
# NOTE(review): newer Selenium 4 releases appear to have dropped both the
# positional driver path and `desired_capabilities` in favour of
# Service/Options objects -- confirm against the installed Selenium version.
driver = webdriver.Chrome(
    r"chromedriver.exe",
    desired_capabilities=capabilities,
)
def process_browser_logs_for_network_events(logs):
    """
    Yield only the log messages whose method starts with "Network.response",
    "Network.request", or "Network.webSocket", since we're interested in the
    network events specifically.

    Args:
        logs: iterable of log entries. Each entry is a mapping whose
            "message" value is a JSON string; the decoded object carries the
            actual event under a nested "message" key.

    Yields:
        The decoded inner "message" dict of every entry whose "method"
        begins with one of the network prefixes. Entries without a "method"
        key are skipped rather than raising KeyError.
    """
    network_prefixes = (
        "Network.response",
        "Network.request",
        "Network.webSocket",
    )
    for entry in logs:
        log = json.loads(entry["message"])["message"]
        # str.startswith accepts a tuple, so one call covers all prefixes and
        # matches the documented "starts with" contract (the previous
        # substring test would also accept the text mid-string).
        if log.get("method", "").startswith(network_prefixes):
            yield log
# Load the page and pull the accumulated performance log, then always shut
# the browser down -- even if navigation or log retrieval fails -- so no
# Chrome/chromedriver process is left running after the script ends.
try:
    driver.get("https://www.rkengler.com")
    logs = driver.get_log("performance")
finally:
    driver.quit()

# `logs` is already fully in memory, so the (lazy) filtering below is safe to
# run after the driver has been closed.
events = process_browser_logs_for_network_events(logs)

# Explicit UTF-8: event payloads (URLs, header values) may contain characters
# that the platform default encoding cannot represent.
with open("log_entries.txt", "wt", encoding="utf-8") as out:
    for event in events:
        pprint.pprint(event, stream=out)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment