@martyglaubitz
Last active May 21, 2021 08:22
Script which, given the URL of a sitemap, fetches each URL it lists and runs a Lighthouse check on it
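The script shells out to the Lighthouse CLI and caches one JSON report per page next to the script, so re-runs only audit pages that have no report yet. It assumes the lighthouse command is available on the PATH, e.g. installed globally via npm:

    npm install -g lighthouse
    python sitemap_lighthouse.py https://your.domain/sitemap.xml lighthouse-results.csv
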
import csv
import dataclasses
import json
import os.path
import subprocess
import sys
import urllib.parse
import urllib.request
import xml.dom.minidom


@dataclasses.dataclass
class LighthouseResult:
    url: str
    accessibility: float
    best_practices: float
    performance: float
    pwa: float
    seo: float


def get_sitemap_urls(sitemap_url: str):
    """Fetch the sitemap and return the URL inside every <loc> element."""
    response = urllib.request.urlopen(sitemap_url)
    sitemap_xml = response.read().decode('utf-8')
    document = xml.dom.minidom.parseString(sitemap_xml)
    return [node.firstChild.nodeValue for node in document.getElementsByTagName('loc')]
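
# For reference, the parser above expects the standard sitemap format
# (sitemaps.org) and simply collects every <loc> element. A minimal example,
# with placeholder URLs:
#
#   <?xml version="1.0" encoding="UTF-8"?>
#   <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
#     <url>
#       <loc>https://your.domain/</loc>
#     </url>
#     <url>
#       <loc>https://your.domain/about</loc>
#     </url>
#   </urlset>

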
def get_lighthouse_result(url: str) -> LighthouseResult:
    url_parsed = urllib.parse.urlparse(url)
    # create a valid filename from the URL
    output_filename = url_parsed.hostname + url_parsed.path.replace('/', '-') + '.json'
    target_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), output_filename)
    # only run Lighthouse when there is no cached report for this URL yet
    if not os.path.exists(target_path):
        # pass the arguments as a list without shell=True: on POSIX, shell=True
        # combined with a list would use only the first element as the command
        subprocess.call(['lighthouse', '--output', 'json', '--output-path', target_path, url])
    with open(target_path, encoding='utf-8') as json_file:
        result_json = json.load(json_file)
    # here is the place to extract more information from the lighthouse result
    categories = result_json['categories']
    accessibility = categories['accessibility']
    performance = categories['performance']
    pwa = categories['pwa']
    best_practices = categories['best-practices']
    seo = categories['seo']
    return LighthouseResult(
        url=url,
        accessibility=accessibility['score'],
        best_practices=best_practices['score'],
        performance=performance['score'],
        pwa=pwa['score'],
        seo=seo['score']
    )
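
# The relevant slice of a Lighthouse JSON report looks roughly like this
# (the keys match the lookups above; scores are fractions between 0 and 1,
# and the numbers here are made up):
#
#   {
#     "categories": {
#       "performance": {"score": 0.92, ...},
#       "accessibility": {"score": 0.88, ...},
#       "best-practices": {"score": 1.0, ...},
#       "seo": {"score": 0.9, ...},
#       "pwa": {"score": 0.3, ...}
#     }
#   }

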
def write_csv(lighthouse_results, csv_filepath: str):
    """Write one pipe-delimited row of scores per audited URL."""
    with open(csv_filepath, 'w', newline='', encoding='utf-8') as csvfile:
        csv_writer = csv.DictWriter(
            csvfile,
            fieldnames=['url', 'performance', 'accessibility', 'best_practices', 'pwa', 'seo'],
            delimiter='|',
            lineterminator='\n'
        )
        csv_writer.writeheader()
        for result in lighthouse_results:
            csv_writer.writerow(dict(
                url=result.url,
                performance=result.performance,
                accessibility=result.accessibility,
                best_practices=result.best_practices,
                pwa=result.pwa,
                seo=result.seo
            ))
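
# With the pipe delimiter the resulting file looks like this (the URL and the
# scores are illustrative):
#
#   url|performance|accessibility|best_practices|pwa|seo
#   https://your.domain/|0.92|0.88|1.0|0.3|0.9

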
if __name__ == "__main__":
    # usage: python sitemap_lighthouse.py https://your.domain/sitemap.xml lighthouse-results.csv
    # TODO: use argparse for this (see the sketch at the bottom of the file)
    # check the argument count first: indexing sys.argv directly would raise
    # an IndexError before the values could be validated
    if len(sys.argv) < 3:
        sys.exit(1)
    sitemap_url = sys.argv[1]
    csv_filepath = sys.argv[2]
    urls = get_sitemap_urls(sitemap_url)
    url_results = []
    for url in urls:
        url_results.append(get_lighthouse_result(url))
    write_csv(url_results, csv_filepath)
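
# A minimal argparse sketch for the TODO above (untested; the help texts are
# illustrative):
#
#   import argparse
#
#   parser = argparse.ArgumentParser(
#       description='Run a Lighthouse check on every URL in a sitemap')
#   parser.add_argument('sitemap_url', help='URL of the sitemap.xml')
#   parser.add_argument('csv_filepath', help='path of the CSV file to write')
#   args = parser.parse_args()
#   urls = get_sitemap_urls(args.sitemap_url)
#   url_results = [get_lighthouse_result(url) for url in urls]
#   write_csv(url_results, args.csv_filepath)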