# lebowitz/extract_har.py

## extract_har.py
import json
import base64
import os
import pathlib
from urllib.parse import urlparse

# Supported image MIME types, mapped to the file extension used when the
# response body is written to disk.
# Special thanks to https://gist.github.com/FurloSK/0477e01024f701db42341fc3223a5d8c
# Special mention, and thanks to MDN
# https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Common_types
#
# Every MIME type is listed exactly once.  A dict literal silently keeps only
# the LAST value for a repeated key, so the earlier duplicate "image/jpeg" and
# "image/tiff" entries were dead; the values below are the ones that were
# actually in effect (".jpg" and ".tiff").
mimetypes = {
    "image/webp": ".webp",
    "image/jpeg": ".jpg",   # ".jpeg" is the other common spelling
    "image/jpg": ".jpg",    # non-standard alias that some servers send
    "image/png": ".png",
    "image/svg+xml": ".svg",
    "image/avif": ".avif",
    "image/bmp": ".bmp",
    "image/gif": ".gif",
    "image/vnd.microsoft.icon": ".ico",
    "image/x-icon": ".ico",  # legacy alias still widely used for favicons
    "image/tiff": ".tiff",  # ".tif" is the other common spelling
}
# Extraction logic.  Run this file as a script to read "src.har" from the
# current directory and write every supported image response into "./imgs".
# (A second, whitespace-mangled copy of this script used to follow here and
# made the file unparseable; it has been removed.)


def _normalize_mimetype(raw):
    """Return the bare, lower-cased media type of a HAR ``mimeType`` value.

    The HAR ``mimeType`` field mirrors the Content-Type header, so it may carry
    parameters (``image/svg+xml; charset=utf-8``) or unusual casing.  Neither
    should prevent the extension-table lookup from matching.  ``None`` or an
    empty value yields ``""``.
    """
    return (raw or "").split(";", 1)[0].strip().lower()


def _base_name(raw_url):
    """Build a flat file name (without extension handling) from a request URL.

    The URL path, minus its leading slash, is used with the remaining slashes
    replaced by underscores.  When the path is empty (e.g. ``https://host/``)
    the host name is used instead so the result is never an empty string,
    which would otherwise produce a hidden file such as ``.png``.
    """
    url = urlparse(raw_url)
    path = url.path[1:] if url.path.startswith("/") else url.path
    return path.replace("/", "_") or url.netloc.replace(":", "_") or "unnamed"


def _decode_body(text, encoding):
    """Return the raw bytes of a HAR response body.

    A missing/empty ``encoding`` (or the legacy ``"literal"`` marker) means the
    text is stored as-is and is encoded as UTF-8; any other value is treated as
    base64, which is how HAR stores binary bodies.
    """
    if not encoding or encoding == "literal":
        return text.encode(encoding="UTF-8", errors="strict")
    return base64.b64decode(text)


def main(har_path="src.har", out_dir=None, extensions=None):
    """Extract every supported image from a HAR file into a directory.

    Args:
        har_path: Path of the HAR (JSON) file to read.
        out_dir: Directory to write into; defaults to ``<cwd>/imgs``.  It is
            created if it does not exist.
        extensions: Mapping of MIME type to file extension; defaults to the
            module-level ``mimetypes`` table.

    Returns:
        The list of ``pathlib.Path`` objects that were written, in HAR order.

    Raises:
        OSError: If the HAR file cannot be read or an image cannot be written.
        ValueError: If the HAR file is not valid JSON or a body is not valid
            base64.
        KeyError: If the HAR lacks the ``log.entries`` / ``request.url`` /
            ``response.content`` structure.
    """
    if extensions is None:
        extensions = mimetypes
    folder = pathlib.Path.cwd() / "imgs" if out_dir is None else pathlib.Path(out_dir)
    folder.mkdir(parents=True, exist_ok=True)

    with open(har_path, "rb") as f:
        har = json.load(f)

    written = []
    for entry in har["log"]["entries"]:
        content = entry["response"]["content"]
        filename = _base_name(entry["request"]["url"])
        print(filename)

        # Entries without a captured body (redirects, cached responses, ...)
        # have nothing to write.
        response_text = content.get("text")
        if not response_text:
            continue

        ext = extensions.get(_normalize_mimetype(content.get("mimeType")))
        if ext is None:
            continue

        # Avoid "logo.png.png" when the URL already ends with the extension;
        # a differing suffix is kept so a mismatch with the real content type
        # stays visible in the name.
        if not filename.lower().endswith(ext):
            filename = f"{filename}{ext}"

        # NOTE: entries that map to the same name overwrite each other, as
        # before; the last one in the HAR wins.
        target = folder / filename
        print(target)
        target.write_bytes(_decode_body(response_text, content.get("encoding")))
        written.append(target)
    return written


if __name__ == "__main__":
    main()