Skip to content

Instantly share code, notes, and snippets.

@rene-d
Last active June 5, 2023 18:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rene-d/8a5161f95365343e9a24c73640d12ff7 to your computer and use it in GitHub Desktop.
Save rene-d/8a5161f95365343e9a24c73640d12ff7 to your computer and use it in GitHub Desktop.
Chromium-like browsers cache viewer (Linux, macOS)
#!/usr/bin/env python3
# Chromium-like browsers cache viewer (Linux, macOS)
import argparse
import binascii
import struct
from datetime import datetime
from pathlib import Path
import traceback
from cryptography.x509 import load_der_x509_certificate # pip3 install cryptography
# --- Simple cache entry framing ---------------------------------------------
# https://chromium.googlesource.com/chromium/src/+/master/net/disk_cache/simple/simple_entry_format.h
# Magic number opening the 24-byte file header.
kSimpleInitialMagicNumber = 0xFCFB6D1BA7725C30
# Magic number opening every 24-byte end-of-stream (EOF) record.
kSimpleFinalMagicNumber = 0xF4FA6F45970D41D8
# Bits of the "flags" field of an EOF record.
FLAG_HAS_CRC32 = 1 << 0
FLAG_HAS_KEY_SHA256 = 1 << 1

# --- Timestamps --------------------------------------------------------------
# https://chromium.googlesource.com/chromium/src/+/master/base/time/time.h
# Number of microseconds to subtract from a cached timestamp to obtain a
# Unix-epoch timestamp in microseconds (11 644 473 600 seconds).
kTimeTToMicrosecondsOffset = 11_644_473_600 * 1_000_000

# --- Response info flags -----------------------------------------------------
# https://chromium.googlesource.com/chromium/src/+/master/net/http/http_response_info.cc
# The low 8 bits of the flags word hold a version number; the bits below
# occupy the positions above it.
RESPONSE_INFO_HAS_CERT = 1 << 8
RESPONSE_INFO_HAS_SECURITY_BITS = 1 << 9
RESPONSE_INFO_HAS_CERT_STATUS = 1 << 10
RESPONSE_INFO_HAS_VARY_DATA = 1 << 11
RESPONSE_INFO_TRUNCATED = 1 << 12
RESPONSE_INFO_WAS_SPDY = 1 << 13
RESPONSE_INFO_WAS_ALPN = 1 << 14
RESPONSE_INFO_WAS_PROXY = 1 << 15
RESPONSE_INFO_HAS_SSL_CONNECTION_STATUS = 1 << 16
RESPONSE_INFO_HAS_ALPN_NEGOTIATED_PROTOCOL = 1 << 17
RESPONSE_INFO_HAS_CONNECTION_INFO = 1 << 18
RESPONSE_INFO_USE_HTTP_AUTHENTICATION = 1 << 19
RESPONSE_INFO_HAS_SIGNED_CERTIFICATE_TIMESTAMPS = 1 << 20
RESPONSE_INFO_UNUSED_SINCE_PREFETCH = 1 << 21
RESPONSE_INFO_HAS_KEY_EXCHANGE_GROUP = 1 << 22
RESPONSE_INFO_PKP_BYPASSED = 1 << 23
RESPONSE_INFO_HAS_STALENESS = 1 << 24
RESPONSE_INFO_HAS_PEER_SIGNATURE_ALGORITHM = 1 << 25
RESPONSE_INFO_RESTRICTED_PREFETCH = 1 << 26
RESPONSE_INFO_HAS_DNS_ALIASES = 1 << 27
RESPONSE_INFO_SINGLE_KEYED_CACHE_ENTRY_UNUSABLE = 1 << 28
RESPONSE_INFO_ENCRYPTED_CLIENT_HELLO = 1 << 29
RESPONSE_INFO_BROWSER_RUN_ID = 1 << 30

# --- SSL connection status ---------------------------------------------------
# https://chromium.googlesource.com/chromium/src/+/master/net/ssl/ssl_connection_status_flags.h
# Position and width (3 bits) of the version field inside the status word.
SSL_CONNECTION_VERSION_SHIFT = 20
SSL_CONNECTION_VERSION_MASK = 0b111
# https://chromium.googlesource.com/chromium/src/+/master/net/ssl/ssl_connection_status_flags.h
def SSLConnectionStatusToVersion(connection_status):
    """Return the version field packed inside an SSL connection-status word.

    The field is found by shifting the word right by
    SSL_CONNECTION_VERSION_SHIFT bits and keeping the bits selected by
    SSL_CONNECTION_VERSION_MASK.
    """
    shifted = connection_status >> SSL_CONNECTION_VERSION_SHIFT
    return shifted & SSL_CONNECTION_VERSION_MASK
def bits(a):
    """Return the positions of the set bits of ``a`` as a comma-separated string.

    Position 0 is the least significant bit and positions are listed in
    ascending order, e.g. ``bits(0b1010) == "1,3"``. Zero gives ``""``.

    Raises:
        ValueError: if ``a`` is negative. Halving a negative Python int never
            reaches 0 (``-1 // 2 == -1``), so the loop below would not end.
    """
    if a < 0:
        raise ValueError(f"bits() needs a non-negative integer, got {a}")

    positions = []
    position = 0
    while a:
        if a & 1:
            positions.append(position)
        position += 1
        a >>= 1
    return ",".join(map(str, positions))
class ChromiumCache:
    """Viewer for the entry files of a Chromium "Simple" disk cache.

    Attributes:
        verbose: when true, ``extract`` prints the decoded response info of
            an entry; otherwise only its URL.
        show_url: substrings used as a filter; when the list is non-empty,
            only entries whose URL contains one of them are printed.
    """

    def __init__(self):
        self.verbose = False
        self.show_url = []

    @staticmethod
    def _check(condition, message):
        """Raise ``AssertionError(message)`` unless ``condition`` is true.

        Used instead of the ``assert`` statement so that the format checks
        are still performed under ``python -O``, while callers keep catching
        the same exception type.
        """
        if not condition:
            raise AssertionError(message)

    @staticmethod
    def _read_u32(buf, offset):
        """Read a little-endian uint32 at ``offset``; return (value, next offset)."""
        (value,) = struct.unpack("<I", buf[offset : offset + 4])
        return value, offset + 4

    @staticmethod
    def _read_blob(buf, offset):
        """Read a uint32 length followed by that many bytes.

        Returns (payload, next offset). The next offset skips the padding
        that rounds the payload up to a multiple of 4 bytes.

        Raises:
            struct.error: if the buffer ends before the announced length.
        """
        (length,) = struct.unpack("<I", buf[offset : offset + 4])
        start = offset + 4
        payload = buf[start : start + length]
        if len(payload) != length:
            raise struct.error(f"truncated field: expected {length} bytes, got {len(payload)}")
        return payload, start + (length + 3) // 4 * 4

    def _matches_filter(self, url):
        """Tell whether ``url`` passes the ``show_url`` filter (empty filter passes all)."""
        return not self.show_url or any(i in url for i in self.show_url)

    def extract(self, f: Path):
        """Parse one cache entry file and print its URL and, if verbose, its details.

        Nothing is printed when ``show_url`` is non-empty and the URL of the
        entry matches none of its substrings.

        Args:
            f: path of an entry file holding stream 0 and stream 1.

        Raises:
            AssertionError: the file does not have the expected structure, or
                uses a response-info field this parser does not decode.
            struct.error: a fixed-size record or length-prefixed field is truncated.
            UnicodeDecodeError: the key, a header or a string field cannot be decoded.
        """
        check = self._check
        b = f.read_bytes()

        # A file containing stream 0 and stream 1 in the Simple cache consists of:
        # - a SimpleFileHeader.
        # - the key.
        # - the data from stream 1.
        # - a SimpleFileEOF record for stream 1.
        # - the data from stream 0.
        # - (optionally) the SHA256 of the key.
        # - a SimpleFileEOF record for stream 0.
        header_size = 24
        eof_size = 24
        sha256_size = 32

        ##################################################################
        # SimpleFileHeader (24 bytes)
        magic, version, key_length, _, zero = struct.unpack("<QIIII", b[:header_size])
        check(magic == kSimpleInitialMagicNumber, "bad magic number in the file header")
        check(version == 5, f"unsupported entry version {version}")
        check(zero == 0, "non-zero padding in the file header")

        ##################################################################
        # the key
        key = b[header_size : header_size + key_length]
        if key[:8] == b"1/0/_dk_" or key[:4] == b"_dk_":
            # Double-keyed entry: the URL is whatever follows the second space.
            # Taking the last piece (instead of index 2) gives the same result
            # and does not fail on a key with fewer than three fields.
            url = key.decode().split(" ", maxsplit=2)[-1]
        else:
            url = key.decode().split(" ")[-1]

        ##################################################################
        # SimpleFileEOF of stream0 (24 bytes), at the very end of the file
        magic, eof_flags, _, stream0_length, zero = struct.unpack("<QIIII", b[-eof_size:])
        check(magic == kSimpleFinalMagicNumber, "bad magic number in the EOF record of stream 0")
        check(
            eof_flags == FLAG_HAS_CRC32 + FLAG_HAS_KEY_SHA256,
            f"unexpected flags {eof_flags} in the EOF record of stream 0",
        )
        check(zero == 0, "non-zero padding in the EOF record of stream 0")

        ##################################################################
        # SimpleFileEOF of stream1 (24 bytes), located by walking back from
        # the end: EOF0, SHA256 of the key, stream 0, then this record.
        offset = len(b) - (eof_size + stream0_length + sha256_size + eof_size)
        magic, eof_flags, _, stream1_length, zero = struct.unpack("<QIIII", b[offset : offset + eof_size])
        offset += eof_size  # skip the SimpleFileEOF of stream1: offset is now the start of stream 0
        check(magic == kSimpleFinalMagicNumber, "bad magic number in the EOF record of stream 1")
        check(eof_flags == FLAG_HAS_CRC32, f"unexpected flags {eof_flags} in the EOF record of stream 1")
        check(zero == 0, "non-zero padding in the EOF record of stream 1")
        # stream 1 (e.g. the payload) sits between the key and its EOF record;
        # both ways of computing the start of stream 0 must agree.
        check(
            offset == header_size + key_length + stream1_length + eof_size,
            "inconsistent stream lengths",
        )

        ##################################################################
        # stream0 (HTTP response info)
        if stream0_length == 0:
            if self._matches_filter(url):
                print(url)
                # Lengths are details: show them in verbose mode only, like
                # the non-empty case below does.
                if self.verbose:
                    print(" stream0_length", stream0_length)
                    print(" stream1_length", stream1_length)
            return

        # header of the stream0: cf. HttpResponseInfo::InitFromPickle
        length, flags, request_time, response_time = struct.unpack("<IIQQ", b[offset : offset + 24])
        # the leading length word does not count its own 4 bytes
        check(stream0_length == length + 4, "stream 0 length does not match its header")
        offset += 24

        # the HTTP headers: NUL-separated lines ended by a double NUL
        raw_headers, offset = self._read_blob(b, offset)
        check(raw_headers[-2:] == b"\x00\x00", "HTTP headers are not terminated by a double NUL")
        http_headers = [line.decode() for line in raw_headers[:-2].split(b"\0")]

        if not self._matches_filter(url):
            return

        request_time = datetime.fromtimestamp((request_time - kTimeTToMicrosecondsOffset) / 1_000_000)
        response_time = datetime.fromtimestamp((response_time - kTimeTToMicrosecondsOffset) / 1_000_000)

        print(url)
        if not self.verbose:
            return

        print(" stream0_length", stream0_length)
        print(" stream1_length", stream1_length)
        print(" http_headers")
        for h in http_headers:
            print(f" {h}")
        print(" payload length", stream1_length, "bytes")
        print(" flags ", hex(flags), f"version:{flags & 0xFF} bits:{bits(flags & ~0xFF)}")
        print(" request_time ", request_time)
        print(" response_time ", response_time)

        # The optional fields below are decoded in the order they are stored;
        # each one is present only when its flag bit is set.
        if flags & RESPONSE_INFO_HAS_CERT:
            nb_certs, offset = self._read_u32(b, offset)
            for _ in range(nb_certs):
                # certificate in DER format
                der, offset = self._read_blob(b, offset)
                certificate = load_der_x509_certificate(der)
                # NOTE(review): recent `cryptography` releases deprecate
                # not_valid_after in favour of not_valid_after_utc - confirm
                # against the installed version before switching.
                print(
                    f" certificate ({len(der)} bytes) {certificate.issuer.rfc4514_string()} {certificate.not_valid_after}"
                )

        if flags & RESPONSE_INFO_HAS_CERT_STATUS:
            cert_status, offset = self._read_u32(b, offset)
            print(f" cert_status {hex(cert_status)}")

        check(flags & RESPONSE_INFO_HAS_SECURITY_BITS == 0, "security bits field is not supported")

        if flags & RESPONSE_INFO_HAS_SSL_CONNECTION_STATUS:
            connection_status, offset = self._read_u32(b, offset)
            print(
                " ssl_connection_status",
                hex(connection_status),
                f"version:{SSLConnectionStatusToVersion(connection_status)}",
            )

        check(
            flags & RESPONSE_INFO_HAS_SIGNED_CERTIFICATE_TIMESTAMPS == 0,
            "signed certificate timestamps field is not supported",
        )

        # Read vary-data
        if flags & RESPONSE_INFO_HAS_VARY_DATA:
            # it is actually a MD5Digest
            # cf. https://chromium.googlesource.com/chromium/src/+/master/net/http/http_vary_data.h
            print(" vary_data", binascii.b2a_hex(b[offset : offset + 16]))
            offset += 16

        # Read socket_address.
        raw_ip, offset = self._read_blob(b, offset)
        print(" ip", raw_ip.decode())
        port, offset = self._read_u32(b, offset)
        print(" port", port)

        if flags & RESPONSE_INFO_HAS_ALPN_NEGOTIATED_PROTOCOL:
            raw_protocol, offset = self._read_blob(b, offset)
            print(" alpn_negotiated_protocol", raw_protocol.decode())

        if flags & RESPONSE_INFO_HAS_CONNECTION_INFO:
            connection_info, offset = self._read_u32(b, offset)
            print(" connection_info", hex(connection_info))

        if flags & RESPONSE_INFO_HAS_KEY_EXCHANGE_GROUP:
            key_exchange_group, offset = self._read_u32(b, offset)
            print(" key_exchange_group", hex(key_exchange_group))

        check(flags & RESPONSE_INFO_HAS_STALENESS == 0, "staleness field is not supported")

        if flags & RESPONSE_INFO_HAS_PEER_SIGNATURE_ALGORITHM:
            peer_signature_algorithm, offset = self._read_u32(b, offset)
            print(" peer_signature_algorithm", hex(peer_signature_algorithm))

        if flags & RESPONSE_INFO_HAS_DNS_ALIASES:
            nb_dns, offset = self._read_u32(b, offset)
            for _ in range(nb_dns):
                raw_alias, offset = self._read_blob(b, offset)
                print(" dns", raw_alias.decode())

        # Everything up to the SHA256 of the key must have been consumed.
        check(
            offset == len(b) - eof_size - sha256_size,
            "response info was not fully decoded (unknown trailing fields?)",
        )

    def analyze(self, path):
        """Run ``extract`` on every ``*_0`` file directly inside ``path``.

        ``path`` may start with ``~``. Processing stops at the first file that
        cannot be parsed: its traceback is written to stderr, its name is
        printed, and the program exits with status 1.

        Raises:
            SystemExit: when an entry raises AssertionError,
                UnicodeDecodeError or struct.error.
        """
        for f in Path(path).expanduser().glob("*_0"):
            try:
                self.extract(f)
            except (AssertionError, UnicodeDecodeError, struct.error) as e:
                # Three-argument form: works before Python 3.10 as well, and
                # the function prints by itself (it returns None).
                traceback.print_exception(type(e), e, e.__traceback__)
                print(f"error reading: {f}")
                raise SystemExit(1)

    def add_show_url(self, pattern):
        """Add ``pattern`` to the list of URL substrings used as a filter."""
        self.show_url.append(pattern)
if __name__ == "__main__":
    # Command-line entry point: dump a single entry file (-f, always verbose)
    # or every entry of a cache directory, optionally filtered by URL.
    parser = argparse.ArgumentParser(description="Chromium-like browsers cache viewer (Linux, macOS)")
    parser.add_argument("-v", "--verbose", action="store_true", help="print the details of each entry")
    parser.add_argument("-u", "--url", help="only show entries whose URL contains this text")
    parser.add_argument("-f", "--file", type=Path, help="dump this single entry file (implies --verbose)")
    parser.add_argument(
        "-d",
        "--cache-dir",
        default="~/.cache/chromium/Default/Cache",
        help="directory holding the cache entry files (default: %(default)s)",
    )
    args = parser.parse_args()

    bc = ChromiumCache()
    if args.file:
        bc.verbose = True
        bc.extract(args.file)
    else:
        bc.verbose = args.verbose
        if args.url:
            bc.add_show_url(args.url)
        bc.analyze(args.cache_dir)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment