Skip to content

Instantly share code, notes, and snippets.

@dunhamsteve
Last active April 20, 2023 21:46
Show Gist options
  • Save dunhamsteve/6d84c60a8dbaca0d2465d0731837d40d to your computer and use it in GitHub Desktop.
Save dunhamsteve/6d84c60a8dbaca0d2465d0731837d40d to your computer and use it in GitHub Desktop.
Scans the Chrome service worker caches and prints the URL and size for each directory
#!/usr/bin/env python3
# Scans the Chrome service worker caches and prints the URL and size for each directory
# Add -v to the command line for more details
import struct, os, functools, sys
def uvarint(data, pos):
    """Decode one unsigned base-128 varint from ``data`` starting at ``pos``.

    Every byte contributes its low 7 bits, least-significant group first.
    A byte below 0x80 (high bit clear) terminates the value.

    Args:
        data: bytes-like buffer; indexing it must yield ints.
        pos: index of the varint's first byte.

    Returns:
        ``(value, next_pos)`` where ``next_pos`` is the index just past the
        varint's last byte.

    Raises:
        IndexError: ``data`` ends before a terminating byte is reached.
    """
    value = 0
    shift = 0
    while True:
        byte = data[pos]
        pos += 1
        value |= (byte & 0x7f) << shift
        if byte < 0x80:
            return value, pos
        shift += 7
def readbytes(data, pos):
    """Read one length-prefixed byte string from ``data`` at ``pos``.

    The field is a varint length followed by that many payload bytes.

    Args:
        data: bytes-like buffer.
        pos: index of the length prefix.

    Returns:
        ``(payload, next_pos)`` where ``next_pos`` is the index just past the
        payload.

    Raises:
        ValueError: the declared length runs past the end of ``data``.
        IndexError: ``data`` ends inside the length prefix (from ``uvarint``).
    """
    length, start = uvarint(data, pos)
    end = start + length
    # A plain slice would quietly return a short payload here, and the caller
    # would then stop parsing without noticing the buffer was truncated.
    if end > len(data):
        raise ValueError(
            'length-prefixed field at offset %d needs %d bytes, only %d left'
            % (pos, length, len(data) - start)
        )
    return data[start:end], end
def readstruct(fmt, l):
    """Build a reader for one fixed-width field.

    Args:
        fmt: ``struct`` format string describing the value to unpack.
        l: number of bytes the field occupies; the reader advances the
            position by exactly this amount.

    Returns:
        A function ``reader(data, pos)`` that unpacks the first value of
        ``fmt`` from ``data`` at offset ``pos`` and returns
        ``(value, pos + l)``.
    """
    # Compile the format once instead of on every field read.
    unpack_from = struct.Struct(fmt).unpack_from

    def reader(data, pos):
        return unpack_from(data, pos)[0], pos + l

    return reader
# Field decoders, looked up by the 3-bit wire type taken from each field tag.
# A None slot means this script has no decoder for that wire type.
readers = [
    uvarint,              # 0: varint
    readstruct('<d', 8),  # 1: 8 bytes, read as a little-endian double
    readbytes,            # 2: length-prefixed bytes
    None,                 # 3: no decoder
    None,                 # 4: no decoder
    readstruct('<f', 4),  # 5: 4 bytes, read as a little-endian float
]
def parse(data, schema):
    """Parse a protobuf-encoded buffer into a dict, guided by ``schema``.

    Args:
        data: bytes-like buffer holding one serialized message.
        schema: maps field number -> ``(name, repeated, typ)``. ``typ`` is a
            nested schema dict (the field is parsed recursively as a
            sub-message), the string ``'string'`` (the bytes are decoded as
            UTF-8), or anything else (the decoded value is stored unchanged).
            A truthy ``repeated`` collects every occurrence into a list.

    Returns:
        A dict keyed by the schema's field names. Fields that are not in
        ``schema`` are decoded (to advance past them) and dropped; fields that
        never occur in ``data`` are absent from the result.

    Raises:
        ValueError: a field uses a wire type that ``readers`` cannot decode.
    """
    obj = {}
    pos = 0
    end = len(data)
    while pos < end:
        # Each field starts with a varint tag: low 3 bits are the wire type,
        # the remaining bits are the field number.
        tag, pos = uvarint(data, pos)
        wire_type = tag & 7
        key = tag >> 3
        reader = readers[wire_type] if wire_type < len(readers) else None
        if reader is None:
            # Without a decoder the field's length is unknown, so parsing
            # cannot continue; say so instead of failing on a None call.
            raise ValueError(
                'unsupported protobuf wire type %d for field %d'
                % (wire_type, key)
            )
        val, pos = reader(data, pos)
        if key not in schema:
            continue
        name, repeated, typ = schema[key]
        if isinstance(typ, dict):
            val = parse(val, typ)
        elif typ == 'string':
            val = val.decode('utf8')
        if repeated:
            # Append in place rather than rebuilding the list per occurrence.
            obj.setdefault(name, []).append(val)
        else:
            obj[name] = val
    return obj
# Directory holding one sub-directory per service-worker cache
# (Chrome default profile; this is a ~/Library path, i.e. the macOS layout).
dname = os.path.expanduser(
    '~/Library/Application Support/Google/Chrome/Default/Service Worker/CacheStorage'
)

# Layout of each index.txt as consumed by parse():
#   field number -> [name, repeated, type]
# type is "string" (decoded as UTF-8), a nested dict (sub-message), or 0
# (value kept as decoded). Names beginning with "." are fields whose meaning
# is not identified here.
schema = {
    1: ["workers", 1, {
        1: ["name", 0, "string"],
        2: ["path", 0, "string"],
        3: ["size", 0, 0],
        5: [".5", 0, 0],
        6: [".6", 0, 0],
    }],
    2: ["urla", 0, "string"],
    3: ["urlb", 0, "string"],
    4: [".4", 0, 0],
    5: [".5", 0, 0],
}
# Build one record per cache directory found under dname.
records = []
for entry in os.scandir(dname):
    fn = os.path.join(entry.path, 'index.txt')
    try:
        # Context manager so the handle is closed as soon as the read is done.
        with open(fn, 'rb') as f:
            data = f.read()
    except OSError as err:
        # Stray files, or directories without a readable index, should not
        # abort the whole scan; report them and move on.
        print('skipping', entry.name, '-', err, file=sys.stderr)
        continue
    obj = parse(data, schema)
    obj['name'] = entry.name
    # parse() only sets keys for fields present in the data, so a worker
    # entry may have no 'size'; count it as 0.
    obj['size'] = sum(a.get('size', 0) for a in obj.get('workers', []))
    records.append(obj)

# Smallest first, so the largest caches end up at the bottom of the output.
records.sort(key=lambda x: x['size'])
verbose = '-v' in sys.argv
for obj in records:
    total = obj['size']
    # 'urla' is absent when the index did not contain that field.
    print(obj['name'], obj.get('urla', ''), total/1024./1024.)
    if not verbose:
        continue
    for w in obj.get('workers', []):
        print('-', w.get('name', ''), w.get('path', ''), w.get('size', 0))
    print('total', total/1024./1024., 'MB')
    print('---')
# Print directory so I can easily cd to it
print(dname)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment