Skip to content

Instantly share code, notes, and snippets.

@mnot mnot/chrome_cache_parse.py Secret
Created Aug 19, 2016

Embed
What would you like to do?
Count the number of responses per origin in Chrome's HTTP cache.
#!/usr/bin/env python
"""
Prints the number of responses held for each origin in Chrome's HTTP
cache (fresh or stale), one count per line.

To use, open chrome://view-http-cache/ in Chrome, save the page as HTML
(*not* as a Web Archive), and pipe the saved file to this script on STDIN.
"""
from collections import defaultdict
from HTMLParser import HTMLParser
from htmlentitydefs import name2codepoint
from urlparse import urlsplit
# Default port for each URL scheme; consulted when a URL carries no
# explicit port of its own.
port_lookup = {
    'https': 443,
    'http': 80,
}


class MyHTMLParser(HTMLParser):
    """Tally how many anchors point at each origin.

    The *text* of every ``<a>...</a>`` element (not its ``href``) is parsed
    as a URL and counted under its origin, rendered as the string
    ``"(scheme hostname port)"``.

    Attributes:
        in_a: True while the parser is between ``<a>`` and its ``</a>``.
        data: Text collected so far for the anchor currently being read.
        origins: ``defaultdict(int)`` mapping origin string -> anchor count.
    """

    def __init__(self):
        # Explicit base-class call (rather than super()) keeps this working
        # with the HTMLParser base class exactly as the file imports it.
        HTMLParser.__init__(self)
        self.in_a = False
        self.data = ''
        self.origins = defaultdict(int)

    def handle_starttag(self, tag, attrs):
        """Begin collecting text when an ``<a>`` element opens."""
        if tag == "a":
            self.in_a = True
            self.data = ''

    def handle_endtag(self, tag):
        """Record the origin of the collected URL when ``</a>`` closes."""
        # Only a closing </a> that matches an open anchor completes a URL.
        # Reacting to every end tag (</title>, </body>, ...) would count
        # stale or empty data as extra origins.
        if tag != "a" or not self.in_a:
            return
        self.in_a = False

        url = urlsplit(self.data)
        # Always bind `port`: use the explicit one when present, otherwise
        # the scheme's default (None for schemes missing from port_lookup).
        port = url.port or port_lookup.get(url.scheme)
        origin = "(%s %s %s)" % (url.scheme, url.hostname, port)
        self.origins[origin] += 1

    def handle_data(self, data):
        """Append text found inside the current anchor."""
        if self.in_a:
            self.data += data

    def handle_entityref(self, name):
        """Append the character for a named entity inside the current anchor.

        Names that are not known HTML entities (for example a bare ``&b`` in
        a query string) are kept as literal ``&name`` text instead of
        raising ``KeyError``.
        """
        if not self.in_a:
            return
        codepoint = name2codepoint.get(name)
        if codepoint is None:
            # NOTE(review): a trailing ";" (if there was one) is not restored
            # here; the parser does not pass it to this handler.
            self.data += "&" + name
        else:
            self.data += unichr(codepoint)
if __name__ == "__main__":
    import sys

    cache_parser = MyHTMLParser()
    # Hand the saved page to the parser one line at a time.
    for chunk in sys.stdin.readlines():
        cache_parser.feed(chunk)
    # Emit one count per origin; the origin string itself is not printed.
    for hits in cache_parser.origins.values():
        print(hits)
@dcarley

This comment has been minimized.

Copy link

dcarley commented Aug 19, 2016

Gave an exception and traceback:

Traceback (most recent call last):
  File "chrome_cache_parse.py", line 53, in <module>
    parser.feed(line)
  File "/opt/boxen/homebrew/Cellar/python/2.7.9/Frameworks/Python.framework/Versions/2.7/lib/python2.7/HTMLParser.py", line 117, in feed
    self.goahead(0)
  File "/opt/boxen/homebrew/Cellar/python/2.7.9/Frameworks/Python.framework/Versions/2.7/lib/python2.7/HTMLParser.py", line 163, in goahead
    k = self.parse_endtag(i)
  File "/opt/boxen/homebrew/Cellar/python/2.7.9/Frameworks/Python.framework/Versions/2.7/lib/python2.7/HTMLParser.py", line 401, in parse_endtag
    self.handle_endtag(elem)
  File "chrome_cache_parse.py", line 37, in handle_endtag
    origin = "(%s %s %s)" % (url.scheme, url.hostname, port)
UnboundLocalError: local variable 'port' referenced before assignment

Should it be using and redefining url.port instead of port?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.