Skip to content

Instantly share code, notes, and snippets.

@mnot
Created August 19, 2016 07:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mnot/793fcfb0d003e87ea7e8035c43eafdb9 to your computer and use it in GitHub Desktop.
Save mnot/793fcfb0d003e87ea7e8035c43eafdb9 to your computer and use it in GitHub Desktop.
Count the number of responses per origin in Chrome's HTTP cache.
#!/usr/bin/env python
"""
Print the number of responses stored for each origin in Chrome's HTTP
cache (fresh or stale), one count per line.
Reads from STDIN the page chrome://view-http-cache/ saved as HTML
(*not* as a Web Archive).
"""
from collections import defaultdict
from HTMLParser import HTMLParser
from htmlentitydefs import name2codepoint
from urlparse import urlsplit
# Default port for each scheme, used when a URL carries no explicit port.
port_lookup = {
    'https': 443,
    'http': 80
}


class MyHTMLParser(HTMLParser):
    """Count anchors per origin in an HTML document.

    The text content of every ``<a>`` element is parsed as a URL. After
    feeding, ``self.origins`` maps an origin string of the form
    ``"(scheme hostname port)"`` to the number of anchors seen for it.
    """

    def __init__(self):
        # Direct base-class call: HTMLParser is a classic class on
        # Python 2, where super() does not work.
        HTMLParser.__init__(self)
        self.in_a = False  # True while inside an <a> element
        self.data = ''  # text collected for the current anchor
        self.origins = defaultdict(int)  # origin string -> anchor count

    def handle_starttag(self, tag, attrs):
        """Start collecting text when an ``<a>`` element opens."""
        if tag == "a":
            self.in_a = True
            self.data = ''

    def handle_endtag(self, tag):
        """Record the origin of the anchor that has just closed.

        End tags other than ``</a>`` (including the implicit end of
        self-closing tags such as ``<br/>``) and unmatched ``</a>`` tags
        are ignored, so each anchor is counted exactly once.
        """
        if tag != "a" or not self.in_a:
            return
        self.in_a = False
        url = urlsplit(self.data)
        # Prefer the explicit port; otherwise use the scheme's default
        # (None when the scheme is unknown).
        port = url.port or port_lookup.get(url.scheme)
        origin = "(%s %s %s)" % (url.scheme, url.hostname, port)
        self.origins[origin] += 1

    def handle_data(self, data):
        """Append text to the current anchor's URL, if inside one."""
        if self.in_a:
            self.data += data

    def handle_entityref(self, name):
        """Append a named entity (e.g. ``&amp;``) to the anchor text.

        Unknown entity names are kept as literal ``&name`` text instead
        of raising ``KeyError``; this is best-effort.
        """
        if not self.in_a:
            return
        codepoint = name2codepoint.get(name)
        if codepoint is None:
            self.data += "&" + name
        else:
            self.data += unichr(codepoint)
if __name__ == "__main__":
    import sys

    parser = MyHTMLParser()
    # Stream STDIN line by line instead of loading it all with readlines().
    for line in sys.stdin:
        parser.feed(line)
    # Flush any input the parser is still buffering.
    parser.close()
    # Only the counts are printed, one per line; origins are not output.
    for count in parser.origins.values():
        print(count)
@dcarley
Copy link

dcarley commented Aug 19, 2016

Gave an exception and traceback:

Traceback (most recent call last):
  File "chrome_cache_parse.py", line 53, in <module>
    parser.feed(line)
  File "/opt/boxen/homebrew/Cellar/python/2.7.9/Frameworks/Python.framework/Versions/2.7/lib/python2.7/HTMLParser.py", line 117, in feed
    self.goahead(0)
  File "/opt/boxen/homebrew/Cellar/python/2.7.9/Frameworks/Python.framework/Versions/2.7/lib/python2.7/HTMLParser.py", line 163, in goahead
    k = self.parse_endtag(i)
  File "/opt/boxen/homebrew/Cellar/python/2.7.9/Frameworks/Python.framework/Versions/2.7/lib/python2.7/HTMLParser.py", line 401, in parse_endtag
    self.handle_endtag(elem)
  File "chrome_cache_parse.py", line 37, in handle_endtag
    origin = "(%s %s %s)" % (url.scheme, url.hostname, port)
UnboundLocalError: local variable 'port' referenced before assignment

Should it be using and redefining url.port instead of port?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment