Skip to content

Instantly share code, notes, and snippets.

@mnot mnot/ Secret
Created Aug 19, 2016

What would you like to do?
Count the number of responses per origin in Chrome's HTTP cache.
#!/usr/bin/env python
"""
Returns the number of responses for each origin in Chrome's HTTP
cache (fresh or stale).

Run with STDIN from the results of saving (as HTML, *not* a Web Archive)
the page that lists the cache entries as links (presumably chrome://cache;
the exact URL is missing from the recovered text of this script).
"""

from collections import defaultdict

# The original script targeted Python 2; fall back to the Python 3 module
# names so the same file runs under either interpreter.
try:
    from HTMLParser import HTMLParser
    from htmlentitydefs import name2codepoint
    from urlparse import urlsplit
except ImportError:
    from html.parser import HTMLParser
    from html.entities import name2codepoint
    from urllib.parse import urlsplit

try:
    unichr
except NameError:  # Python 3: chr() already returns a text character.
    unichr = chr

# Default port for each scheme, used when a URL carries no explicit port.
port_lookup = {
    'https': 443,
    'http': 80,
}


class MyHTMLParser(HTMLParser):
    """Tally one response per ``<a>`` element, keyed by the origin of its text.

    The text inside each anchor is treated as a URL. When the anchor closes,
    the URL's origin, formatted as ``"(scheme hostname port)"``, has its
    counter in ``self.origins`` incremented.
    """

    def __init__(self):
        # Explicit base call rather than super(): Python 2's HTMLParser is
        # an old-style class.
        HTMLParser.__init__(self)
        self.in_a = False                # True while inside an <a> element
        self.data = ''                   # text collected for the current <a>
        self.origins = defaultdict(int)  # origin string -> response count

    def handle_starttag(self, tag, attrs):
        """Start collecting text when an ``<a>`` element opens."""
        if tag == "a":
            self.in_a = True
            self.data = ''

    def handle_endtag(self, tag):
        """Count the collected URL's origin when an ``<a>`` element closes."""
        # Only a closing </a> that matches an open anchor is a cache entry;
        # other end tags must not re-count stale text.
        if tag != "a" or not self.in_a:
            return
        self.in_a = False
        url = urlsplit(self.data)
        try:
            explicit_port = url.port
        except ValueError:
            # urlsplit raises lazily on a malformed or out-of-range port.
            explicit_port = None
        # Always bind `port`: use the explicit one if present, else the
        # scheme default (None for unknown schemes).  The original left
        # `port` unassigned for URLs with an explicit port, which raised
        # UnboundLocalError.
        port = explicit_port or port_lookup.get(url.scheme, None)
        origin = "(%s %s %s)" % (url.scheme, url.hostname, port)
        self.origins[origin] += 1

    def handle_data(self, data):
        """Append text found inside an ``<a>`` element to the current URL."""
        if self.in_a:
            self.data += data

    def handle_entityref(self, name):
        """Decode a named entity (e.g. ``&amp;``) inside an ``<a>`` element."""
        if not self.in_a:
            return
        codepoint = name2codepoint.get(name)
        if codepoint is None:
            # Unknown entity: keep the raw text instead of raising KeyError.
            self.data += "&%s;" % name
        else:
            self.data += unichr(codepoint)

    def handle_charref(self, name):
        """Decode a numeric reference (``&#38;`` / ``&#x26;``) inside ``<a>``."""
        if not self.in_a:
            return
        try:
            if name[:1] in ("x", "X"):
                codepoint = int(name[1:], 16)
            else:
                codepoint = int(name)
            self.data += unichr(codepoint)
        except (ValueError, OverflowError):
            # Malformed reference: keep the raw text.
            self.data += "&#%s;" % name


if __name__ == "__main__":
    import sys
    parser = MyHTMLParser()
    # Stream stdin instead of loading every line into memory first.
    for line in sys.stdin:
        parser.feed(line)
    parser.close()
    for origin, count in parser.origins.items():
        # sys.stdout.write works identically on Python 2 and 3.
        sys.stdout.write("%s %s\n" % (origin, count))

This comment has been minimized.

Copy link

dcarley commented Aug 19, 2016

Running this gave an exception and traceback:

Traceback (most recent call last):
  File "", line 53, in <module>
  File "/opt/boxen/homebrew/Cellar/python/2.7.9/Frameworks/Python.framework/Versions/2.7/lib/python2.7/", line 117, in feed
  File "/opt/boxen/homebrew/Cellar/python/2.7.9/Frameworks/Python.framework/Versions/2.7/lib/python2.7/", line 163, in goahead
    k = self.parse_endtag(i)
  File "/opt/boxen/homebrew/Cellar/python/2.7.9/Frameworks/Python.framework/Versions/2.7/lib/python2.7/", line 401, in parse_endtag
  File "", line 37, in handle_endtag
    origin = "(%s %s %s)" % (url.scheme, url.hostname, port)
UnboundLocalError: local variable 'port' referenced before assignment

Should it be using and redefining url.port instead of port?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.