Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save h1code2/d9906e011d0be560227703c05b372d04 to your computer and use it in GitHub Desktop.
Save h1code2/d9906e011d0be560227703c05b372d04 to your computer and use it in GitHub Desktop.
Extract the original content from the HTML source of a Chrome about:cache page.
import sys
import re
import gzip
import mimetypes
from mimetools import Message
from StringIO import StringIO
def parse_headers(raw_headers):
    """Parse a raw HTTP response header block into a dictionary.

    The first line of *raw_headers* (the status line, e.g.
    ``HTTP/1.1 200 OK``) is discarded; the remaining ``Name: value`` lines
    are parsed as RFC 822 style headers.

    Args:
        raw_headers: Header text with the status line first and the
            individual lines separated by newlines.

    Returns:
        A dict mapping lower-cased header names to their stripped values.
        When a header is repeated, the last occurrence wins.  Input that
        consists of a status line only yields an empty dict.
    """
    # Imported locally so this function does not depend on the deprecated
    # ``mimetools`` module (removed in Python 3); ``email`` is available in
    # both Python 2 and Python 3.
    from email import message_from_string

    # partition() tolerates input without any newline, whereas unpacking
    # the result of split('\n', 1) raises ValueError in that case.
    _status_line, _separator, headers_text = raw_headers.partition('\n')
    message = message_from_string(headers_text)

    # Lower-case the names so lookups such as headers['content-type'] work
    # regardless of the capitalisation used in the stored response.
    return dict(
        (name.lower(), value.strip()) for name, value in message.items()
    )
def filter_blank(lines):
    """Yield the whitespace-stripped form of every non-blank item.

    Each item of *lines* is stripped of leading and trailing whitespace;
    items that are empty after stripping are skipped.
    """
    for raw in lines:
        text = raw.strip()
        if text == '':
            continue
        yield text
def convert_cache_line(cache_line):
    """Decode one hex-dump row into the characters it encodes.

    The row is split on single spaces.  The first field is skipped (in a
    hex dump this is presumably the offset column) and up to sixteen of
    the following fields are read as hexadecimal character codes.
    Decoding stops at the first empty field, i.e. at the first run of two
    consecutive spaces.
    """
    hex_fields = cache_line.split(' ')[1:17]
    # Anything from the first empty field onwards is not byte data.
    if '' in hex_fields:
        hex_fields = hex_fields[:hex_fields.index('')]
    return ''.join([chr(int(field, 16)) for field in hex_fields])
def gzip_decompress(data):
    """Return the decompressed payload of the gzip stream held in *data*.

    Args:
        data: The complete gzip-compressed content as a byte string.

    Returns:
        The decompressed content as a byte string.

    Raises:
        IOError: If *data* does not start with a valid gzip header (on
            Python 3 this surfaces as an ``OSError`` subclass).
    """
    # Imported locally: io.BytesIO is the in-memory buffer intended for
    # binary data and exists on both Python 2.6+ and Python 3, unlike the
    # Python-2-only ``StringIO`` module.
    from io import BytesIO

    reader = gzip.GzipFile(fileobj=BytesIO(data), mode='rb')
    try:
        return reader.read()
    finally:
        # Release the reader explicitly instead of leaving it to the
        # garbage collector.
        reader.close()
def decode_cache_html(cache_html):
    """Extract the response headers and decoded segments from a cache page.

    Every ``<pre>...</pre>`` element of *cache_html* is collected.  The
    first one is parsed as the raw response headers.  Each remaining
    element that is not blank is treated as a hex dump: its non-blank rows
    are decoded with ``convert_cache_line`` and concatenated into one
    segment.

    Args:
        cache_html: The HTML source of the cache entry page as a string.

    Returns:
        A ``(headers, segments)`` tuple, where *headers* is the dict
        produced by ``parse_headers`` and *segments* is a list with one
        decoded string per hex-dump block, in document order.

    Raises:
        IndexError: If *cache_html* contains no ``<pre>`` element.
    """
    pre_regexp = re.compile('<pre>(.*?)</pre>', re.DOTALL)
    pre_blocks = pre_regexp.findall(cache_html)
    headers = parse_headers(pre_blocks[0])

    segments = []
    for raw_content in filter_blank(pre_blocks[1:]):
        # filter_blank already drops empty rows, so no further check is
        # needed; join once instead of growing the string row by row.
        rows = filter_blank(raw_content.split('\n'))
        segments.append(''.join(convert_cache_line(row) for row in rows))
    return headers, segments
def main():
    """Decode every cache HTML file named on the command line.

    For each input file the second decoded segment is taken as the
    response body, gunzipped when the ``content-encoding`` header is
    ``gzip``, and written to ``<filename>-decoded<extension>``.  The
    extension is guessed from the ``content-type`` header and is empty
    when the header is missing or the type is unknown.  Files without a
    body segment are reported on stderr and skipped.
    """
    for filename in sys.argv[1:]:
        with open(filename) as input_file:
            cache_html = input_file.read()
        headers, segments = decode_cache_html(cache_html)

        # segments[0] is usually the header + certificate, so the body is
        # expected in segments[1]; skip entries that do not have one.
        if len(segments) < 2:
            sys.stderr.write(
                'skipping {0}: no body segment found\n'.format(filename)
            )
            continue
        data = segments[1]

        # Header values are compared case-insensitively ("GZIP", " gzip").
        encoding = headers.get('content-encoding', '').strip().lower()
        if encoding == 'gzip':
            data = gzip_decompress(data)

        # Drop parameters such as "; charset=utf-8" before the lookup.
        content_type = headers.get('content-type', '').split(';')[0].strip()
        # guess_extension() returns None for unknown or missing types;
        # fall back to no extension instead of failing.
        extension = mimetypes.guess_extension(content_type) or ''

        output_filename = filename + '-decoded' + extension
        # Parenthesised single-argument form prints identically on
        # Python 2 and Python 3.
        print('writing to {0}'.format(output_filename))
        # The payload is arbitrary binary content (images, archives, ...),
        # so write it in binary mode to avoid newline translation.
        with open(output_filename, 'wb') as output_file:
            output_file.write(data)
# Script entry point: process the files given on the command line only when
# this module is executed directly, not when it is imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment