Skip to content

Instantly share code, notes, and snippets.

@d-schmidt
Created January 16, 2017 14:52
Show Gist options
  • Star 9 You must be signed in to star a gist
  • Fork 4 You must be signed in to fork a gist
  • Save d-schmidt/090f7f905bb75fbd5c8e5528ccf792ba to your computer and use it in GitHub Desktop.
Save d-schmidt/090f7f905bb75fbd5c8e5528ccf792ba to your computer and use it in GitHub Desktop.
Cleaning the Chrome browser history with Python
#!/usr/bin/env python3
"""Clean the Chrome browser history with Python.

Walks the ``urls`` table of a Chrome ``History`` SQLite database in batches,
counts how many entries belong to each domain, and deletes every entry whose
domain contains a given substring.
"""
import pprint
import re
import sqlite3

# find your 'History' file
HISTORY_PATH = 'c:/Users/username/AppData/Local/Google/Chrome/User Data/Default/History'

# Captures the host part (everything between "://" and the next "/").
DOMAIN_PATTERN = re.compile(r"https?://([^/]+)/")

# Number of rows fetched per query.
BATCH_SIZE = 1000


def clean_history(db_path=HISTORY_PATH, needle="imgur", batch_size=BATCH_SIZE):
    """Delete history entries whose domain contains *needle*.

    Args:
        db_path: Path to the SQLite database holding the ``urls`` table.
        needle: Substring looked for in each entry's domain; matching
            entries are removed from ``urls``.
        batch_size: Maximum number of rows read per query.

    Returns:
        A dict mapping each domain seen (including deleted ones) to the
        number of ``urls`` rows that had it.

    Raises:
        sqlite3.Error: If the database cannot be opened, read or modified.
    """
    # NOTE(review): the browser presumably has to be closed while this runs,
    # otherwise the database may be locked - confirm on your system.
    conn = sqlite3.connect(db_path)
    try:
        cur = conn.cursor()
        print("history length", cur.execute('SELECT count(1) FROM urls').fetchone()[0])

        domains = {}
        last_id = 0
        while True:
            # Keyset pagination: ORDER BY guarantees the last row of a batch
            # carries the highest id, so no row is skipped or visited twice.
            rows = cur.execute(
                'SELECT id, url FROM urls WHERE id > ? ORDER BY id LIMIT ?',
                (last_id, batch_size),
            ).fetchall()
            if not rows:
                break
            last_id = rows[-1][0]

            doomed = []
            for row_id, url in rows:
                # A NULL url is treated as "no domain" instead of crashing.
                match = DOMAIN_PATTERN.search(url or "")
                if not match:
                    continue
                domain = match.group(1)
                domains[domain] = domains.get(domain, 0) + 1
                # clean if this is true
                if needle in domain:
                    doomed.append((row_id,))

            if doomed:
                cur.executemany('DELETE FROM urls WHERE id=?', doomed)

        # Single commit: either every matching row is removed or none is.
        conn.commit()
    finally:
        # Release the database file even when a query fails.
        conn.close()
    return domains


def main():
    """Clean the default history file and print the per-domain counts."""
    pprint.pprint(clean_history())


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment