Skip to content

Instantly share code, notes, and snippets.

@cmnord
Last active June 15, 2020 09:13
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cmnord/a5f0692723fdddda6062170bd9c62372 to your computer and use it in GitHub Desktop.
Save cmnord/a5f0692723fdddda6062170bd9c62372 to your computer and use it in GitHub Desktop.
Chrome browser history analysis
# Based off of https://geekswipe.net/technology/computing/analyze-chromes-browsing-history-with-python/
# Must use python 2
# Close Chrome before running this, or else the database will be locked
import os
import sqlite3
import operator
from collections import OrderedDict
import matplotlib.pyplot as plt
def parse(url):
    """Return the host part of *url* with a leading ``www.`` removed.

    The host is the text between the first ``//`` and the next ``/``;
    nothing else (port, credentials) is stripped from it.

    If *url* contains no ``//``, "URL format error!" is printed and
    ``None`` is returned.
    """
    try:
        after_scheme = url.split('//')[1]
    except IndexError:
        print("URL format error!")
        return None
    host = after_scheme.split('/', 1)[0]
    # Strip "www." only as a prefix: a blanket replace() would also mangle
    # hosts that merely contain it (e.g. "awww.example.com").
    if host.startswith("www."):
        host = host[len("www."):]
    return host
def analyze(results):
    """Interactively print or plot per-site visit counts.

    results -- mapping of site -> count; it is shown in its own iteration
               order, so the caller should pass it already sorted.

    The user is prompted on stdin: ``c`` prints every "site count" pair,
    ``p`` asks how many leading entries to draw and shows a bar chart.
    Any other answer prints "[.] Uh?" and exits the interpreter.

    Raises ValueError if the answer to the "how many" prompt is not an
    integer.
    """
    # raw_input only exists on Python 2; on Python 3 the same behaviour is
    # provided by input.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    prompt = read_line("[.] Type <c> to print or <p> to plot\n[>] ")
    if prompt == "c":
        # Use the argument, not a module-level global, so the function
        # reports whatever mapping it was actually given.
        for site, count in results.items():
            print("%s %s" % (site, count))
    elif prompt == "p":
        s = int(read_line("Plot how many top sites? "))
        # Clamp to what is available: bar positions and heights must have
        # the same length, which a too-large or negative request breaks.
        s = max(0, min(s, len(results)))
        # list(...) so slicing also works on Python 3 dict views.
        sites = list(results.keys())[:s]
        counts = list(results.values())[:s]
        plt.bar(range(s), counts, align='edge')
        plt.xticks(range(s), sites, rotation=20)
        plt.title("Top-Visited Websites")
        plt.show()
    else:
        print("[.] Uh?")
        quit()
# Path to the user's Chrome history database (Default profile under
# ~/.config/google-chrome).
data_path = os.path.join(os.path.expanduser('~'), ".config", "google-chrome", "Default")
history_db = os.path.join(data_path, 'History')

# Query the db. Joining urls against visits yields one row per recorded
# visit, so counting rows per domain counts visits per domain.
select_statement = "SELECT urls.url, urls.visit_count FROM urls, visits WHERE urls.id = visits.url;"
c = sqlite3.connect(history_db)
try:
    cursor = c.cursor()
    cursor.execute(select_statement)
    results = cursor.fetchall()  # list of (url, visit_count) tuples
finally:
    # Everything needed is in memory now; close the connection so the
    # History file is not held open while the user is prompted.
    c.close()

# Tally rows per domain. The visit_count column is fetched but not used.
# URLs that parse() rejects are grouped under the key None.
sites_count = {}
for url, _visit_count in results:
    domain = parse(url)
    sites_count[domain] = sites_count.get(domain, 0) + 1

# Most-visited first; the sort is stable, so ties keep their tally order.
sites_count_sorted = OrderedDict(sorted(sites_count.items(), key=operator.itemgetter(1), reverse=True))
analyze(sites_count_sorted)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment