Last active
June 15, 2020 09:13
-
-
Save cmnord/a5f0692723fdddda6062170bd9c62372 to your computer and use it in GitHub Desktop.
Chrome browser history analysis
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Based on https://geekswipe.net/technology/computing/analyze-chromes-browsing-history-with-python/
# Must use Python 2
# Close Chrome before running this, or else the database will be locked
import operator
import os
import sqlite3
from collections import Counter, OrderedDict
from contextlib import closing

import matplotlib.pyplot as plt
def parse(url):
    """Return the host part of ``url`` with one leading ``www.`` removed.

    The host is taken to be the text between the first ``//`` and the next
    ``/`` (or the end of the string when there is no further ``/``).

    Args:
        url: URL string, e.g. ``"https://www.example.com/page"``.

    Returns:
        The host string (``"example.com"`` for the example above), or
        ``None`` after printing an error when ``url`` contains no ``//``.
    """
    _scheme, separator, remainder = url.partition('//')
    if not separator:
        print("URL format error!")
        return None
    domain = remainder.split('/', 1)[0]
    # Strip only a leading "www."; replacing it everywhere would corrupt
    # hosts that merely contain that text (e.g. "awww.example.com").
    if domain.startswith("www."):
        domain = domain[len("www."):]
    return domain
def analyze(results):
    """Interactively print or plot per-site visit counts.

    Prompts on stdin for a mode:

    * ``c`` prints one ``site count`` line per entry of ``results``.
    * ``p`` asks how many sites to show and draws a bar chart of that many
      leading entries.
    * anything else prints a complaint and exits the interpreter.

    Args:
        results: Mapping of site name to visit count. Entries are used in
            the mapping's own iteration order, so pass an ordered mapping
            (most-visited first) to get a ranked listing or chart.

    Raises:
        ValueError: If the answer to the "how many" prompt is not an integer.
    """
    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    prompt = read_line("[.] Type <c> to print or <p> to plot\n[>] ")
    if prompt == "c":
        # Iterate the mapping that was passed in rather than a module-level
        # global, so the function works for any caller.
        for site, count in results.items():
            print("%s %s" % (site, count))
    elif prompt == "p":
        requested = int(read_line("Plot how many top sites? "))
        # Clamp to the available entries so bar positions and heights always
        # have the same length.
        s = max(0, min(requested, len(results)))
        # list() makes the views sliceable on Python 3 as well.
        sites = list(results.keys())[:s]
        counts = list(results.values())[:s]
        plt.bar(range(s), counts, align='edge')
        # Set labels and rotation in one call so the rotation applies to the
        # labels that are actually shown.
        plt.xticks(range(s), sites, rotation=20)
        plt.title("Top-Visited Websites")
        plt.show()
    else:
        print("[.] Uh?")
        quit()
# Path to the Chrome history database of the "Default" profile under ~/.config.
data_path = os.path.join(os.path.expanduser('~'), ".config", "google-chrome", "Default")
history_db = os.path.join(data_path, 'History')

# Query the database. The join yields one row per entry in `visits`, so every
# returned row stands for a single visit to its URL.
select_statement = "SELECT urls.url, urls.visit_count FROM urls, visits WHERE urls.id = visits.url;"
# closing() releases the connection even if the query raises (for example
# because the database is locked by a running Chrome).
with closing(sqlite3.connect(history_db)) as c:
    cursor = c.cursor()
    cursor.execute(select_statement)
    results = cursor.fetchall()  # list of (url, visit_count) tuples

# Tally visits per domain. parse() returns None for URLs it cannot split;
# those are skipped instead of being lumped together under a None key.
domains = (parse(url) for url, _visit_count in results)
sites_count = Counter(domain for domain in domains if domain is not None)

# most_common() orders by count, highest first; OrderedDict keeps that order.
sites_count_sorted = OrderedDict(sites_count.most_common())
analyze(sites_count_sorted)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment