Last active
May 12, 2018 07:35
-
-
Save ernstki/a425071f1c9f2ae66f74139cd883b418 to your computer and use it in GitHub Desktop.
List, filter, and sort your Firefox profile's browsing history; requires Python 3.5 and the Click library (click.pocoo.org)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# coding: utf-8 | |
# | |
# List, filter, and sort records from the SQLite database holding your Firefox | |
# profile's browsing history ('<profile>/places.sqlite#moz_places') | |
# | |
# In order to point this script at the correct Firefox profile, either use | |
# the '-p' option, or set an environment variable 'FFPROFILE' that is the | |
# complete path to the profile ('9ibBeri$h.default', for example) | |
# | |
# Yuuuge props to https://askubuntu.com/a/412890 for getting me started. | |
# | |
# Author: Kevin Ernst <ernstki -at- mail.uc.edu> | |
# Date: 11 May 2018 | |
# | |
# Usage: ./ffhistory.py --help | |
# | |
import os | |
import re | |
import click | |
import sqlite3 | |
# all available columns in 'moz_places' table; used for click.Choice, below | |
ALL_COLUMNS = 'id,url,title,rev_host,visit_count,hidden,typed,favicon_id,'\ | |
'frecency,last_visit_date,guid,foreign_count,url_hash,'\ | |
'description,preview_image_url'.split(',') | |
# these columns will appear in the output unless otherwise specified w/ '-c' | |
DEFAULT_COLUMNS = 'url,title,frecency,last_visit_date'.split(',') | |
# search these columns for PATTERNs | |
SEARCH_COLUMNS = ['url', 'title'] | |
# characters that aren't allowed in PATTERN (it's a non-matching class) | |
BAD_CHARS = re.compile(r'[^-._/\w\s]') | |
# this is the default database name, as of forever and a day; could change? | |
PLACES_DB = os.getenv('DBFILE', 'places.sqlite') | |
# this is the name of the table containing the profile's browsing history | |
PLACES_TABLE = 'moz_places' | |
@click.command(help='List Firefox history, sorted by last access date ' | |
"(by default). Searches 'url' and 'title' columns for " | |
'PATTERN, if specified.') | |
@click.option('-p', '--profile-path', envvar='FFPROFILE', | |
type=click.Path(exists=True), | |
help='Use this specific path to a Firefox profile. (default: ' | |
'use $FFPROFILE from the environment)') | |
@click.option('-u', '--urls-only', is_flag=True, | |
help='Display only URLs (and no other columns).') | |
@click.option('-f', '--by-frecency', is_flag=True, | |
help='Sort visited sites by most "frecent. (ref: ' | |
'https://en.wikipedia.org/wiki/Frecency)') | |
@click.option('-c', '--column', 'columns', multiple=True, metavar='COL', | |
default=DEFAULT_COLUMNS, type=click.Choice(ALL_COLUMNS), | |
help='Show just one (or, if repeated, several) specific ' | |
"columns; use '-n' to get a list. [default: {}]" | |
.format(', '.join(DEFAULT_COLUMNS))) | |
@click.option('-h', '--headers', is_flag=True, | |
help='Include column headers in output.') | |
@click.option('-l', '--limit', type=int, | |
help='Limit output to just this many results (after sorting).') | |
@click.option('-a', '--and-patterns', is_flag=True, | |
help='ANDs all PATTERNs given on the command line, rather than ' | |
'ORing them.') | |
@click.option('-s', '--strip-query-string', is_flag=True, | |
help="Strip the query string (anything after a '?') from URLs " | |
'in the output.') | |
@click.option('-n', '--column-names', is_flag=True, | |
help="Print a list of all column names (for use with the '-c' " | |
"option).") | |
@click.option('-d', '--delimiter', metavar='DELIM', | |
help='Use a custom output field delimiter. (default: tab)', | |
default="\t") | |
@click.option('--debug', is_flag=True, | |
help='Print prepared SQL statements to stderr.') | |
@click.argument('patterns', nargs=-1, required=False) | |
@click.pass_context | |
def list_ff_history(ctx, profile_path, urls_only, by_frecency, columns, | |
headers, limit, and_patterns, strip_query_string, | |
column_names, delimiter, debug, patterns): | |
""" | |
Emit the Firefox browsing history for the given profile, optionally | |
matching a pattern, optionally with selected columns and sort order | |
""" | |
try: | |
conn = sqlite3.connect(os.path.join(profile_path, PLACES_DB)) | |
except AttributeError: | |
# this occurs when 'profile_path' is None (which happens sometimes | |
# even in spite of the 'type=click.Path(exists=True)') | |
click.secho('ERROR» ', nl=False, fg='red', err=True) | |
click.secho("You should specify a profile with '-p'.", bold=True, | |
err=True) | |
ctx.abort() | |
except IOError: | |
click.secho('ERROR» ', nl=False, fg='red', err=True) | |
click.secho("Could not open the SQLite database at '{}'." | |
.format(profile_path), bold=True, err=True) | |
ctx.abort() | |
curs = conn.cursor() | |
# just print the list of columns from the 'moz_places' table | |
# this is (should be) the same as ALL_COLUMNS, but just for kicks: | |
if column_names: | |
curs.row_factory = sqlite3.Row | |
curs.execute('SELECT * from moz_places') | |
for column in curs.fetchone().keys(): | |
click.echo(column) | |
return | |
# otherwise... | |
if urls_only: | |
# because Click sends 'multiple=True' options back as tuples | |
if list(columns) != DEFAULT_COLUMNS: | |
click.secho('WARN» ', nl=False, fg='yellow', err=True) | |
click.secho("The '--urls-only' option will override any " | |
"'--column' options you specified.", bold=True, | |
err=True) | |
# otherwise: | |
columns = ('url',) | |
else: | |
# places store Unix epoch microseconds | |
# source: https://stackoverflow.com/a/19430099 | |
datefix = lambda x: x + '/1000000' if x == 'last_visit_date' else x | |
columns = map(datefix, columns) | |
# if there are multiple patterns, modify the default 'WHERE' clause: | |
if not patterns: | |
whereclause = "WHERE url LIKE 'http%'" if 'url' in columns else '' | |
else: | |
# put all the specified PATTERNs in a parenthesized clause | |
_AND = ' AND ' | |
_OR = ' OR ' | |
wheres = [] | |
for p in patterns: | |
# remove sneaky characters | |
if re.search(BAD_CHARS, p): | |
click.secho('WARN » ', fg='yellow', err=True, nl=False) | |
click.secho('Funny characters removed from PATTERN; this ' | |
'may not match what you expect now.', bold=True, | |
err=True) | |
p = re.sub(BAD_CHARS, '', p) | |
likes = ["{} LIKE '%{}%'".format(c, p) for c in SEARCH_COLUMNS] | |
wheres.append('(' + _OR.join(likes) + ')') | |
if and_patterns: | |
whereclause = 'WHERE (' + _AND.join(wheres) + ')' | |
else: | |
whereclause = 'WHERE (' + _OR.join(wheres) + ')' | |
# in all cases (unless 'url' was left out of the columns list, match | |
# only http[s]:// URLs | |
whereclause += " AND url LIKE 'http%'" if 'url' in columns else '' | |
params = dict( | |
columns=','.join(columns), | |
tablename=PLACES_TABLE, | |
whereclause=whereclause, | |
sortcolumn='frecency' if by_frecency else 'last_visit_date', | |
limit="LIMIT {}".format(limit) if limit else '', | |
) | |
stmt = "SELECT {columns} FROM {tablename} {whereclause} "\ | |
"ORDER BY {sortcolumn} DESC {limit};".format(**params) | |
if debug: | |
click.secho('DEBUG» ', nl=False, fg='magenta', err=True) | |
click.secho(stmt, bold=True, err=True) | |
curs.execute(stmt) | |
if headers: | |
click.echo(delimiter.join(columns)) | |
intfix = lambda x: "{:n}".format(x) if type(x) is int else x | |
nonefix = lambda x: '<null>' if x is None else x | |
stripqs = lambda x: re.sub(r'\?.*', '', x) if x.startswith('http') else x | |
for row in curs.fetchall(): | |
# you could replace 'n' with ',' to commafy the output, if you wanted | |
row = map(nonefix, map(intfix, row)) | |
if strip_query_string: | |
row = map(stripqs, row) | |
click.echo(delimiter.join(row)) | |
if __name__ == '__main__': | |
list_ff_history() | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment