Skip to content

Instantly share code, notes, and snippets.

@ernstki
Last active May 12, 2018 07:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ernstki/a425071f1c9f2ae66f74139cd883b418 to your computer and use it in GitHub Desktop.
Save ernstki/a425071f1c9f2ae66f74139cd883b418 to your computer and use it in GitHub Desktop.
List, filter, and sort your Firefox profile's browsing history; requires Python 3.5 and the Click library (click.pocoo.org)
#!/usr/bin/env python
# coding: utf-8
#
# List, filter, and sort records from the SQLite database holding your Firefox
# profile's browsing history ('<profile>/places.sqlite#moz_places')
#
# In order to point this script at the correct Firefox profile, either use
# the '-p' option, or set an environment variable 'FFPROFILE' that is the
# complete path to the profile ('9ibBeri$h.default', for example)
#
# Yuuuge props to https://askubuntu.com/a/412890 for getting me started.
#
# Author: Kevin Ernst <ernstki -at- mail.uc.edu>
# Date: 11 May 2018
#
# Usage: ./ffhistory.py --help
#
import os
import re
import click
import sqlite3
# all available columns in 'moz_places' table; used for click.Choice, below
ALL_COLUMNS = 'id,url,title,rev_host,visit_count,hidden,typed,favicon_id,'\
'frecency,last_visit_date,guid,foreign_count,url_hash,'\
'description,preview_image_url'.split(',')
# these columns will appear in the output unless otherwise specified w/ '-c'
DEFAULT_COLUMNS = 'url,title,frecency,last_visit_date'.split(',')
# search these columns for PATTERNs
SEARCH_COLUMNS = ['url', 'title']
# characters that aren't allowed in PATTERN (it's a non-matching class)
BAD_CHARS = re.compile(r'[^-._/\w\s]')
# this is the default database name, as of forever and a day; could change?
PLACES_DB = os.getenv('DBFILE', 'places.sqlite')
# this is the name of the table containing the profile's browsing history
PLACES_TABLE = 'moz_places'
@click.command(help='List Firefox history, sorted by last access date '
"(by default). Searches 'url' and 'title' columns for "
'PATTERN, if specified.')
@click.option('-p', '--profile-path', envvar='FFPROFILE',
type=click.Path(exists=True),
help='Use this specific path to a Firefox profile. (default: '
'use $FFPROFILE from the environment)')
@click.option('-u', '--urls-only', is_flag=True,
help='Display only URLs (and no other columns).')
@click.option('-f', '--by-frecency', is_flag=True,
help='Sort visited sites by most "frecent. (ref: '
'https://en.wikipedia.org/wiki/Frecency)')
@click.option('-c', '--column', 'columns', multiple=True, metavar='COL',
default=DEFAULT_COLUMNS, type=click.Choice(ALL_COLUMNS),
help='Show just one (or, if repeated, several) specific '
"columns; use '-n' to get a list. [default: {}]"
.format(', '.join(DEFAULT_COLUMNS)))
@click.option('-h', '--headers', is_flag=True,
help='Include column headers in output.')
@click.option('-l', '--limit', type=int,
help='Limit output to just this many results (after sorting).')
@click.option('-a', '--and-patterns', is_flag=True,
help='ANDs all PATTERNs given on the command line, rather than '
'ORing them.')
@click.option('-s', '--strip-query-string', is_flag=True,
help="Strip the query string (anything after a '?') from URLs "
'in the output.')
@click.option('-n', '--column-names', is_flag=True,
help="Print a list of all column names (for use with the '-c' "
"option).")
@click.option('-d', '--delimiter', metavar='DELIM',
help='Use a custom output field delimiter. (default: tab)',
default="\t")
@click.option('--debug', is_flag=True,
help='Print prepared SQL statements to stderr.')
@click.argument('patterns', nargs=-1, required=False)
@click.pass_context
def list_ff_history(ctx, profile_path, urls_only, by_frecency, columns,
headers, limit, and_patterns, strip_query_string,
column_names, delimiter, debug, patterns):
"""
Emit the Firefox browsing history for the given profile, optionally
matching a pattern, optionally with selected columns and sort order
"""
try:
conn = sqlite3.connect(os.path.join(profile_path, PLACES_DB))
except AttributeError:
# this occurs when 'profile_path' is None (which happens sometimes
# even in spite of the 'type=click.Path(exists=True)')
click.secho('ERROR» ', nl=False, fg='red', err=True)
click.secho("You should specify a profile with '-p'.", bold=True,
err=True)
ctx.abort()
except IOError:
click.secho('ERROR» ', nl=False, fg='red', err=True)
click.secho("Could not open the SQLite database at '{}'."
.format(profile_path), bold=True, err=True)
ctx.abort()
curs = conn.cursor()
# just print the list of columns from the 'moz_places' table
# this is (should be) the same as ALL_COLUMNS, but just for kicks:
if column_names:
curs.row_factory = sqlite3.Row
curs.execute('SELECT * from moz_places')
for column in curs.fetchone().keys():
click.echo(column)
return
# otherwise...
if urls_only:
# because Click sends 'multiple=True' options back as tuples
if list(columns) != DEFAULT_COLUMNS:
click.secho('WARN» ', nl=False, fg='yellow', err=True)
click.secho("The '--urls-only' option will override any "
"'--column' options you specified.", bold=True,
err=True)
# otherwise:
columns = ('url',)
else:
# places store Unix epoch microseconds
# source: https://stackoverflow.com/a/19430099
datefix = lambda x: x + '/1000000' if x == 'last_visit_date' else x
columns = map(datefix, columns)
# if there are multiple patterns, modify the default 'WHERE' clause:
if not patterns:
whereclause = "WHERE url LIKE 'http%'" if 'url' in columns else ''
else:
# put all the specified PATTERNs in a parenthesized clause
_AND = ' AND '
_OR = ' OR '
wheres = []
for p in patterns:
# remove sneaky characters
if re.search(BAD_CHARS, p):
click.secho('WARN » ', fg='yellow', err=True, nl=False)
click.secho('Funny characters removed from PATTERN; this '
'may not match what you expect now.', bold=True,
err=True)
p = re.sub(BAD_CHARS, '', p)
likes = ["{} LIKE '%{}%'".format(c, p) for c in SEARCH_COLUMNS]
wheres.append('(' + _OR.join(likes) + ')')
if and_patterns:
whereclause = 'WHERE (' + _AND.join(wheres) + ')'
else:
whereclause = 'WHERE (' + _OR.join(wheres) + ')'
# in all cases (unless 'url' was left out of the columns list, match
# only http[s]:// URLs
whereclause += " AND url LIKE 'http%'" if 'url' in columns else ''
params = dict(
columns=','.join(columns),
tablename=PLACES_TABLE,
whereclause=whereclause,
sortcolumn='frecency' if by_frecency else 'last_visit_date',
limit="LIMIT {}".format(limit) if limit else '',
)
stmt = "SELECT {columns} FROM {tablename} {whereclause} "\
"ORDER BY {sortcolumn} DESC {limit};".format(**params)
if debug:
click.secho('DEBUG» ', nl=False, fg='magenta', err=True)
click.secho(stmt, bold=True, err=True)
curs.execute(stmt)
if headers:
click.echo(delimiter.join(columns))
intfix = lambda x: "{:n}".format(x) if type(x) is int else x
nonefix = lambda x: '<null>' if x is None else x
stripqs = lambda x: re.sub(r'\?.*', '', x) if x.startswith('http') else x
for row in curs.fetchall():
# you could replace 'n' with ',' to commafy the output, if you wanted
row = map(nonefix, map(intfix, row))
if strip_query_string:
row = map(stripqs, row)
click.echo(delimiter.join(row))
if __name__ == '__main__':
list_ff_history()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment