ernstki/ffhist.py

## ffhist.py
#!/usr/bin/env python
# coding: utf-8
#
#  List, filter, and sort records from the SQLite database holding your Firefox
#  profile's browsing history ('<profile>/places.sqlite#moz_places')
#
#  In order to point this script at the correct Firefox profile, either use
#  the '-p' option, or set an environment variable 'FFPROFILE' that is the
#  complete path to the profile ('9ibBeri$h.default', for example)
#
#  Yuuuge props to https://askubuntu.com/a/412890 for getting me started.
#
#  Author:  Kevin Ernst <ernstki -at- mail.uc.edu>
#  Date:    11 May 2018
#
#  Usage:   ./ffhistory.py --help
#
import os
import re
import click
import sqlite3

# all available columns in 'moz_places' table; used for click.Choice, below
ALL_COLUMNS = 'id,url,title,rev_host,visit_count,hidden,typed,favicon_id,'\
              'frecency,last_visit_date,guid,foreign_count,url_hash,'\
              'description,preview_image_url'.split(',')

# these columns will appear in the output unless otherwise specified w/ '-c'
DEFAULT_COLUMNS = 'url,title,frecency,last_visit_date'.split(',')

# search these columns for PATTERNs
SEARCH_COLUMNS = ['url', 'title']

# characters that aren't allowed in PATTERN (it's a non-matching class)
BAD_CHARS = re.compile(r'[^-._/\w\s]')

# this is the default database name, as of forever and a day; could change?
PLACES_DB = os.getenv('DBFILE', 'places.sqlite')

# this is the name of the table containing the profile's browsing history
PLACES_TABLE = 'moz_places'


@click.command(help='List Firefox history, sorted by last access date '
                    "(by default). Searches 'url' and 'title' columns for "
                    'PATTERN, if specified.')
@click.option('-p', '--profile-path', envvar='FFPROFILE',
              type=click.Path(exists=True),
              help='Use this specific path to a Firefox profile. (default: '
                   'use $FFPROFILE from the environment)')
@click.option('-u', '--urls-only', is_flag=True,
              help='Display only URLs (and no other columns).')
@click.option('-f', '--by-frecency', is_flag=True,
              help='Sort visited sites by most "frecent. (ref: '
                   'https://en.wikipedia.org/wiki/Frecency)')
@click.option('-c', '--column', 'columns', multiple=True, metavar='COL',
              default=DEFAULT_COLUMNS, type=click.Choice(ALL_COLUMNS),
              help='Show just one (or, if repeated, several) specific '
                   "columns; use '-n' to get a list. [default: {}]"
                   .format(', '.join(DEFAULT_COLUMNS)))
@click.option('-h', '--headers', is_flag=True,
              help='Include column headers in output.')
@click.option('-l', '--limit', type=int,
              help='Limit output to just this many results (after sorting).')
@click.option('-a', '--and-patterns', is_flag=True,
              help='ANDs all PATTERNs given on the command line, rather than '
                   'ORing them.')
@click.option('-s', '--strip-query-string', is_flag=True,
              help="Strip the query string (anything after a '?') from URLs "
                   'in the output.')
@click.option('-n', '--column-names', is_flag=True,
              help="Print a list of all column names (for use with the '-c' "
                   "option).")
@click.option('-d', '--delimiter', metavar='DELIM',
              help='Use a custom output field delimiter. (default: tab)',
              default="\t")
@click.option('--debug', is_flag=True,
              help='Print prepared SQL statements to stderr.')
@click.argument('patterns', nargs=-1, required=False)
@click.pass_context
def list_ff_history(ctx, profile_path, urls_only, by_frecency, columns,
                    headers, limit, and_patterns, strip_query_string,
                    column_names, delimiter, debug, patterns):
    """
    Emit the Firefox browsing history for the given profile, optionally
    matching a pattern, optionally with selected columns and sort order
    """
    try:
        conn = sqlite3.connect(os.path.join(profile_path, PLACES_DB))
    except AttributeError:
        # this occurs when 'profile_path' is None (which happens sometimes
        # even in spite of the 'type=click.Path(exists=True)')
        click.secho('ERROR» ', nl=False, fg='red', err=True)
        click.secho("You should specify a profile with '-p'.", bold=True,
                    err=True)
        ctx.abort()
    except IOError:
        click.secho('ERROR» ', nl=False, fg='red', err=True)
        click.secho("Could not open the SQLite database at '{}'."
                    .format(profile_path), bold=True, err=True)
        ctx.abort()

    curs = conn.cursor()

    # just print the list of columns from the 'moz_places' table
    # this is (should be) the same as ALL_COLUMNS, but just for kicks:
    if column_names:
        curs.row_factory = sqlite3.Row
        curs.execute('SELECT * from moz_places')
        for column in curs.fetchone().keys():
            click.echo(column)
        return

    # otherwise...
    if urls_only:
        # because Click sends 'multiple=True' options back as tuples
        if list(columns) != DEFAULT_COLUMNS:
            click.secho('WARN» ', nl=False, fg='yellow', err=True)
            click.secho("The '--urls-only' option will override any "
                        "'--column' options you specified.", bold=True,
                        err=True)
        # otherwise:
        columns = ('url',)
    else:
        # places store Unix epoch microseconds
        # source: https://stackoverflow.com/a/19430099
        datefix = lambda x: x + '/1000000' if x == 'last_visit_date' else x
        columns = map(datefix, columns)

    # if there are multiple patterns, modify the default 'WHERE' clause:
    if not patterns:
        whereclause = "WHERE url LIKE 'http%'" if 'url' in columns else ''

    else:
        # put all the specified PATTERNs in a parenthesized clause
        _AND = ' AND '
        _OR = ' OR '
        wheres = []

        for p in patterns:
            # remove sneaky characters
            if re.search(BAD_CHARS, p):
                click.secho('WARN » ', fg='yellow', err=True, nl=False)
                click.secho('Funny characters removed from PATTERN; this '
                            'may not match what you expect now.', bold=True,
                            err=True)
                p = re.sub(BAD_CHARS, '', p)

            likes = ["{} LIKE '%{}%'".format(c, p) for c in SEARCH_COLUMNS]
            wheres.append('(' + _OR.join(likes) + ')')

        if and_patterns:
            whereclause = 'WHERE (' + _AND.join(wheres) + ')'
        else:
            whereclause = 'WHERE (' + _OR.join(wheres) + ')'

        # in all cases (unless 'url' was left out of the columns list, match
        # only http[s]:// URLs
        whereclause += " AND url LIKE 'http%'" if 'url' in columns else ''

    params = dict(
        columns=','.join(columns),
        tablename=PLACES_TABLE,
        whereclause=whereclause,
        sortcolumn='frecency' if by_frecency else 'last_visit_date',
        limit="LIMIT {}".format(limit) if limit else '',
    )

    stmt = "SELECT {columns} FROM {tablename} {whereclause} "\
           "ORDER BY {sortcolumn} DESC {limit};".format(**params)

    if debug:
        click.secho('DEBUG» ', nl=False, fg='magenta', err=True)
        click.secho(stmt, bold=True, err=True)

    curs.execute(stmt)

    if headers:
        click.echo(delimiter.join(columns))

    intfix = lambda x: "{:n}".format(x) if type(x) is int else x
    nonefix = lambda x: '<null>' if x is None else x
    stripqs = lambda x: re.sub(r'\?.*', '', x) if x.startswith('http') else x

    for row in curs.fetchall():
        # you could replace 'n' with ',' to commafy the output, if you wanted
        row = map(nonefix, map(intfix, row))

        if strip_query_string:
            row = map(stripqs, row)

        click.echo(delimiter.join(row))


if __name__ == '__main__':
    list_ff_history()
	#!/usr/bin/env python
	# coding: utf-8
	#
	# List, filter, and sort records from the SQLite database holding your Firefox
	# profile's browsing history ('<profile>/places.sqlite#moz_places')
	#
	# In order to point this script at the correct Firefox profile, either use
	# the '-p' option, or set an environment variable 'FFPROFILE' that is the
	# complete path to the profile ('9ibBeri$h.default', for example)
	#
	# Yuuuge props to https://askubuntu.com/a/412890 for getting me started.
	#
	# Author: Kevin Ernst <ernstki -at- mail.uc.edu>
	# Date: 11 May 2018
	#
	# Usage: ./ffhistory.py --help
	#
	import os
	import re
	import click
	import sqlite3

	# all available columns in 'moz_places' table; used for click.Choice, below
	ALL_COLUMNS = 'id,url,title,rev_host,visit_count,hidden,typed,favicon_id,'\
	'frecency,last_visit_date,guid,foreign_count,url_hash,'\
	'description,preview_image_url'.split(',')

	# these columns will appear in the output unless otherwise specified w/ '-c'
	DEFAULT_COLUMNS = 'url,title,frecency,last_visit_date'.split(',')

	# search these columns for PATTERNs
	SEARCH_COLUMNS = ['url', 'title']

	# characters that aren't allowed in PATTERN (it's a non-matching class)
	BAD_CHARS = re.compile(r'[^-._/\w\s]')

	# this is the default database name, as of forever and a day; could change?
	PLACES_DB = os.getenv('DBFILE', 'places.sqlite')

	# this is the name of the table containing the profile's browsing history
	PLACES_TABLE = 'moz_places'


	@click.command(help='List Firefox history, sorted by last access date '
	"(by default). Searches 'url' and 'title' columns for "
	'PATTERN, if specified.')
	@click.option('-p', '--profile-path', envvar='FFPROFILE',
	type=click.Path(exists=True),
	help='Use this specific path to a Firefox profile. (default: '
	'use $FFPROFILE from the environment)')
	@click.option('-u', '--urls-only', is_flag=True,
	help='Display only URLs (and no other columns).')
	@click.option('-f', '--by-frecency', is_flag=True,
	help='Sort visited sites by most "frecent. (ref: '
	'https://en.wikipedia.org/wiki/Frecency)')
	@click.option('-c', '--column', 'columns', multiple=True, metavar='COL',
	default=DEFAULT_COLUMNS, type=click.Choice(ALL_COLUMNS),
	help='Show just one (or, if repeated, several) specific '
	"columns; use '-n' to get a list. [default: {}]"
	.format(', '.join(DEFAULT_COLUMNS)))
	@click.option('-h', '--headers', is_flag=True,
	help='Include column headers in output.')
	@click.option('-l', '--limit', type=int,
	help='Limit output to just this many results (after sorting).')
	@click.option('-a', '--and-patterns', is_flag=True,
	help='ANDs all PATTERNs given on the command line, rather than '
	'ORing them.')
	@click.option('-s', '--strip-query-string', is_flag=True,
	help="Strip the query string (anything after a '?') from URLs "
	'in the output.')
	@click.option('-n', '--column-names', is_flag=True,
	help="Print a list of all column names (for use with the '-c' "
	"option).")
	@click.option('-d', '--delimiter', metavar='DELIM',
	help='Use a custom output field delimiter. (default: tab)',
	default="\t")
	@click.option('--debug', is_flag=True,
	help='Print prepared SQL statements to stderr.')
	@click.argument('patterns', nargs=-1, required=False)
	@click.pass_context
	def list_ff_history(ctx, profile_path, urls_only, by_frecency, columns,
	headers, limit, and_patterns, strip_query_string,
	column_names, delimiter, debug, patterns):
	"""
	Emit the Firefox browsing history for the given profile, optionally
	matching a pattern, optionally with selected columns and sort order
	"""
	try:
	conn = sqlite3.connect(os.path.join(profile_path, PLACES_DB))
	except AttributeError:
	# this occurs when 'profile_path' is None (which happens sometimes
	# even in spite of the 'type=click.Path(exists=True)')
	click.secho('ERROR» ', nl=False, fg='red', err=True)
	click.secho("You should specify a profile with '-p'.", bold=True,
	err=True)
	ctx.abort()
	except IOError:
	click.secho('ERROR» ', nl=False, fg='red', err=True)
	click.secho("Could not open the SQLite database at '{}'."
	.format(profile_path), bold=True, err=True)
	ctx.abort()

	curs = conn.cursor()

	# just print the list of columns from the 'moz_places' table
	# this is (should be) the same as ALL_COLUMNS, but just for kicks:
	if column_names:
	curs.row_factory = sqlite3.Row
	curs.execute('SELECT * from moz_places')
	for column in curs.fetchone().keys():
	click.echo(column)
	return

	# otherwise...
	if urls_only:
	# because Click sends 'multiple=True' options back as tuples
	if list(columns) != DEFAULT_COLUMNS:
	click.secho('WARN» ', nl=False, fg='yellow', err=True)
	click.secho("The '--urls-only' option will override any "
	"'--column' options you specified.", bold=True,
	err=True)
	# otherwise:
	columns = ('url',)
	else:
	# places store Unix epoch microseconds
	# source: https://stackoverflow.com/a/19430099
	datefix = lambda x: x + '/1000000' if x == 'last_visit_date' else x
	columns = map(datefix, columns)

	# if there are multiple patterns, modify the default 'WHERE' clause:
	if not patterns:
	whereclause = "WHERE url LIKE 'http%'" if 'url' in columns else ''

	else:
	# put all the specified PATTERNs in a parenthesized clause
	_AND = ' AND '
	_OR = ' OR '
	wheres = []

	for p in patterns:
	# remove sneaky characters
	if re.search(BAD_CHARS, p):
	click.secho('WARN » ', fg='yellow', err=True, nl=False)
	click.secho('Funny characters removed from PATTERN; this '
	'may not match what you expect now.', bold=True,
	err=True)
	p = re.sub(BAD_CHARS, '', p)

	likes = ["{} LIKE '%{}%'".format(c, p) for c in SEARCH_COLUMNS]
	wheres.append('(' + _OR.join(likes) + ')')

	if and_patterns:
	whereclause = 'WHERE (' + _AND.join(wheres) + ')'
	else:
	whereclause = 'WHERE (' + _OR.join(wheres) + ')'

	# in all cases (unless 'url' was left out of the columns list, match
	# only http[s]:// URLs
	whereclause += " AND url LIKE 'http%'" if 'url' in columns else ''

	params = dict(
	columns=','.join(columns),
	tablename=PLACES_TABLE,
	whereclause=whereclause,
	sortcolumn='frecency' if by_frecency else 'last_visit_date',
	limit="LIMIT {}".format(limit) if limit else '',
	)

	stmt = "SELECT {columns} FROM {tablename} {whereclause} "\
	"ORDER BY {sortcolumn} DESC {limit};".format(**params)

	if debug:
	click.secho('DEBUG» ', nl=False, fg='magenta', err=True)
	click.secho(stmt, bold=True, err=True)

	curs.execute(stmt)

	if headers:
	click.echo(delimiter.join(columns))

	intfix = lambda x: "{:n}".format(x) if type(x) is int else x
	nonefix = lambda x: '<null>' if x is None else x
	stripqs = lambda x: re.sub(r'\?.*', '', x) if x.startswith('http') else x

	for row in curs.fetchall():
	# you could replace 'n' with ',' to commafy the output, if you wanted
	row = map(nonefix, map(intfix, row))

	if strip_query_string:
	row = map(stripqs, row)

	click.echo(delimiter.join(row))


	if __name__ == '__main__':
	list_ff_history()