Skip to content

Instantly share code, notes, and snippets.

@tmonjalo
Created September 13, 2018 10:42
Show Gist options
  • Save tmonjalo/33c4402b0d35f1233020bf427b5539fa to your computer and use it in GitHub Desktop.
Save tmonjalo/33c4402b0d35f1233020bf427b5539fa to your computer and use it in GitHub Desktop.
List all Firefox tabs with title and URL
#! /usr/bin/env python3
"""
List all Firefox tabs with title and URL
Supported input: json or jsonlz4 recovery files
Default output: title (URL)
Output format can be specified as argument
"""
import sys
import pathlib
import lz4.block
import json
def tab_lines(session, template='%s (%s)'):
    """Yield one formatted line per tab found in a decoded session.

    session: object decoded from a recovery file; tabs are read from
        session['windows'][*]['tabs'].
    template: %-style format string that receives (title, url).

    Each tab keeps its history in 'entries' and a 1-based 'index' that
    selects the entry to report.  Tabs without any entry are skipped, an
    out-of-range or missing index falls back to the last entry, and a
    missing title falls back to the URL, so that a single odd tab cannot
    abort the whole listing with KeyError/IndexError.
    """
    for window in session.get('windows', []):
        for tab in window.get('tabs', []):
            entries = tab.get('entries') or []
            if not entries:
                continue
            last = len(entries) - 1
            position = tab.get('index', len(entries)) - 1
            if not 0 <= position <= last:
                position = last
            entry = entries[position]
            url = entry.get('url', '')
            yield template % (entry.get('title') or url, url)


path = pathlib.Path.home().joinpath('.mozilla/firefox')
files = path.glob('*default*/sessionstore-backups/recovery.js*')
# The optional first command-line argument overrides the output format.
try:
    template = sys.argv[1]
except IndexError:
    template = '%s (%s)'
for f in files:
    b = f.read_bytes()
    # jsonlz4 files start with an 8-byte magic header followed by an LZ4
    # block; plain json files are parsed as they are.
    if b[:8] == b'mozLz40\0':
        b = lz4.block.decompress(b[8:])
    j = json.loads(b)
    for line in tab_lines(j, template):
        print(line)
@mwalkerr
Copy link

mwalkerr commented Jun 9, 2023

The location for me was actually ~/snap/firefox/common/.mozilla/firefox/. You can find your profile location by following the steps here: https://support.mozilla.org/en-US/kb/profiles-where-firefox-stores-user-data

@kth8
Copy link

kth8 commented Oct 2, 2024

I was looking for a way to list my open Firefox tabs and found this after trying other solutions which were outdated. I use the Firefox flatpak with single window so I asked AI to refactor @RanTalbott's script and this seems to do what I want:

#! /usr/bin/env python3
import argparse
import pathlib
import lz4.block
import json
from urllib.parse import urlparse

def parse_arguments():
    """Parse the command line and return the resulting namespace.

    Two optional string flags are accepted: --profile (the Firefox
    profile name) and --mozilla-path (the base directory holding the
    Firefox profiles).
    """
    option_table = (
        (
            '--profile',
            'default',
            "The Firefox profile name (default: 'default').",
        ),
        (
            '--mozilla-path',
            '~/.var/app/org.mozilla.firefox/.mozilla/firefox',
            "The base path to the Firefox profiles directory (default: '~/.var/app/org.mozilla.firefox/.mozilla/firefox').",
        ),
    )
    cli = argparse.ArgumentParser(
        description="Extract open tab titles and URLs from Firefox sessionstore backups.",
    )
    for flag, fallback, description in option_table:
        cli.add_argument(flag, type=str, default=fallback, help=description)
    return cli.parse_args()

def get_session_files(mozilla_path, profile):
    """Return an iterator over the recovery files of matching profiles.

    mozilla_path: base directory containing the profile directories; a
        leading '~' is expanded.
    profile: text that a profile directory name must contain.
    """
    profiles_root = pathlib.Path(mozilla_path).expanduser()
    pattern = '*{}*/sessionstore-backups/recovery.*'.format(profile)
    return profiles_root.glob(pattern)

def read_and_decompress_file(file_path):
    """Return the decompressed payload of a mozLz4 session file.

    file_path: path object (must provide read_bytes()) of the file to read.

    Returns the decompressed bytes, or None after printing a message when
    the file cannot be read, does not start with the 8-byte mozLz40 magic
    header, or fails to decompress.
    """
    magic = b'mozLz40\0'
    try:
        raw = file_path.read_bytes()
    except OSError as e:
        # OSError covers FileNotFoundError as well as permission problems
        # and directories, so an unreadable file is skipped instead of
        # aborting the scan.
        print(f"Error reading or decompressing file {file_path}: {e}")
        return None
    if not raw.startswith(magic):
        print(f"Skipping non-LZ4 file: {file_path}")
        return None
    try:
        return lz4.block.decompress(raw[len(magic):])
    except lz4.block.LZ4BlockError as e:
        print(f"Error reading or decompressing file {file_path}: {e}")
        return None

def parse_json_data(data, file_path):
    """Decode JSON text or bytes and return the resulting object.

    data: str or bytes holding the JSON document.
    file_path: origin of the data, used only in the error message.

    Returns the decoded object, or None after printing a message when the
    input is not valid JSON.
    """
    try:
        return json.loads(data)
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        # json.loads decodes bytes input itself; undecodable bytes raise
        # UnicodeDecodeError, which is not a JSONDecodeError.
        print(f"Error parsing JSON data from file {file_path}: {e}")
        return None

def extract_titles_from_session(session_data, unique_titles, file_path):
    """Add a "<site> - <title>" string for every tab to unique_titles.

    session_data: decoded session object; tabs are read from
        session_data['windows'][*]['tabs'].
    unique_titles: set that is updated in place.
    file_path: origin of the data, used only in diagnostic messages.

    For each tab the entry selected by its 1-based 'index' is used; tabs
    whose index does not point at an existing entry, or whose entry has no
    URL, contribute nothing.  Malformed parts of the structure are
    reported and skipped instead of raising.
    """
    if not isinstance(session_data, dict):
        print(f"Invalid session structure in file {file_path}")
        return
    windows = session_data.get('windows', [])
    if not isinstance(windows, list):
        print(f"Invalid windows structure in file {file_path}")
        return

    for window in windows:
        if not isinstance(window, dict):
            print(f"Invalid window structure in file {file_path}")
            continue
        tabs = window.get('tabs', [])
        if not isinstance(tabs, list):
            print(f"Invalid tabs structure in file {file_path}")
            continue

        for tab in tabs:
            if not isinstance(tab, dict):
                print(f"Invalid tab structure in file {file_path}")
                continue
            entries = tab.get('entries', [])
            if not isinstance(entries, list):
                print(f"Invalid entries structure in file {file_path}")
                continue
            index = tab.get('index', 0)
            if not isinstance(index, int):
                print(f"Invalid index in file {file_path}")
                continue
            index -= 1  # stored index is 1-based
            if not 0 <= index < len(entries):
                continue
            entry = entries[index]
            if not isinstance(entry, dict):
                print(f"Invalid entry structure in file {file_path}")
                continue

            title = entry.get('title', 'Untitled')
            url = entry.get('url', '')
            if not url or not isinstance(url, str):
                continue
            try:
                website = urlparse(url).netloc
            except ValueError:
                print(f"Invalid URL format in file {file_path}: {url}")
                continue
            if website.startswith("www."):
                website = website[4:]
            unique_titles.add(f"{website} - {title}")

def main():
    """Print one "<site> - <title>" line per distinct open tab.

    Every recovery file of the selected profile(s) is read, decompressed
    and parsed; files that fail any of those steps are skipped.  The
    collected lines are de-duplicated across files and printed in sorted
    order.
    """
    args = parse_arguments()
    unique_titles = set()
    for f in get_session_files(args.mozilla_path, args.profile):
        # is_file() is already False for a missing path, so no separate
        # exists() check is needed.
        if not f.is_file():
            print(f"Skipping invalid or inaccessible file: {f}")
            continue
        data = read_and_decompress_file(f)
        if data is None:
            continue
        session_data = parse_json_data(data, f)
        if session_data is None:
            continue
        extract_titles_from_session(session_data, unique_titles, f)
    # Set iteration order is arbitrary; sort so the output is stable
    # between runs.
    for title in sorted(unique_titles):
        print(title)

if __name__ == "__main__":
    main()

@mabra
Copy link

mabra commented Dec 9, 2024

Thanks for this script.
Does anyone know whether, and how, it is possible under Linux to get the workspace (number or name)
from a tab's title? Due to the nature of the window hierarchy (as far as I understand it), tools like
'xdotool' are unable to map a title to a workspace (it works only partially) - can this script be of help?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment