-
-
Save tmonjalo/33c4402b0d35f1233020bf427b5539fa to your computer and use it in GitHub Desktop.
#! /usr/bin/env python3
"""
List all Firefox tabs with title and URL

Supported input: json or jsonlz4 recovery files
Default output: title (URL)
Output format can be specified as argument
"""

import sys
import pathlib
import lz4.block
import json

# Session recovery files of every profile directory whose name contains "default".
path = pathlib.Path.home().joinpath('.mozilla/firefox')
files = path.glob('*default*/sessionstore-backups/recovery.js*')

# The optional first argument is a %-style template that receives (title, url).
try:
    template = sys.argv[1]
except IndexError:
    template = '%s (%s)'

for f in files:
    b = f.read_bytes()
    # Compressed files start with an 8-byte 'mozLz40\0' magic header that must
    # be stripped before the LZ4 block is decompressed.
    if b[:8] == b'mozLz40\0':
        b = lz4.block.decompress(b[8:])
    j = json.loads(b)
    for w in j['windows']:
        for t in w['tabs']:
            entries = t.get('entries', [])
            # The stored index is 1-based, hence the -1 to address the list.
            i = t.get('index', 0) - 1
            # Skip tabs with no usable history entry instead of crashing.
            if not 0 <= i < len(entries):
                continue
            entry = entries[i]
            print(template % (
                entry.get('title', ''),
                entry.get('url', ''),
            ))
Check out my fork for a small edit that makes it work on MacOS.
Magic!
great, thanks for this.
@goeblr: I got an error with your version: `TypeError: not all arguments converted during string formatting`.
Any thoughts? Thanks.
Thanks very much for sharing this, Thomas!
Yours wasn't exactly what I wanted, but it was very close, and the fact that you did all the hard work of figuring out how to get the window state data made it easy for me to find the last bits that I needed. I probably wouldn't have done the project if I'd had to find it all myself.
My needs are somewhat different: I usually have several Firefox windows open, sometimes with tabs for more than one project in one window. So I needed to find the right window, and don't usually need to know about the URL. So I produce a list of the windows and their tabs, with URL display optional. I also made the choice of profile a parameter, because I use a few different ones for different purposes. I've attached a copy in case anyone else is interested.
Thanks again,
Ran
`#! /usr/bin/env python3
"""
List all Firefox tabs with title and URL
Supported input: json or jsonlz4 recovery files
Default output:
window title
tab1 title
tab2 title
...
Display of URLs on lines following the tab titles
can be enabled via the "-u" argument
"""
import sys
import pathlib
import lz4.block
import json
import getopt
# Version string printed by the -V/--version option.
MY_VERSION = "0.1a"
def usage(my_name):
    """Print the command-line help text to stdout.

    Args:
        my_name: Program name to show on the "Usage:" line.
    """
    print("Usage: " + my_name + " [-huV] [-p profile]")
    # The description previously talked about transposing a CSV file, which
    # this program does not do.
    print('''
List the open Firefox tabs of each window, read from the session recovery files.
Options:
-h, --help - Display this help and exit
-V, --version - Display this program's version and exit
-u, --show_urls - Include the URLs in the output Default is don't.
-p, --profile - The Firefox profile you're using. Default is "default".
''')
def _current_entry(tab):
    """Return the history entry a tab currently points at, or None if it has none."""
    entries = tab.get('entries') or []
    # The stored index is 1-based, hence the -1 to address the list.
    i = tab.get('index', 0) - 1
    if 0 <= i < len(entries):
        return entries[i]
    return None


def main(argv):
    """List the tabs of every Firefox window found in the session recovery files.

    For each window the title of its selected tab is printed first, followed
    by one line per tab title (and, with -u, one line per tab URL).

    Args:
        argv: Full argument vector; argv[0] is the program name and the rest
            are the options described by usage().

    Exits via sys.exit() after -h/-V (status 0) or on an option error (status 1).
    """
    EXIT_OK = 0
    EXIT_ERROR = 1
    show_urls = False
    profile = "default"

    try:
        # Long option names must not contain trailing spaces, and "profile="
        # needs the "=" so that getopt hands over its argument.
        opts, _ = getopt.getopt(
            argv[1:], "huVp:", ["help", "show_urls", "version", "profile="]
        )
    except getopt.GetoptError as err:
        # print help information and exit:
        print(err)  # will print something like "option -a not recognized"
        usage(argv[0])
        sys.exit(EXIT_ERROR)

    for o, a in opts:
        if o in ("-V", "--version"):
            print("Version " + MY_VERSION)
            sys.exit(EXIT_OK)
        elif o in ("-h", "--help"):
            usage(argv[0])
            sys.exit(EXIT_OK)
        elif o in ("-u", "--show_urls"):
            show_urls = True
        elif o in ("-p", "--profile"):
            profile = a
        else:
            usage(argv[0])
            sys.exit(EXIT_ERROR)

    path = pathlib.Path.home().joinpath('.mozilla/firefox')
    files = path.glob('*' + profile + '*/sessionstore-backups/recovery.js*')
    for f in files:
        b = f.read_bytes()
        # Compressed files start with an 8-byte 'mozLz40\0' magic header that
        # must be stripped before the LZ4 block is decompressed.
        if b[:8] == b'mozLz40\0':
            b = lz4.block.decompress(b[8:])
        j = json.loads(b)
        for w in j['windows']:
            tabs = w['tabs']
            # The window title is the title of the selected tab; 'selected'
            # is 1-based like the tab index.
            selected = w.get('selected', 0) - 1
            active = _current_entry(tabs[selected]) if 0 <= selected < len(tabs) else None
            print(active.get('title', '') if active else '')
            # Print the tab titles
            for t in tabs:
                entry = _current_entry(t)
                if entry is None:
                    # Tab without a usable history entry: nothing to show.
                    continue
                print(" %s" % (entry.get('title', ''),))
                if show_urls:
                    print(" %s" % (entry.get('url', ''),))
# Run only when executed as a script. The dunder names are required here:
# plain `name == "main"` raises NameError.
if __name__ == "__main__":
    main(sys.argv)
`
@goeblr: I got an error with your version: `TypeError: not all arguments converted during string formatting`. Any thoughts? Thanks.
Just a guess, but maybe the version of Firefox you're running is returning something other than a string for t['entries'][i]['title'] or t['entries'][i]['url']. Maybe a list or a tuple if there's something odd about the tab? I'd do a try/except, and print out type and size info for them on error.
The location for me was actually `~/snap/firefox/common/.mozilla/firefox/`.
You can find your profile location by following the steps here: https://support.mozilla.org/en-US/kb/profiles-where-firefox-stores-user-data
I was looking for a way to list my open Firefox tabs and found this after trying other solutions which were outdated. I use the Firefox flatpak with single window so I asked AI to refactor @RanTalbott's script and this seems to do what I want:
#! /usr/bin/env python3
import argparse
import pathlib
import lz4.block
import json
from urllib.parse import urlparse
def parse_arguments():
    """Parse the command line (sys.argv) and return the resulting namespace.

    Returns:
        argparse.Namespace with the string attributes ``profile`` and
        ``mozilla_path``.
    """
    cli = argparse.ArgumentParser(
        description="Extract open tab titles and URLs from Firefox sessionstore backups.",
    )
    # (flag, default value, help text) for every supported string option.
    option_specs = (
        (
            '--profile',
            'default',
            "The Firefox profile name (default: 'default').",
        ),
        (
            '--mozilla-path',
            '~/.var/app/org.mozilla.firefox/.mozilla/firefox',
            "The base path to the Firefox profiles directory (default: '~/.var/app/org.mozilla.firefox/.mozilla/firefox').",
        ),
    )
    for flag, fallback, help_text in option_specs:
        cli.add_argument(flag, type=str, default=fallback, help=help_text)
    return cli.parse_args()
def get_session_files(mozilla_path, profile):
    """Return an iterator over the session recovery files of matching profiles.

    Args:
        mozilla_path: Base directory holding the profile directories; a
            leading ``~`` is expanded.
        profile: Substring that a profile directory name must contain.

    Returns:
        The lazy iterator produced by ``Path.glob`` for every
        ``sessionstore-backups/recovery.*`` file in a matching profile.
    """
    profiles_root = pathlib.Path(mozilla_path).expanduser()
    pattern = f'*{profile}*/sessionstore-backups/recovery.*'
    return profiles_root.glob(pattern)
def read_and_decompress_file(file_path):
    """Read a session file and return its decompressed payload.

    Args:
        file_path: ``pathlib.Path`` of the file to read.

    Returns:
        The decompressed bytes, or None when the file cannot be read, does
        not start with the ``mozLz40\\0`` magic header, or fails to
        decompress. A message is printed in each of those cases.
    """
    try:
        raw = file_path.read_bytes()
    except OSError as e:
        # Covers a missing file as well as permission errors, directories,
        # etc., so one unreadable file does not abort the whole run.
        print(f"Error reading or decompressing file {file_path}: {e}")
        return None

    if not raw.startswith(b'mozLz40\0'):
        print(f"Skipping non-LZ4 file: {file_path}")
        return None

    try:
        # The 8-byte magic header is not part of the LZ4 block.
        return lz4.block.decompress(raw[8:])
    except lz4.block.LZ4BlockError as e:
        print(f"Error reading or decompressing file {file_path}: {e}")
        return None
def parse_json_data(data, file_path):
    """Decode JSON text or bytes, returning None (with a message) on failure.

    Args:
        data: ``str`` or ``bytes`` holding a JSON document.
        file_path: Where the data came from; used only in the error message.

    Returns:
        The decoded Python object, or None if ``data`` is not valid JSON.
    """
    try:
        return json.loads(data)
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        # Bytes input is decoded before parsing; undecodable bytes raise
        # UnicodeDecodeError, which is not a JSONDecodeError.
        print(f"Error parsing JSON data from file {file_path}: {e}")
        return None
def extract_titles_from_session(session_data, unique_titles, file_path):
    """Add a "<site> - <title>" string for the current entry of every tab.

    Malformed parts of the session are reported with a printed message and
    skipped. Tabs whose index does not address an entry, or whose entry has
    no URL, are skipped silently.

    Args:
        session_data: Decoded session document; expected to be a dict with a
            'windows' list.
        unique_titles: Set that the resulting strings are added to (mutated
            in place).
        file_path: Source of the data; used only in messages.
    """
    if not isinstance(session_data, dict):
        # The top level is validated like every nested level.
        print(f"Invalid session structure in file {file_path}")
        return

    for window in session_data.get('windows') or []:
        if not isinstance(window, dict):
            print(f"Invalid window structure in file {file_path}")
            continue

        for tab in window.get('tabs') or []:
            if not isinstance(tab, dict):
                print(f"Invalid tab structure in file {file_path}")
                continue

            # The stored index is 1-based, hence the -1 to address the list.
            index = tab.get('index', 0) - 1
            entries = tab.get('entries', [])
            if not isinstance(entries, list):
                print(f"Invalid entries structure in file {file_path}")
                continue
            if not 0 <= index < len(entries):
                continue

            entry = entries[index]
            if not isinstance(entry, dict):
                print(f"Invalid entry structure in file {file_path}")
                continue

            title = entry.get('title', 'Untitled')
            url = entry.get('url', '')
            if not url:
                continue

            try:
                website = urlparse(url).netloc
            except ValueError:
                print(f"Invalid URL format in file {file_path}: {url}")
                continue
            if website.startswith("www."):
                website = website[4:]
            unique_titles.add(f"{website} - {title}")
def main():
    """Print one "<site> - <title>" line per distinct open tab.

    Reads every session recovery file selected by the command-line options,
    skips files that cannot be read, decompressed or parsed, and prints the
    collected titles in sorted order.
    """
    args = parse_arguments()
    unique_titles = set()

    for f in get_session_files(args.mozilla_path, args.profile):
        # is_file() is already False for paths that do not exist.
        if not f.is_file():
            print(f"Skipping invalid or inaccessible file: {f}")
            continue

        data = read_and_decompress_file(f)
        if data is None:
            continue

        session_data = parse_json_data(data, f)
        if session_data is None:
            continue

        extract_titles_from_session(session_data, unique_titles, f)

    # Set iteration order is arbitrary; sort so repeated runs print the same
    # list in the same order.
    for title in sorted(unique_titles):
        print(title)
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
I recommend using this as an alternative: https://github.com/balta2ar/brotab