Skip to content

Instantly share code, notes, and snippets.

@ksamuel
Created December 20, 2019 12:16
Show Gist options
  • Star 5 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save ksamuel/fb3af1345626cb66c474f38d8f037405 to your computer and use it in GitHub Desktop.
Save ksamuel/fb3af1345626cb66c474f38d8f037405 to your computer and use it in GitHub Desktop.
A quick and dirty Python script to download all Firefox bookmarks as standalone static pages
"""
usage: download_bookmarks.py [-h] [--concurrency [CONCURRENCY]] [--directory DIRECTORY] bookmarks
positional arguments:
bookmarks The path to the sqlite db file containing
the bookmarks. It's the places.sqlite file
in your default profile dir.
optional arguments:
-h, --help show this help message and exit
--concurrency [CONCURRENCY], -c [CONCURRENCY]
Max number of bookmarks to process in parallel
--directory DIRECTORY, -d DIRECTORY
Directory to store the downloaded files. Will be recursively created if it doesn't exist. Otherwise,
a temp dir will be used.
"""
import argparse
import asyncio
import sqlite3
import sys
import time
from pathlib import Path
from tempfile import TemporaryDirectory, mkdtemp
# Bail out early on interpreters older than 3.8 (the script relies on
# asyncio behaviors from that release).
if sys.version_info < (3, 8):
    sys.exit("This script requires Python 3.8 or higher")
async def download(url, directory):
    """Download *url* as a standalone static page into *directory* via wget.

    Spawns one wget subprocess (-p: fetch page requisites, -k: convert
    links for local viewing, -P: output directory) and waits for it.

    :param url: the bookmark URL to fetch.
    :param directory: target directory passed to wget's ``-P``.
    """
    print(f"Downloading: {url} - START")
    # Pass an argument list instead of a shell string so URLs containing
    # shell metacharacters cannot inject commands.
    proc = await asyncio.create_subprocess_exec(
        "wget", "-p", "-k", "-P", str(directory), url,
        stderr=asyncio.subprocess.PIPE,
    )
    _, stderr = await proc.communicate()
    # wget writes its normal progress log to stderr, so non-empty stderr
    # alone does not indicate failure — check the exit code instead.
    if proc.returncode != 0:
        print(f"Downloading: {url} - ERROR")
        print(f"\n[stderr]\n{stderr.decode()}")
        return
    print(f"Downloading: {url} - DONE")
async def main():
    """Read bookmark URLs from a Firefox places.sqlite db and download them.

    Parses the command line, extracts every bookmarked URL, then downloads
    them with at most ``--concurrency`` wget processes running at once.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "bookmarks",
        help="The path to the sqlite db file containing the bookmarks. It's the places.sqlite file in your default profile dir.",
    )
    parser.add_argument(
        "--concurrency",
        "-c",
        type=int,
        nargs="?",
        help="Max number of bookmarks to process in parallel",
        default=40,
    )
    parser.add_argument(
        "--directory",
        "-d",
        help="Directory to store the downloaded files. Will be recursively created if it doesn't exist. Otherwise, a temp dir will be used.",
    )
    args = parser.parse_args()

    # mkdtemp() creates a directory that is NOT auto-deleted, unlike
    # TemporaryDirectory().name whose finalizer can remove the directory
    # while downloads are still writing into it.
    directory = Path(args.directory or mkdtemp())
    try:
        directory.mkdir(exist_ok=True, parents=True)
    except OSError as e:
        sys.exit(f"Error while creating the output directory: {e}")

    bookmark_file = Path(args.bookmarks)
    if not bookmark_file.is_file():
        sys.exit(f'Cannot find "{bookmark_file}"')
    if bookmark_file.name != "places.sqlite":
        sys.exit(
            f'The bookmark file should be a "places.sqlite" file, got "{bookmark_file}"'
        )

    # Note: "with sqlite3.connect(...)" only wraps a transaction; it never
    # closes the connection. Close it explicitly instead.
    con = sqlite3.connect(bookmark_file)
    try:
        urls = {
            url
            for (url,) in con.execute(
                """
                SELECT url from moz_places,moz_bookmarks
                WHERE moz_places.id=moz_bookmarks.fk;
                """
            )
        }
    except sqlite3.OperationalError as e:
        # Firefox holds an exclusive lock on places.sqlite while running.
        if "locked" in str(e):
            sys.exit("Close Firefox before running this script")
        raise
    finally:
        con.close()

    total = len(urls)
    print(f"Ready to process {total} bookmarks")
    print(f"Saving results in: {directory}")
    if not args.directory:
        # Give the user a chance to note (or abort) the temp dir path.
        print("Starting in 5 secs")
        time.sleep(5)

    running_tasks = set()
    for i, url in enumerate(urls, 1):
        if len(running_tasks) >= args.concurrency:
            # Block until at least one slot frees up before scheduling more.
            _, running_tasks = await asyncio.wait(
                running_tasks, return_when=asyncio.FIRST_COMPLETED
            )
        # create_task() schedules the coroutine right away; passing bare
        # coroutines to asyncio.wait() is deprecated and removed in 3.12.
        running_tasks.add(asyncio.create_task(download(url, directory)))
        print(f"Progress {i}/{total}")
    # asyncio.wait() raises ValueError on an empty set (zero bookmarks).
    if running_tasks:
        await asyncio.wait(running_tasks)
    print(f"Results saved in: {directory}")
if __name__ == "__main__":
    # Guard the entry point so the module can be imported without
    # immediately kicking off downloads.
    asyncio.run(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment