Skip to content

Instantly share code, notes, and snippets.

@dingmaotu
Last active September 11, 2023 03:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dingmaotu/7d0bd4355202320e4368d2ce5b8ef1a8 to your computer and use it in GitHub Desktop.
Save dingmaotu/7d0bd4355202320e4368d2ce5b8ef1a8 to your computer and use it in GitHub Desktop.
Find duplicates in your bookmarks
# -*- coding: utf-8 -*-
"""
Find duplicates in your bookmarks.
Usage:
python find_dup_bookmarks.py /path/to/your/Bookmarks
Chrome saves your bookmarks in a json file.
For macOS, it is `/Users/{your user name}/Library/Application Support/Google/Chrome/Default/Bookmarks`.
For other operating systems, search online for the location of Chrome's profile directory.
"""
import sys
import json
def get_urls_with_paths(bookmark: dict, parent_path: str, res: dict[str, list[str]]):
    """Recursively collect every URL under *bookmark* together with its path.

    Args:
        bookmark: A bookmark node: a dict with a "type" of "folder" (with a
            "name" and, optionally, "children") or "url" (with a "name" and
            a "url").
        parent_path: Slash-separated path of the folders above this node;
            pass "" for a root node.
        res: Accumulator mapping each URL to the list of paths it was found
            at. It is mutated in place; nothing is returned.

    Nodes of any other type (or with no "type" at all) are reported on
    stdout and otherwise ignored.
    """
    node_type = bookmark.get("type")
    if node_type == "folder":
        # A folder without a "children" key is treated as empty rather than
        # aborting the whole scan with a KeyError.
        for item in bookmark.get("children", []):
            get_urls_with_paths(item, parent_path + "/" + bookmark["name"], res)
    elif node_type == "url":
        path = parent_path + "/" + bookmark["name"]
        # Group paths by URL; a URL seen more than once is a duplicate.
        res.setdefault(bookmark["url"], []).append(path)
    else:
        print(f"unknown type: {node_type}")
def print_duplicated_items(items: list[tuple[str, list[str]]]):
    """Print a numbered report of duplicated bookmarks to stdout.

    Args:
        items: ``(url, paths)`` pairs, one per duplicated URL, where *paths*
            lists every bookmark location that points at that URL.

    When *items* is empty a short congratulation is printed instead of a
    report.
    """
    if not items:
        print("Congrats! You have no duplication in your bookmarks!")
        return

    unique = len(items)
    # Total bookmark entries involved, counting every copy of every URL.
    total = sum(len(paths) for _, paths in items)
    print(
        f"There are {total} duplicated items and it will go down to {unique} "
        "after deduplication. Please review:"
    )
    for url_no, (url, paths) in enumerate(items, start=1):
        print(f"[{url_no}] {url}")
        for path_no, path in enumerate(paths, start=1):
            print(f"\t{path_no}) {path}")
def main(path):
    """Load the bookmarks JSON file at *path* and report duplicated URLs.

    Args:
        path: Location of the bookmarks file. It must contain a JSON object
            with a top-level "roots" mapping of bookmark nodes.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If the JSON has no "roots" entry.
    """
    # Decode explicitly as UTF-8 instead of the platform's locale encoding,
    # so non-ASCII bookmark titles load the same way on every OS.
    with open(path, encoding="utf-8") as f:
        bookmarks = json.load(f)

    res = {}
    for bookmark in bookmarks["roots"].values():
        # Only dict values can be bookmark nodes; skip anything else stored
        # under "roots" (e.g. a plain string) rather than crashing on it.
        if isinstance(bookmark, dict):
            get_urls_with_paths(bookmark, "", res)

    # A URL is duplicated when it was found at more than one path.
    duplicated = [(url, paths) for url, paths in res.items() if len(paths) > 1]
    print_duplicated_items(duplicated)
if __name__ == "__main__":
    # Deliberately minimal command line (no argparse): the first argument is
    # the bookmarks file. Exit with a usage hint instead of an IndexError
    # traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit(f"Usage: python {sys.argv[0]} /path/to/your/Bookmarks")
    path = sys.argv[1]
    main(path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment