Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hvisage/477ff9def86d460ae6c474686333fd43 to your computer and use it in GitHub Desktop.
Save hvisage/477ff9def86d460ae6c474686333fd43 to your computer and use it in GitHub Desktop.
Identifies and removes duplicate 'items' and 'folders' from your Bitwarden vault. 🎃
# Copyright © 2023 Justin McGettigan
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software
# and associated documentation files (the “Software”), to deal in the Software without
# restriction, including without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ---------------------------------------------------------------------------------------------
"""
Bitwarden Duplicate Cleaner
This script identifies and removes duplicate 'items' and 'folders' from your Bitwarden vault:
1. Identifies duplicate items based on their content, ignoring unique properties like ID and dates.
2. Deletes folders containing only duplicates, then removes the duplicate items themselves.
Setup:
1. Install Bitwarden CLI: https://bitwarden.com/help/cli/#download-and-install
2. (Optional) Manually log in with 'bw login' and ensure you have a session key set up.
- https://bitwarden.com/help/cli/#using-a-session-key
- By default, the script prompts you for login if no active session is detected.
3. BACKUP your vault using 'bw export': https://bitwarden.com/help/cli/#export
- I recommend using '--format json' as I've experienced issues when importing the default CSV format.
4. Execute the script. Note: Deleted items are recoverable for 30 days.
- Run with '--help' to see available flags and options for execution.
Note:
- By default, the newest version of an item is retained. Use '--oldest' flag to keep the oldest.
- Comparisons strip notes of leading/trailing whitespaces, reflecting Bitwarden's import behavior.
"""
import argparse
import hashlib
import json
import os
import getpass
import subprocess
from collections import defaultdict
# Collect the names of the required third-party packages that fail to import,
# so that one message can list everything the user has to install.
missing_packages = []
try:
    from colorama import Fore, Style, init
except ImportError:
    missing_packages.append('colorama')
try:
    from tqdm import tqdm
except ImportError:
    missing_packages.append('tqdm')
if missing_packages:
    print("The following required packages are not installed:", ', '.join(missing_packages))
    print(f"Please install them using: \n\npip install {' '.join(missing_packages)}")
    # Raise SystemExit directly: the bare `exit` helper is injected by the `site`
    # module for interactive sessions and is not guaranteed to exist in programs.
    raise SystemExit(1)
VERSION = "1.1.0"
# Get ANSI escapes to work on Windows.
init()
class BitwardenError(Exception):
    """
    Script-specific exception for failed Bitwarden operations.
    Raised by the helper functions of this script; the entry point catches it,
    prints its message, and exits early.
    """
def style(text: str, color: str = Fore.WHITE, dim: bool = False, italic: bool = False) -> str:
    """
    Wraps a text in colorama/ANSI escape codes for color, brightness, and italics.
    Parameters:
    - text (str): The text to decorate.
    - color (str, optional): Color escape code placed before the text. Defaults to white.
    - dim (bool, optional): Use the dim brightness instead of the normal one. Defaults to False.
    - italic (bool, optional): Add the ANSI italic escape code. Defaults to False.
    Returns:
    - str: The text preceded by the selected codes and followed by a style reset.
    Note:
    Whether the styles are actually rendered depends on the terminal displaying the output.
    """
    brightness = Style.DIM if dim else Style.NORMAL
    # '\033[3m' is the raw ANSI escape for italics, written out literally here.
    italic_code = '\033[3m' if italic else ''
    return f"{color}{brightness}{italic_code}{text}{Style.RESET_ALL}"
def dry_run_prefix(text: str) -> str:
    """
    Marks a message as belonging to a dry run when the '--dry-run' flag is active.
    Parameters:
    - text (str): The original message.
    Returns:
    - str: The message preceded by a yellow "[DRY-RUN] " tag in dry-run mode,
      otherwise the message unchanged.
    """
    if not args.dry_run:
        return text
    return style("[DRY-RUN] ", color=Fore.YELLOW) + text
def print_separator(thick: bool = False):
    """
    Prints a dimmed, 50-character separator line to the console.
    Parameters:
    - thick (bool, optional): Draw the line with '=' instead of '-'. Defaults to False.
    """
    line_char = "=" if thick else "-"
    print(style(line_char * 50, dim=True))
def parse_arguments():
    """
    Builds the command-line parser for the script and parses the process arguments.
    Returns:
    - argparse.Namespace: The parsed flags (no_sync, no_auth, dry_run, oldest,
      ignore_history, empty_folders).
    """
    parser = argparse.ArgumentParser(description="Remove duplicates from the Bitwarden vault.")
    # Every option except '--version' is a plain on/off switch, so register them from a table.
    boolean_flags = (
        ('-s', '--no-sync',
         "Skip syncing the Bitwarden vault."),
        ('-a', '--no-auth',
         "Assume the user has already authenticated and set the BW_SESSION."),
        ('-d', '--dry-run',
         "Show which items/folders would be deleted without actually deleting them."),
        ('-o', '--oldest',
         "Keep the oldest version of an item when detecting duplicates."),
        ('-i', '--ignore-history',
         "Ignore the password history when detecting duplicate items."),
        ('-e', '--empty-folders',
         'Identify empty folders that use the name of a non-empty folder as duplicate folders.'),
    )
    for short_flag, long_flag, help_text in boolean_flags:
        parser.add_argument(short_flag, long_flag, action='store_true', help=help_text)
    parser.add_argument('-v', '--version', action='version', version=f'%(prog)s {VERSION}',
                        help="Show the version number and exit.")
    return parser.parse_args()
def check_bw_installed():
    """
    Verifies that the Bitwarden CLI is installed and accessible by running 'bw --version'.
    Raises:
    - BitwardenError: If the 'bw' executable cannot be started or exits with an error.
    """
    try:
        subprocess.check_output(['bw', '--version'])
    except (OSError, subprocess.CalledProcessError) as error:
        # A missing or non-executable binary surfaces as OSError (FileNotFoundError,
        # PermissionError), not CalledProcessError, so both must be handled here.
        raise BitwardenError("Bitwarden CLI is not installed or not available in PATH.") from error
def setup_bw_session():
    """
    Prompts the user for their master password, unlocks Bitwarden, and sets up the BW_SESSION.
    The password is sent to 'bw unlock' on standard input, and the session key is read from the
    '$ export BW_SESSION="..."' hint in the command's output. The prompt repeats until the
    unlock succeeds or the user declines to retry.
    Raises:
    - BitwardenError: If '--no-auth' was given, or the user gives up after a failed unlock.
    """
    if args.no_auth:
        # Carry a message so the top-level handler has something meaningful to print.
        raise BitwardenError(
            "No usable Bitwarden session was found and '--no-auth' prevents prompting for one."
        )
    while True:
        # Prompt for the master password.
        master_password = getpass.getpass(
            prompt=f"{style('?', color=Fore.GREEN)} Master password: {style('[input is hidden]', dim=True, italic=True)} "
        )
        # Run 'bw unlock', feeding the password through stdin.
        unlock_result = subprocess.run(
            ['bw', 'unlock'], input=master_password, capture_output=True,
            text=True, encoding='utf-8'
        )
        # Search for the session key in the output.
        session_key = None
        for line in unlock_result.stdout.split('\n'):
            if line.startswith('$ export BW_SESSION='):
                # The key is the text between the first pair of double quotes.
                session_key = line.split('"')[1]
                break
        if session_key:
            os.environ["BW_SESSION"] = session_key
            print(style("Bitwarden unlocked and BW_SESSION set.", color=Fore.GREEN))
            break
        print(style("Failed to unlock Bitwarden or retrieve the session key.", color=Fore.RED))
        if unlock_result.stderr:
            print(f"Error: {unlock_result.stderr}")
        retry = input("Do you want to try again? (y/n): ").strip().lower()
        if retry != 'y':
            raise BitwardenError("Unable to unlock the Bitwarden vault.")
def check_bw_session():
    """
    Checks if a BW_SESSION is valid. If not, sets it up.
    When the BW_SESSION environment variable is set, 'bw status' is queried and the vault must
    report the "unlocked" status; in every other case setup_bw_session() is called.
    Raises:
    - BitwardenError: If the output of 'bw status' cannot be parsed, or if
      setup_bw_session() fails to establish a session.
    """
    print('Checking Bitwarden session')
    if not os.environ.get("BW_SESSION"):
        print(style("BW_SESSION environment variable not found.", color=Fore.RED))
        setup_bw_session()
        return
    status_result = subprocess.run(['bw', 'status'], capture_output=True, text=True, encoding='utf-8')
    try:
        status = json.loads(status_result.stdout)
    except json.JSONDecodeError as error:
        # Report unparsable output through the script's own error type instead of a traceback.
        raise BitwardenError(
            f"Unable to determine the Bitwarden status. Error: {status_result.stderr}"
        ) from error
    # Anything other than an explicit "unlocked" status (including a missing key) counts as locked.
    if isinstance(status, dict) and status.get("status") == "unlocked":
        print(style("Bitwarden session is valid and the vault is unlocked.", color=Fore.GREEN))
    else:
        print(style("BW_SESSION environment variable is present, but the vault is locked.", color=Fore.RED))
        setup_bw_session()
def sync_vault():
    """
    Syncs the local Bitwarden vault with the server by running 'bw sync'.
    On success the command's output is printed in green.
    Raises:
    - BitwardenError: If 'bw sync' exits with a non-zero return code.
    """
    print('Syncing vault')
    outcome = subprocess.run(['bw', 'sync'], capture_output=True, text=True, encoding='utf-8')
    if outcome.returncode != 0:
        raise BitwardenError(f"Failed to sync vault. Error: {outcome.stderr}")
    print(style(outcome.stdout, color=Fore.GREEN))
def fetch_all_items() -> list[dict]:
    """
    Retrieves every item in the vault by running 'bw list items'.
    Returns:
    - list[dict]: The decoded JSON output, one dictionary per vault item.
    """
    return json.loads(subprocess.check_output(['bw', 'list', 'items']))
def remove_unique_fields(item: dict) -> dict:
    """
    Remove unique fields from a Bitwarden item dictionary.
    The identifier, folder, and date fields are always removed. When '--ignore-history' is
    active, the password history and the login's 'passwordRevisionDate' are removed as well.
    Parameters:
    - item (dict): The Bitwarden item dictionary. Its top-level keys are modified in place;
      nested dictionaries are never mutated.
    Returns:
    - dict: The same item dictionary with unique fields removed.
    """
    keys_to_remove = ['id', 'folderId', 'revisionDate', 'creationDate', 'deletedDate']
    if args.ignore_history:
        keys_to_remove.append('passwordHistory')
        login_data = item.get('login')
        if isinstance(login_data, dict) and 'passwordRevisionDate' in login_data:
            # Callers pass a shallow copy, so the nested 'login' dict is still shared with
            # the original item. Rebind a stripped copy instead of deleting from it in place.
            item['login'] = {
                key: value for key, value in login_data.items()
                if key != 'passwordRevisionDate'
            }
    for key in keys_to_remove:
        item.pop(key, None)
    return item
def normalize_notes_whitespace(item: dict) -> dict:
    """
    Strips leading and trailing whitespace from the 'notes' field of a Bitwarden item.
    Bitwarden's export keeps surrounding whitespace in notes while its import strips it,
    so notes are normalized the same way before items are compared or hashed.
    Parameters:
    - item (dict): The Bitwarden item dictionary; modified in place.
    Returns:
    - dict: The same item dictionary. A missing or empty 'notes' value is left untouched.
    """
    notes = item.get('notes')
    if notes:
        item['notes'] = notes.strip()
    return item
def calculate_item_hash(item: dict) -> str:
    """
    Calculate a hash of the Bitwarden item dictionary.
    The item is serialized to JSON with sorted keys before hashing, so two items with equal
    content produce the same hash regardless of the order in which their keys were inserted.
    Parameters:
    - item (dict): The Bitwarden item dictionary.
    Returns:
    - str: The hexadecimal SHA-256 digest of the serialized item.
    """
    # 'default=str' keeps the function usable for values that JSON cannot represent natively.
    canonical = json.dumps(item, sort_keys=True, default=str)
    return hashlib.sha256(canonical.encode('utf-8')).hexdigest()
def identify_duplicate_item(item: dict, item_dict: dict) -> dict | None:
    """
    Registers an item under its content hash and reports which item, if any, is the duplicate.
    The hash is computed on a copy of the item with unique fields removed and notes stripped.
    On a hash collision the 'revisionDate' values decide which item stays registered: the
    newest by default, the oldest with '--oldest'. Ties favor the item passed in.
    Parameters:
    - item (dict): Bitwarden item to be checked.
    - item_dict (dict): Dictionary with item hashes as keys and Bitwarden items as values;
      updated in place.
    Returns:
    - dict | None: The item to discard as a duplicate, or None if the hash was not seen before.
    """
    comparable = normalize_notes_whitespace(remove_unique_fields(item.copy()))
    item_hash = calculate_item_hash(comparable)
    # First occurrence of this content: remember it, nothing to report.
    if item_hash not in item_dict:
        item_dict[item_hash] = item
        return None
    kept = item_dict[item_hash]
    if args.oldest:
        replaces_kept = item['revisionDate'] <= kept['revisionDate']
    else:
        replaces_kept = item['revisionDate'] >= kept['revisionDate']
    if not replaces_kept:
        return item
    # The incoming item wins; the previously registered one becomes the duplicate.
    item_dict[item_hash] = item
    return kept
def identify_duplicate_items(items: list[dict]) -> list[dict]:
    """
    Collects the duplicate items among the given vault items, showing a progress bar.
    Items are compared by content with unique properties excluded; which copy of a
    duplicated item is kept is decided by identify_duplicate_item().
    Parameters:
    - items (list[dict]): The vault items to inspect.
    Returns:
    - list[dict]: A list of dictionaries, each representing a duplicate item.
    Raises:
    - BitwardenError: If no duplicates are found and '--empty-folders' is not set.
    """
    print('Identifying duplicate items')
    seen_by_hash = {}
    duplicates = [
        duplicate
        for item in tqdm(items, ncols=90)
        if (duplicate := identify_duplicate_item(item, seen_by_hash))
    ]
    if duplicates:
        print(style(f'Found {len(duplicates)} duplicate items.', color=Fore.GREEN))
        return duplicates
    # Without duplicates the run only continues when empty folders are also being looked for.
    if not args.empty_folders:
        raise BitwardenError("No duplicate items found.")
    print(style('No duplicate items found.', color=Fore.RED))
    return duplicates
def count_duplicate_items_per_folder(duplicate_items: list[dict]) -> dict[str, int]:
    """
    Tallies how many of the duplicate items belong to each folder.
    Parameters:
    - duplicate_items (list[dict]): A list of identified duplicate items.
    Returns:
    - dict[str, int]: A defaultdict mapping each 'folderId' value to the number of
      duplicate items carrying it.
    """
    counts_by_folder = defaultdict(int)
    for folder_id in (duplicate['folderId'] for duplicate in duplicate_items):
        counts_by_folder[folder_id] += 1
    return counts_by_folder
def fetch_all_folders() -> list[dict]:
    """
    Retrieves every folder in the vault by running 'bw list folders'.
    Returns:
    - list[dict]: The decoded JSON output, one dictionary per folder.
    """
    return json.loads(subprocess.check_output(['bw', 'list', 'folders']))
def get_items_in_folder(folder_id: str) -> list[dict]:
    """
    Retrieves the items of one folder by running 'bw list items --folderid <id>'.
    Parameters:
    - folder_id (str): The ID of the folder for which to retrieve items.
    Returns:
    - list[dict]: The decoded JSON output, one dictionary per item in the folder.
    """
    command = ['bw', 'list', 'items', '--folderid', folder_id]
    return json.loads(subprocess.check_output(command))
def folder_contains_only_duplicates(folder: dict, duplicate_folder_ids: set[str], folder_item_count: dict) -> bool:
    """
    Tells whether every item stored in the folder is a known duplicate.
    Parameters:
    - folder (dict): The folder to check.
    - duplicate_folder_ids (set[str]): A set of folder IDs known to contain duplicate items.
    - folder_item_count (dict): A mapping of folder IDs to their number of duplicate items.
    Returns:
    - bool: True if the folder holds duplicates and its total item count equals its
      duplicate count, False otherwise.
    """
    folder_id = folder['id']
    # Short-circuit keeps the CLI call away from folders that hold no duplicates at all.
    return (
        folder_id in duplicate_folder_ids
        and len(get_items_in_folder(folder_id)) == folder_item_count.get(folder_id, 0)
    )
def is_empty_folder_with_duplicate_name(folder: dict, folder_names: set[str]) -> bool:
    """
    Tells whether the folder's name is in the given set of names and the folder has no items.
    Parameters:
    - folder (dict): The folder to check.
    - folder_names (set[str]): A set of names of folders to compare against.
    Returns:
    - bool: True if the folder's name is in folder_names and the folder is empty,
      False otherwise.
    """
    # Only query the folder's contents when the name check has already passed.
    if folder['name'] not in folder_names:
        return False
    return not get_items_in_folder(folder['id'])
def identify_duplicate_folders(folders: list[dict], duplicate_items: list[dict]) -> list[dict]:
    """
    Identify folders in the vault that only contain duplicate items.
    With '--empty-folders', an empty folder whose name is also used by at least one other
    folder is reported as a duplicate as well.
    Parameters:
    - folders (list[dict]): All folders of the vault. Entries without an ID are skipped.
    - duplicate_items (list[dict]): A list of identified duplicate items.
    Returns:
    - list[dict]: A list of dictionaries, each representing a duplicate folder.
    Raises:
    - BitwardenError: If neither duplicate folders nor duplicate items were found.
    """
    print('Identifying duplicate folders')
    folder_item_count = count_duplicate_items_per_folder(duplicate_items)
    # Create a set of folder IDs for the duplicate items for efficient lookup
    duplicate_folder_ids = set(folder_item_count)
    # Names carried by more than one folder. Collecting every name would also include the
    # name of the folder being tested, which would make the name check always succeed and
    # flag every empty folder.
    # NOTE(review): several empty folders that share a name with no non-empty namesake are
    # all reported, because emptiness is not known at this point.
    shared_folder_names = set()
    if args.empty_folders:
        seen_names = set()
        for folder in folders:
            name = folder['name']
            if not folder['id'] or not name:
                continue
            if name in seen_names:
                shared_folder_names.add(name)
            seen_names.add(name)
    duplicate_folders = []
    for folder in tqdm(folders, ncols=90):
        if not folder['id']:
            continue
        if folder_contains_only_duplicates(folder, duplicate_folder_ids, folder_item_count):
            duplicate_folders.append(folder)
        elif args.empty_folders and is_empty_folder_with_duplicate_name(folder, shared_folder_names):
            duplicate_folders.append(folder)
    if duplicate_folders:
        print(style(f'Found {len(duplicate_folders)} duplicate folders.', color=Fore.GREEN))
    elif duplicate_items:
        print(style('No duplicate folders found.', color=Fore.RED))
    else:
        # Nothing at all to clean up: stop the run through the script's own error type.
        raise BitwardenError("No duplicate folders found.")
    return duplicate_folders
def delete_duplicate_folders(duplicate_folders: list[dict]):
    """
    Delete the identified duplicate folders from the Bitwarden vault.
    Each folder is removed with 'bw delete folder <id>'. In dry-run mode no command is run.
    A failed deletion is reported and skipped; the remaining folders are still processed.
    Parameters:
    - duplicate_folders (list[dict]): A list of identified duplicate folders.
    """
    print('Deleting duplicate folders')
    deleted_count = 0
    for folder in tqdm(duplicate_folders, ncols=90):
        if not args.dry_run:
            result = subprocess.run(
                ['bw', 'delete', 'folder', folder['id']],
                capture_output=True, text=True, encoding='utf-8'
            )
            if result.returncode != 0:
                # Do not claim success when the CLI reported a failure.
                tqdm.write(
                    f'{style("Failed to delete folder", Fore.RED)} "{folder["name"]}": '
                    f'{result.stderr.strip()}'
                )
                continue
        deleted_count += 1
        tqdm.write(dry_run_prefix(f'{style("Deleted folder", Fore.RED)} "{folder["name"]}".'))
    print(dry_run_prefix(style(f'Deleted {deleted_count} duplicate folders.', color=Fore.GREEN)))
def delete_duplicate_items(duplicate_items: list[dict]):
    """
    Delete the identified duplicate items from the Bitwarden vault.
    Each item is removed with 'bw delete item <id>'. In dry-run mode no command is run.
    A failed deletion is reported and skipped; the remaining items are still processed.
    Parameters:
    - duplicate_items (list[dict]): A list of identified duplicate items.
    """
    print('Deleting duplicate items')
    deleted_count = 0
    for item in tqdm(duplicate_items, ncols=90):
        if not args.dry_run:
            result = subprocess.run(
                ['bw', 'delete', 'item', item['id']],
                capture_output=True, text=True, encoding='utf-8'
            )
            if result.returncode != 0:
                # Do not claim success when the CLI reported a failure.
                tqdm.write(
                    f'{style("Failed to delete item", Fore.RED)} "{item["name"]}": '
                    f'{result.stderr.strip()}'
                )
                continue
        deleted_count += 1
        tqdm.write(dry_run_prefix(f'{style("Deleted item", Fore.RED)} "{item["name"]}".'))
    print(dry_run_prefix(style(f'Deleted {deleted_count} duplicate items.', color=Fore.GREEN)))
def print_summary(duplicate_items_count: int, total_items_count: int, duplicate_folders_count: int, total_folders_count: int):
    """
    Print a summary of the cleanup results, highlighting duplicates found within items, folders, and overall.
    Parameters:
    - duplicate_items_count (int): The number of duplicate items found.
    - total_items_count (int): The total number of items inspected.
    - duplicate_folders_count (int): The number of duplicate folders found.
    - total_folders_count (int): The total number of folders inspected.
    The function prints a structured summary with color highlighting for easy visual distinction.
    Green is used to indicate the presence of duplicates, whereas yellow signals no duplicates.
    The summary provides:
    - The number of duplicates and the total count for items and folders, respectively.
    - An overall summary that combines the counts from both items and folders.
    - Percentages indicating the proportion of duplicates in each category. A category
      with a total of zero is reported as 0.00%.
    Example output:
        ==================================================
        CLEANUP RESULTS
        ==================================================
        ▶ Duplicates Deleted:
         📄 Items:   5 (of 100) - 5.00%
         📁 Folders: 2 (of  50) - 4.00%
         🗑️ Overall: 7 (of 150) - 4.67%
        ==================================================
    """
    def percentage_of(part: int, whole: int) -> float:
        # An empty category has no meaningful ratio; report 0 instead of dividing by zero.
        return part / whole * 100 if whole > 0 else 0

    overall_duplicates = duplicate_items_count + duplicate_folders_count
    overall_total = total_items_count + total_folders_count
    # Dictionary holding the relevant details for each line
    data = {
        '📄 Items': {
            'duplicate': duplicate_items_count,
            'total': total_items_count,
            'percentage': percentage_of(duplicate_items_count, total_items_count),
        },
        '📁 Folders': {
            'duplicate': duplicate_folders_count,
            'total': total_folders_count,
            'percentage': percentage_of(duplicate_folders_count, total_folders_count),
        },
        '🗑️ Overall': {
            'duplicate': overall_duplicates,
            'total': overall_total,
            'percentage': percentage_of(overall_duplicates, overall_total),
        },
    }
    # Calculate max digits for alignment
    max_label_width = max(len(label) for label in data)
    max_digits_col1 = max(len(str(detail['duplicate'])) for detail in data.values())
    max_digits_col2 = max(len(str(detail['total'])) for detail in data.values())
    max_digits_col3 = max(len(f"{detail['percentage']:.2f}") for detail in data.values())

    # Helper function to format a single line
    def format_line(label: str, duplicate: int, total: int, percentage: float):
        """
        Generate a formatted summary line for the provided label, duplicate count, total count, and percentage.
        Parameters:
        - label (str): The descriptor for the line, e.g., "📄 Items", "📁 Folders", or "🗑️ Overall".
        - duplicate (int): The number of duplicates for the specified label.
        - total (int): The total count for the specified label.
        - percentage (float): The percentage of duplicates relative to the total count.
        Returns:
        - str: A formatted and color-coded string suitable for the cleanup summary display.
        """
        label_str = f"{label}:".ljust(max_label_width + 2)  # Left align label
        duplicate_str = f"{duplicate:>{max_digits_col1}}"  # Right align duplicate count
        total_str = f"{total:>{max_digits_col2}}"  # Right align total count
        percentage_str = f"{percentage:.2f}%".ljust(max_digits_col3 + 1)  # Left align percentage
        # Now apply color
        highlight = Fore.GREEN if duplicate > 0 else Fore.YELLOW
        duplicate_color_str = style(duplicate_str, color=highlight)
        percentage_color_str = style(percentage_str, color=highlight)
        return f" {label_str} {duplicate_color_str} (of {total_str}) - {percentage_color_str}"

    print("=" * 50)
    print(dry_run_prefix(style("CLEANUP RESULTS", color=Fore.LIGHTMAGENTA_EX)))
    print("=" * 50)
    print("▶ Duplicates Deleted:")
    # Loop over the data dictionary to print each line
    for label, details in data.items():
        print(format_line(label, details['duplicate'], details['total'], details['percentage']))
    print("=" * 50)
if __name__ == '__main__':
    try:
        # 'args' is deliberately a module-level name: the helper functions read it directly.
        args = parse_arguments()
        print("=" * 50)
        print(style(f"Bitwarden Duplicate Cleaner - Version {VERSION}", color=Fore.LIGHTBLUE_EX))
        print("A script that deletes duplicate items and folders.")
        print("=" * 50)
        # 0. Notify the user if running in dry-run mode.
        if args.dry_run:
            print(style("Running in [DRY-RUN] mode. No changes will be made to the vault.", color=Fore.YELLOW))
            print_separator()
        # 1. Check that Bitwarden CLI is installed.
        check_bw_installed()
        # 2. Check that Bitwarden CLI is logged in and has a valid session.
        check_bw_session()
        print_separator()
        # 3. Sync the vault to ensure the latest data is available.
        if not args.no_sync:
            sync_vault()
            print_separator()
        # 4. Identify duplicate items.
        items = fetch_all_items()
        duplicate_items = identify_duplicate_items(items)
        print_separator()
        # 5. Identify duplicate folders.
        folders = fetch_all_folders()
        duplicate_folders = identify_duplicate_folders(folders, duplicate_items)
        # 6. Delete duplicate folders. Folders go first, while their items still exist.
        if duplicate_folders:
            print_separator()
            delete_duplicate_folders(duplicate_folders)
        # 7. Delete duplicate items.
        if duplicate_items:
            print_separator()
            delete_duplicate_items(duplicate_items)
        # 8. Print summary.
        print_summary(len(duplicate_items), len(items), len(duplicate_folders), len(folders))
    except KeyboardInterrupt:
        print_separator()
        print(f'{style("[CTRL+C]", color=Fore.RED)} {style("Exited script early.", color=Fore.BLUE)}')
    except BitwardenError as e:
        print(style(str(e), color=Fore.RED))
        print_separator()
        # SystemExit with a string prints it to stderr and exits with status 1, exactly
        # like exit(message), without relying on the interactive-only 'exit' helper.
        raise SystemExit(style("Exited script early.", color=Fore.BLUE))
    except subprocess.CalledProcessError as e:
        # Raised by the 'bw list ...' helpers when the CLI exits with an error.
        print(style(f"Bitwarden CLI command failed: {e}", color=Fore.RED))
        print_separator()
        raise SystemExit(style("Exited script early.", color=Fore.BLUE))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment