Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ewa/f5e115628b955bf8cd1e0540116b135a to your computer and use it in GitHub Desktop.
Save ewa/f5e115628b955bf8cd1e0540116b135a to your computer and use it in GitHub Desktop.
Use the BitWarden command-line client to purge duplicate entries from your vault. NOTE: this is my one-time-use variant, and the original has been improved significantly since I forked it, so you should probably look there.
#!/usr/bin/env python3
# This script will pull all of your vault 'items' using the 'bw list items' command and then it will compare
# all properties that are not inherently unique from the returned JSON to determine if they are duplicates.
# Note: It removes older duplicates first - the newest copy of the 'item' will be the only one to remain.
# You can simply flip the '>' sign to '<' if you want to preserve the oldest 'item' instead.
#
# Setup Steps
# 1. You must install Bitwarden CLI first: https://bitwarden.com/help/cli/#download-and-install
# 2. Log in to the CLI with the 'bw login' command. You need your session key set up before continuing: https://bitwarden.com/help/cli/#using-a-session-key
# 3. Make sure to back up your 'items'. You can use the 'bw export' command to do so: https://bitwarden.com/help/cli/#export
# 4. Run this python script and your duplicate 'items' will start being deleted. https://bitwarden.com/help/cli/#delete
# Note: I am NOT using the '--permanent' flag. This means you can restore anything this script deletes within 30 days.
# Note2: The deletion process is pretty slow (1-2 items per second) so you'll likely need to let it run for a while.
# Revision history:
# March 28, 2023: Justin McGettigan: shared gist on github as https://gist.github.com/jwmcgettigan/0bf7cd39947764896735997056ca74d7 -- no specific license given
# Sept. 22, 2023: Eric Anderson: Working on extended fork at https://gist.github.com/ewa/f5e115628b955bf8cd1e0540116b135a
import sys
import os
import json
import hashlib
import subprocess
import argparse
##
## Eric Anderson extensions: For safety, don't actually *do* anything unless specifically told to
##
# Command-line interface. Every option defaults to "off"/unset, so running the
# script without arguments never modifies the vault.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--really', '-Y',
    action='store_true',
    help='Go ahead and make potentially-destructive changes to bitwarden vault',
)
parser.add_argument(
    '--session', '-s',
    action='store',
    type=str,
    metavar='KEY',
    help='Use session key KEY. Note that there are security implications to providing the key on the command line, so think about it. Setting the BW_SESSION environment variable is preferred.',
)
parser.add_argument(
    '--interactive', '-i',
    action='store_true',
    help='Log in interactively (if needed)',
)
args = parser.parse_args()

## Sanity check
# An explicit session key and an interactive login are two competing ways of
# obtaining the same key, so the combination is rejected up front.
# parser.error() prints the usage message and exits.
if args.interactive and args.session is not None:
    parser.error("Using --session (-s) and --interactive(-i) together makes no sense.")
## Do this as a closure so we don't have to pass around (or accidentally change) the 'confirmed' variable
def make_delete(confirmed):
    """Return a delete function that is only "active" if ``confirmed`` is True.

    The returned callable closes over ``confirmed``, so the choice between a
    dry run and a real run is made exactly once and cannot be altered by the
    code that later calls it.

    Args:
        confirmed: ``True`` to really invoke ``bw delete``; ``False`` to only
            print the command that would have been run.

    Returns:
        A function ``do_it(*args)``. Its positional arguments are appended to
        the ``bw delete`` command line. It returns ``None``.

    Raises:
        TypeError: if ``confirmed`` is not a ``bool``.
    """
    # This flag gates destructive changes, so it is validated with a real
    # exception rather than ``assert``: assertions are stripped under
    # ``python -O``, after which any truthy non-bool value would silently
    # enable deletion.
    if not isinstance(confirmed, bool):
        raise TypeError(f"confirmed must be a bool, not {type(confirmed).__name__}")

    def do_it(*args):
        sub_run_args = ['bw', 'delete', *args]
        if not confirmed:
            # Dry run: show what would have happened and touch nothing.
            print(f"** would have done: subprocess.run({sub_run_args})")
            return
        completed = subprocess.run(sub_run_args)
        if completed.returncode != 0:
            # Report the failure instead of letting it pass unnoticed; the
            # script keeps going so one bad item does not abort the whole run.
            print(f"WARNING: {sub_run_args} exited with status {completed.returncode}", file=sys.stderr)

    return do_it
# The one delete function used by the rest of the script; it is a no-op
# (print only) unless --really was given.
bw_delete = make_delete(args.really)

##
## Check for session key, in the following order of precedence:
## BW_SESSION variable, --session command-line argument, interactive login
##
## Interactive login is brittle -- it's ungraceful (but safe) if you
## fail to log in, if you're already logged, or really anything else
## weird.
##
## Whichever source wins, the key ends up in os.environ['BW_SESSION'], which
## the 'bw' subprocesses started later inherit.
##
if 'BW_SESSION' in os.environ:
    print("Using session key from BW_SESSION env var", file=sys.stderr)
    if args.session is not None or args.interactive:
        print("WARNING, you specified --session and/or --interactive, but BW_SESSION supercedes these", file=sys.stderr)
elif args.session is not None:
    os.environ['BW_SESSION'] = args.session
    print("Using session key from command-line argument", file=sys.stderr)
elif args.interactive:
    cmdline = ['bw', 'login', '--raw']
    print(f"starting '{' '.join(cmdline)}'. Only the session key will be exposed to this script", file=sys.stderr)
    # stdin and stderr are inherited so the user talks to bw directly; only
    # stdout is captured. This ONLY works because bw uses stderr for
    # interaction but writes the result to stdout.
    # subprocess.run() drains the pipe while waiting, which avoids the
    # deadlock that Popen.wait() can hit when stdout is a pipe.
    login = subprocess.run(cmdline, stdout=subprocess.PIPE)
    if login.returncode != 0:
        raise ValueError(f"bw login returned {login.returncode} meaning something went wrong. Possibly you are already logged in, but did not provide the session key? If so, either provide it in BW_SESSION or via --session, or log out.")
    # Drop surrounding whitespace (e.g. a trailing newline) so it cannot end
    # up inside the exported key.
    # NOTE(review): assumes the session key itself never contains
    # leading/trailing whitespace -- confirm against the bw CLI.
    session_key = login.stdout.strip()
    if not session_key:
        raise ValueError(f"output of {' '.join(cmdline)} does not make sense: {repr(login.stdout)}", login.stdout, login)
    os.environ['BW_SESSION'] = session_key.decode()
# Fields that differ between otherwise-identical copies of an item and must
# therefore be ignored when deciding whether two items are duplicates.
_VOLATILE_FIELDS = ('id', 'folderId', 'revisionDate', 'creationDate', 'deletedDate')


def _item_fingerprint(item):
    """Return a SHA-256 hex digest of ``item`` with its volatile fields removed.

    Fields listed in ``_VOLATILE_FIELDS`` are skipped whether or not they are
    present, so an item lacking one of them does not raise ``KeyError``. The
    rest is serialized as JSON with sorted keys, so the digest does not
    depend on the order in which keys appear in the item.
    """
    comparable = {key: value for key, value in item.items() if key not in _VOLATILE_FIELDS}
    canonical = json.dumps(comparable, sort_keys=True)
    return hashlib.sha256(canonical.encode('utf-8')).hexdigest()


# Maps fingerprint -> the newest item seen so far with that fingerprint.
item_dict = {}

# Get the JSON data for each item in the vault
cmd_results = subprocess.run(['bw', 'list', 'items'], capture_output=True)
if cmd_results.returncode != 0:
    # Compare after stripping so a trailing newline on the message does not
    # defeat the check.
    if cmd_results.returncode == 1 and cmd_results.stderr.strip() == b'You are not logged in.':
        print("BitWarden says you are not logged in.", file=sys.stderr)
        print("Log in using 'bw login' and/or 'bw unlock' and export the session key as BW_SESSION", file=sys.stderr)
        print("See https://bitwarden.com/help/cli/#using-a-session-key", file=sys.stderr)
    else:
        print(f"bw list items gave an error return code ({cmd_results.returncode}) and an error message this script did not recognize:", file=sys.stderr)
        # errors='replace' keeps undecodable bytes from raising while we are
        # already reporting a failure.
        print(f"\"{cmd_results.stderr.decode(errors='replace')}\"", file=sys.stderr)
        print("You'll have to debug this yourself", file=sys.stderr)
    sys.exit(cmd_results.returncode)

items = json.loads(cmd_results.stdout)

# The parsed arguments are deliberately not echoed: with --session they
# contain the session key.

# Only claim a deletion happened when deletions are actually enabled.
outcome = 'Deleted' if args.really else 'Would have deleted'

for item in items:
    item_hash = _item_fingerprint(item)

    # First time we see this content: remember it and move on.
    if item_hash not in item_dict:
        item_dict[item_hash] = item
        continue

    # Duplicate: keep whichever copy has the later revisionDate. On a tie the
    # previously seen copy is kept and the current one is removed.
    kept = item_dict[item_hash]
    if item['revisionDate'] > kept['revisionDate']:
        older, newer = kept, item
    else:
        older, newer = item, kept

    print(f'Duplicate item found: {item["name"]}')
    bw_delete('item', older['id'])
    print(f'{outcome} older item "{older["name"]}".')
    item_dict[item_hash] = newer
@ewa
Copy link
Author

ewa commented Sep 24, 2023

This is not the script you're looking for.

This is a fork of an older version of
jwmcgettigan / bitwarden_duplicate_cleaner.py. The original author, @jwmcgettigan, extended and refactored his version to the point where merging my changes back in is non-trivial, so I haven't tried, but the "real" version is almost certainly better for your purposes.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment