Skip to content

Instantly share code, notes, and snippets.

@topisani
Forked from serif/bwclean2.py
Last active June 24, 2024 05:58
Show Gist options
  • Save topisani/066b63b87346afe76ffdf0998d4ebc2f to your computer and use it in GitHub Desktop.
Bitwarden Duplicate Entry Remover v2
#!/usr/bin/env python3
import sys
import csv
import hashlib
import json
from urllib.parse import urlparse
from datetime import datetime
def main(argv):
    """Remove duplicate login entries from a Bitwarden JSON export.

    Two entries are duplicates when they share the same
    (URI domain, username, password) triple; of such a pair the entry
    with the newer ``revisionDate`` wins.  Results are written next to
    the input file as ``<name>_out.json`` (deduplicated vault) and
    ``<name>_rem.json`` (the removed duplicates, kept for inspection).

    Args:
        argv: command-line arguments; ``argv[0]`` is the input JSON path.

    Raises:
        SystemExit: when no input path is supplied.
    """
    if len(argv) < 1:
        sys.exit('Supply input file path as command argument')
    in_path = argv[0]
    base = in_path.rsplit('.json', 1)[0]
    out_path = base + '_out.json'
    rem_path = base + '_rem.json'

    with open(in_path, 'r', newline='', encoding='utf8') as in_file:
        in_data = json.load(in_file)

    items = in_data['items']
    hash_items = {}   # digest -> winning item for that (domain, user, pass)
    keep_items = []   # items without any URI: never dedup candidates
    rem_items = []    # losing duplicates

    for item in items:
        login = item.get('login', {})
        uris = login.get('uris', [])
        if not uris:
            keep_items.append(item)
            continue
        username = str(login.get('username'))
        password = str(login.get('password'))
        for uri in uris:
            domain = urlparse(uri.get('uri', '')).netloc
            # NUL separators prevent boundary collisions such as
            # ('ab', 'c') vs ('a', 'bc') producing the same token.
            token = '\x00'.join((domain, username, password))
            digest = hashlib.md5(token.rstrip().encode('utf-8')).hexdigest()
            old = hash_items.get(digest)
            if old is None:
                hash_items[digest] = item
                continue
            if old is item:
                # Same item reached via another of its own URIs — not a duplicate.
                continue
            # Keep whichever entry was revised most recently.
            # NOTE(review): fromisoformat() accepts a trailing 'Z' only on
            # Python 3.11+ — confirm against the export's date format.
            if datetime.fromisoformat(old['revisionDate']) < datetime.fromisoformat(item['revisionDate']):
                hash_items[digest] = item
                rem_items.append(old)
            else:
                rem_items.append(item)
            print(f"found duplicate entry for uri='{domain}', username='{username}'")

    # An item with several URIs may win under several digests; emit it once.
    unique_items = []
    seen = set()
    for item in hash_items.values():
        if id(item) not in seen:
            seen.add(id(item))
            unique_items.append(item)

    out_data = in_data | {"items": unique_items + keep_items}
    rem_data = in_data | {"items": rem_items}
    with open(out_path, 'wt', newline='', encoding='utf8') as out_file, \
            open(rem_path, 'wt', newline='', encoding='utf8') as rem_file:
        json.dump(out_data, out_file, indent=4)
        json.dump(rem_data, rem_file, indent=4)

    # Report
    print(f'\n{len(items)} total entries')
    print(f'\nOutput file: {out_path}\n{len(unique_items)} unique entries deduplicated')
    print(f'{len(keep_items)} items kept')
    print(f'\nDuplicates saved to {rem_path}\n{len(rem_items)} entries removed')


if __name__ == "__main__":
    main(sys.argv[1:])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment