Skip to content

Instantly share code, notes, and snippets.

@howird
Last active June 19, 2024 19:46
Show Gist options
  • Save howird/d0a2afe845f5c10d9f71c7b9ed6a3e60 to your computer and use it in GitHub Desktop.
Save howird/d0a2afe845f5c10d9f71c7b9ed6a3e60 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
"""
BITWARDEN PASSWORD CONFLICT RESOLVER AND DUPLICATE REMOVER
----------------------------------------------------------
PREREQUISITES:
-------------
- a working Python 3 installation
- A CSV file export of your Bitwarden Data. Bitwarden Web -> Tools -> Export Data format: .csv
DISCLAIMER:
----------
I am not responsible for any data loss or security breaches.
I have used this script myself and it worked flawlessly for me.
For your peace of mind:
1. Make sure you have a backup of your Bitwarden data.
2. This script DOES NOT access the internet.
3. This script DOES NOT overwrite your export.
4. This script is open source and it contains various assertions throughout
the code to validate it working correctly. It confirms whether:
- the csv header and number of fields in each row/entry are valid
- the website-username combinations are consistent each time a
conflict is processed
- the number of entries exported is equal to the number of entries
in the input file
- each entry from the input is written exactly ONCE to an output file
"""
import os
import sys
import hashlib
from urllib.parse import urlparse
from collections import defaultdict
# When True, main() runs the interactive step that resolves entries sharing a
# site and username but carrying different passwords.
HANDLE_PASSWORD_CONFLICTS = True

# Only files with this extension are accepted as input; outputs reuse it.
FILE_EXT = '.csv'

# Fields in Bitwarden CSV, in column order.
HEADER = 'folder,favorite,type,name,notes,fields,reprompt,login_uri,login_username,login_password,login_totp'
HEADER_LIST = HEADER.split(',')

# Maps each column name to its position within an entry.
KEY2IDX = dict(zip(HEADER_LIST, range(len(HEADER_LIST))))
def parse_bitwarden_export(in_path: str) -> list[list[str]]:
    """ Parses Bitwarden export file and returns a list of entries.

    The first line must equal HEADER exactly. Every following non-empty line
    is split on ',' and must yield exactly one value per header field.
    Rows are split naively: a quoted field containing a comma or a line break
    is not understood and will normally be rejected by the field-count check.

    Args:
        in_path (str): path to the Bitwarden export file.
    Returns:
        list[list[str]]: each entry is a list of values corresponding to the fields in the header.
    Raises:
        ValueError: if the file extension is not FILE_EXT, the header does not
            match HEADER, or a row has the wrong number of fields.
    """
    # Explicit raises instead of `assert`: these checks validate user input
    # and must not disappear when Python runs with -O.
    if not in_path.endswith(FILE_EXT):
        raise ValueError(f'Invalid file extension: {in_path}, must be a {FILE_EXT} file.')

    entries = []
    # 'utf-8-sig' decodes plain UTF-8 and also drops a leading byte-order
    # mark, which would otherwise make the header comparison fail.
    with open(in_path, 'r', encoding='utf-8-sig') as f:
        for i, line in enumerate(f):
            line = line.rstrip()

            if i == 0:
                # The original `assert (condition, message)` tested a
                # non-empty tuple, which is always true, so the header was
                # never actually validated.
                if line != HEADER:
                    raise ValueError(
                        f"Invalid header: {line}, should be: '{HEADER}'. "
                        "Bitwarden's export format may have changed. "
                        "Consider updating the script, checking the input file, or contacting author."
                    )
                continue

            if not line:
                print(f"WARNING: Empty line at index {i}.")
                continue

            entry = line.split(',')
            if len(entry) != len(HEADER_LIST):
                raise ValueError(f"Invalid number of fields in entry {i}:\n{line}")
            entries.append(entry)

    return entries
def prune_duplicate_entries(entries: list[list[str]]) -> tuple[list[list[int]], list[int]]:
    """ Prunes duplicate entries from the list of entries.

    Entries are grouped by (login_uri, login_username). An entry whose
    (login_uri, login_username, login_password) triple has already been seen
    is reported as a duplicate instead of joining a group. All three values
    are compared exactly, as separate fields.

    Side effect: for every entry whose login_uri has a network location
    (the `netloc` part returned by urlparse), the login_uri value in
    `entries` is replaced IN PLACE by that network location. Entries without
    one keep their original login_uri and a notice is printed.

    Args:
        entries (list[list[str]]): each entry is a list of values corresponding to the fields in the header.
    Returns:
        list[list[int]]: each element is a list of indices to `entries` that share a site and
            username; a list with more than one index holds conflicting passwords.
        list[int]: indices to `entries` of duplicate entries.
    """
    name_col = KEY2IDX['name']
    uri_col = KEY2IDX['login_uri']
    user_col = KEY2IDX['login_username']
    pw_col = KEY2IDX['login_password']

    # Tuples keep the fields separate. Concatenating them into one string
    # (as input to a hash) lets different credentials collide, e.g. user "ab"
    # + password "c" versus user "a" + password "bc".
    seen_credentials: set[tuple[str, str, str]] = set()
    site_user_groups: dict[tuple[str, str], list[int]] = defaultdict(list)
    duplicate_indices: list[int] = []

    for i, entry in enumerate(entries):
        try:
            domain = urlparse(entry[uri_col]).netloc
        except ValueError:
            # urlparse raises on some malformed URIs (e.g. unmatched square
            # brackets); treat those like any other URI without a netloc.
            domain = ''

        if domain:
            entry[uri_col] = domain
        else:
            # Only the name and URI are shown: the full row would reveal the
            # password and notes on screen.
            print(f"Invalid URI for entry {i}: name='{entry[name_col]}', uri='{entry[uri_col]}'")

        site_user = (entry[uri_col], entry[user_col])
        credentials = (entry[uri_col], entry[user_col], entry[pw_col])

        if credentials in seen_credentials:
            duplicate_indices.append(i)
            continue

        # First time this exact site-user-password triple is seen: remember
        # it and file the entry under its site-user group.
        seen_credentials.add(credentials)
        site_user_groups[site_user].append(i)

    return list(site_user_groups.values()), duplicate_indices
def clear() -> None:
    """
    Wipe the terminal so the next prompt starts on a clean screen.

    Hands a single command line to the system shell: `cls` is tried first,
    and if that command fails the shell falls through (`||`) to an
    `echo -e` of an escape sequence.
    """
    # The doubled backslashes are intentional: the shell consumes one level
    # of escaping before `echo` sees the sequence.
    reset_command = 'cls||echo -e \\\\033c'
    os.system(reset_command)
def wait_for_user_input(user_input: str) -> None:
    """ Blocks until the user types the expected confirmation.

    Args:
        user_input (str): the exact text the user has to type to proceed.

    Any other reply terminates the program through sys.exit.
    """
    reply = input(f"Type '{user_input}' to continue. ")
    if reply != user_input:
        sys.exit("Different response provided. Exiting.")
    print()
def _ask_password_choice(num_options: int) -> int:
    """ Prompts until the user enters an integer in the range -2 .. num_options - 1. """
    while True:
        try:
            choice = int(input("Enter the number corresponding to the password"
                               " you want to keep (Alternatively, -1 to keep none,"
                               " -2 to keep all, Ctrl+C to stop): "))
        except ValueError:
            print("Invalid input. Please enter a valid integer.")
            continue
        if -2 <= choice < num_options:
            return choice
        print(f"Invalid input. Please choose a number between -2 and {num_options - 1}.")


def prune_conflicting_passwords(conflicting_indices: list[list[int]], entries: list[list[str]], choose_first: bool = False) -> tuple[list[int], list[int]]:
    """ Resolves conflicting passwords in entries from user input.

    A group with a single index is kept as is. For a group with several
    indices the passwords are printed in plaintext and the user picks one to
    keep, none (-1) or all (-2). With `choose_first` the first index of each
    group is kept and the remaining ones are reported as removed.

    Every index in `conflicting_indices` ends up in exactly one of the two
    returned lists.

    Args:
        conflicting_indices (list[list[int]]): each element is a list of indices to `entries` of with conflicting passwords.
        entries (list[list[str]]): each entry is a list of values corresponding to the fields in the header.
        choose_first (bool): if True, no password choice is prompted for and the first password is chosen by default.
    Returns:
        list[int]: indices to `entries` of conflicting entries that were kept.
        list[int]: indices to `entries` of conflicting entries that were removed.
    """
    uri_col = KEY2IDX['login_uri']
    user_col = KEY2IDX['login_username']
    pw_col = KEY2IDX['login_password']

    num_conflicts = 0
    for indices in conflicting_indices:
        assert len(indices) > 0, "This should not be possible, likely a bug in prune_duplicate_entries()."
        if len(indices) > 1:
            num_conflicts += 1

    print(
        f"Found {num_conflicts} conflicting passwords.",
        "Choosing first password to resolve." if choose_first else "You will be prompted to choose between them."
    )
    wait_for_user_input('yes')
    clear()

    final_indices = []
    invalid_indices = []
    i_conflict = 0
    for indices in conflicting_indices:
        num_options = len(indices)

        if num_options == 1:
            final_indices.append(indices[0])
            continue

        if choose_first:
            final_indices.append(indices[0])
            # The entries that lose out must still be reported as removed,
            # otherwise they vanish from every output list.
            invalid_indices.extend(indices[1:])
            continue

        # Sanity check: every entry of a group must share site and username.
        curr_uri = entries[indices[0]][uri_col]
        curr_user = entries[indices[0]][user_col]
        for index in indices:
            assert curr_uri == entries[index][uri_col]
            assert curr_user == entries[index][user_col]

        i_conflict += 1
        print(f"Resolving conflict no. {i_conflict} of {num_conflicts}")
        print(f"Duplicate entries for site '{curr_uri}' and username '{curr_user}':")
        for i, index in enumerate(indices):
            print(f"{i}: {entries[index][pw_col]}")

        choice = _ask_password_choice(num_options)
        clear()

        if choice == -2:
            final_indices.extend(indices)
        elif choice == -1:
            invalid_indices.extend(indices)
        else:
            for i, index in enumerate(indices):
                if i == choice:
                    final_indices.append(index)
                else:
                    invalid_indices.append(index)

    return final_indices, invalid_indices
def write_out(in_path: str, indices: list[int], entries: list[list[str]], written_entries: set[int], suffix: str = 'out') -> str:
    """ Exports entries to a Bitwarden CSV file.

    The output path is the input path with a trailing FILE_EXT removed and
    '_<suffix>' + FILE_EXT appended, so it always differs from `in_path`.
    Field values are joined with ',' verbatim; no CSV quoting is applied.

    Args:
        in_path (str): path to input file.
        indices (list[int]): indices to `entries` to export.
        entries (list[list[str]]): each entry is a list of values corresponding to the fields in the header.
        written_entries (set[int]): only for bookkeeping. set of indices that have already been written
            to an output file; updated in place with every index written here.
        suffix (str, optional): string to append to filename. Defaults to 'out'.
    Returns:
        str: path to the output file.
    Raises:
        AssertionError: if an index in `indices` is already in `written_entries`.
    """
    # Strip the extension only at the end of the path. str.replace would also
    # rewrite '.csv' inside directory names, and would return the input path
    # itself when the extension is absent.
    out_path = in_path.removesuffix(FILE_EXT) + '_' + suffix + FILE_EXT

    with open(out_path, 'w', encoding='utf8') as out_file:
        out_file.write(HEADER + '\n')
        for i in indices:
            # Check before writing so a repeated entry is never emitted twice.
            assert i not in written_entries, f"Entry {i} has already been written to an output file."
            out_file.write(','.join(entries[i]) + '\n')
            written_entries.add(i)

    return out_path
def main(in_path: str) -> None:
    """ Runs the whole clean-up: parse, de-duplicate, resolve conflicts, export.

    Shows the module docstring and asks for confirmation, then reads the
    export at `in_path`, separates exact duplicates, optionally lets the user
    resolve conflicting passwords, and writes the kept, duplicate and (when
    conflict handling is enabled) removed entries to separate files next to
    the input.

    Args:
        in_path (str): path to the Bitwarden export file.
    """
    clear()
    print(__doc__)
    wait_for_user_input('yes')
    clear()

    entries = parse_bitwarden_export(in_path)
    entry_count = len(entries)
    print(f"Found {entry_count} entries in the input file.")

    conflicting_indices, duplicate_indices = prune_duplicate_entries(entries)
    print(f"Finished scanning for duplicates. Found {len(duplicate_indices)}.")

    if not HANDLE_PASSWORD_CONFLICTS:
        # Without conflict handling every grouped entry is kept as is.
        final_indices = [index for group in conflicting_indices for index in group]
        invalid_indices = []
    else:
        print()
        warning_lines = (
            "************ WARNING ************",
            "This script will now help you choose between multiple conflicting passwords",
            "assigned to the same website and user.",
            "To do so, the script will print your passwords in plaintext.",
            "Make sure you are using a secure device and that you are in",
            "a secure location where no one can see your screen.",
            "*********************************",
        )
        for text in warning_lines:
            print(text)
        wait_for_user_input('yes')
        final_indices, invalid_indices = prune_conflicting_passwords(conflicting_indices, entries)

    # Shared across all exports so write_out can detect an entry written twice.
    written_entries = set()
    final_path = write_out(in_path, final_indices, entries, written_entries, 'final')
    dup_path = write_out(in_path, duplicate_indices, entries, written_entries, 'duplicates')
    if HANDLE_PASSWORD_CONFLICTS:
        inv_path = write_out(in_path, invalid_indices, entries, written_entries, 'conflicts')

    # Every input entry must be accounted for in exactly one of the lists.
    processed_count = sum(len(group) for group in (final_indices, duplicate_indices, invalid_indices))
    assert processed_count == entry_count, f"Entries lost during processing: {processed_count}/{entry_count}"

    print(f'{len(final_indices)} unique entries saved to {final_path}.')
    print(f'{len(duplicate_indices)} duplicate entries saved to {dup_path}.')
    if HANDLE_PASSWORD_CONFLICTS:
        print(f'{len(invalid_indices)} invalid entries saved to {inv_path}.')
if __name__ == "__main__":
    # The export path is the first command-line argument; bail out with a
    # usage hint when it is missing.
    try:
        export_path = sys.argv[1]
    except IndexError:
        sys.exit('Supply input file path as command argument')
    main(export_path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment