howird/bitwarden_password_tool.py

## bitwarden_password_tool.py
#!/usr/bin/env python3
"""
BITWARDEN PASSWORD CONFLICT RESOLVER AND DUPLICATE REMOVER
----------------------------------------------------------

PREREQUISITES:
-------------
- a working Python 3 installation
- A CSV file export of your Bitwarden Data. Bitwarden Web -> Tools -> Export Data format: .csv

DISCLAIMER:
----------
I am not responsible for any data loss or security breaches.
I have used this script myself and it worked flawlessly for me.
For your peace of mind:
  1. Make sure you have a backup of your Bitwarden data.
  2. This script DOES NOT access the internet.
  3. This script DOES NOT overwrite your export.
  4. This script is open source and it contains various assertions throughout
  the code to validate it working correctly. It confirms whether:
    - the csv header and number of fields in each row/entry are valid
    - the website-username combinations are consistent each time a
      conflict is processed
    - the number of entries exported is equal to the number of entries
      in the input file
    - each entry from the input is written exactly ONCE to an output file
"""

import os
import sys
import hashlib
from urllib.parse import urlparse
from collections import defaultdict


# When True, main() asks the user to choose between entries that share a site
# and username but differ in password; when False all such entries are kept.
HANDLE_PASSWORD_CONFLICTS = True

# Extension the input file must have; every output file gets it as well.
FILE_EXT = '.csv'

# Exact header line of a Bitwarden CSV export (column order matters).
HEADER = 'folder,favorite,type,name,notes,fields,reprompt,login_uri,login_username,login_password,login_totp'

# Column names in file order, plus the lookup from column name to position.
HEADER_LIST = HEADER.split(',')
KEY2IDX = dict(zip(HEADER_LIST, range(len(HEADER_LIST))))


def parse_bitwarden_export(in_path: str) -> list[list[str]]:
    """ Parses a Bitwarden export file and returns a list of entries.

        The first line must equal HEADER exactly. Every other non-empty line
        is split on ',' without interpreting CSV quoting, so a line whose
        fields contain commas or line breaks fails the field-count check
        instead of being parsed.

        Args:
            in_path (str): path to the Bitwarden export file, ending in FILE_EXT.

        Returns:
            list[list[str]]: each entry is a list of values corresponding to the fields in the header.

        Raises:
            AssertionError: on a wrong file extension, an unexpected header, or
                a line whose number of fields differs from the header's.
    """
    # The checks raise explicitly instead of using `assert` so that they are
    # not stripped when Python runs with -O.
    if not in_path.endswith(FILE_EXT):
        raise AssertionError(f'Invalid file extension: {in_path}, must be a {FILE_EXT} file.')

    entries = []
    # 'utf-8-sig' also reads plain UTF-8; it only drops a leading byte-order
    # mark, which would otherwise make a valid header differ from HEADER.
    with open(in_path, 'r', encoding='utf-8-sig') as f:
        for i, line in enumerate(f):
            line = line.rstrip()

            if i == 0:
                # Compare the header itself. Wrapping the condition and the
                # message in one parenthesised tuple would always be truthy
                # and the header would never actually be validated.
                if line != HEADER:
                    raise AssertionError(
                        f"Invalid header: {line}, should be: '{HEADER}'. "
                        "Bitwarden's export format may have changed. "
                        "Consider updating the script, checking the input file, or contacting author."
                    )
                continue

            if not line:
                print(f"WARNING: Empty line at index {i}.")
                continue

            entry = line.split(',')
            if len(entry) != len(HEADER_LIST):
                raise AssertionError(f"Invalid number of fields in entry {i}:\n{line}")

            entries.append(entry)

    return entries


def prune_duplicate_entries(entries: list[list[str]]) -> tuple[list[list[int]], list[int]]:
    """ Prunes duplicate entries from the list of entries.

    Two entries are duplicates when site, username and password are all
    equal; entries sharing only site and username are grouped as a conflict.
    The fields are compared as a tuple rather than as one concatenated
    string, so e.g. ('bob', '1pw') and ('bob1', 'pw') stay distinct.

    Side effect: when a login URI has a network location, the entry's
    'login_uri' field is replaced IN PLACE by that location (scheme and path
    are dropped), so later steps see the shortened value.

    Args:
        entries (list[list[str]]): each entry is a list of values corresponding to the fields in the header.

    Returns:
        list[list[int]]: each element lists the indices to `entries` that share
            one site and username (more than one index means conflicting passwords).
        list[int]: indices to `entries` of duplicate entries.
    """
    name_idx = KEY2IDX['name']
    uri_idx = KEY2IDX['login_uri']
    user_idx = KEY2IDX['login_username']
    pw_idx = KEY2IDX['login_password']

    seen_logins: set[tuple[str, str, str]] = set()
    site_user_dict: dict[tuple[str, str], list[int]] = defaultdict(list)
    duplicate_indices = []

    for i, entry in enumerate(entries):
        try:
            domain = urlparse(entry[uri_idx]).netloc
        except ValueError:
            # urlparse rejects some malformed URIs (e.g. unmatched brackets);
            # treat them like any other URI without a network location.
            domain = ''

        if domain:
            entry[uri_idx] = domain
        else:
            # Only name and URI are shown: the full entry would put the
            # password on screen before the user has agreed to that.
            print(f"Invalid URI in entry {i}: name='{entry[name_idx]}', uri='{entry[uri_idx]}'")

        site_user = (entry[uri_idx], entry[user_idx])
        login = site_user + (entry[pw_idx],)

        if login in seen_logins:
            duplicate_indices.append(i)
        else:
            # First time this exact login is seen: remember it and file it
            # under its site-username pair.
            seen_logins.add(login)
            site_user_dict[site_user].append(i)

    return list(site_user_dict.values()), duplicate_indices


def clear() -> None:
    """
    Clears the terminal screen to present the user with a clean, new
    screen. Useful for managing menu screens in terminal applications.

    On Windows this runs the `cls` command. Elsewhere it writes the
    terminal reset sequence ESC c directly, instead of first running a
    `cls` command that does not exist there and relying on the shell's
    `echo -e` to produce the sequence.
    """
    if os.name == 'nt':
        os.system('cls')
    else:
        # ESC c asks the terminal for a full reset; whether the scroll-back
        # buffer is cleared as well depends on the terminal emulator.
        print('\033c', end='', flush=True)


def wait_for_user_input(user_input: str) -> None:
    """ Prompts for `user_input` and exits the program on any other answer.

    Args:
        user_input (str): the exact text the user has to type to continue.
    """
    answer = input(f"Type '{user_input}' to continue. ")
    if answer != user_input:
        sys.exit("Different response provided. Exiting.")
    # Blank line to separate the prompt from whatever is printed next.
    print()


def prune_conflicting_passwords(conflicting_indices: list[list[int]], entries: list[list[str]], choose_first: bool = False) -> tuple[list[int], list[int]]:
    """ Resolves conflicting passwords in entries from user input.

    Every index in `conflicting_indices` ends up in exactly one of the two
    returned lists, including the non-chosen entries when `choose_first`
    is set.

    Args:
        conflicting_indices (list[list[int]]): each element is a list of indices to `entries` sharing one
            site and username; more than one index means conflicting passwords.
        entries (list[list[str]]): each entry is a list of values corresponding to the fields in the header.
        choose_first (bool): if True, the first password of every conflict is kept without asking.

    Returns:
        list[int]: indices to `entries` of entries that were kept.
        list[int]: indices to `entries` of conflicting entries that were removed.
    """
    uri_idx = KEY2IDX['login_uri']
    user_idx = KEY2IDX['login_username']
    pw_idx = KEY2IDX['login_password']

    num_conflicts = 0
    for indices in conflicting_indices:
        assert len(indices) > 0, "This should not be possible, likely a bug in prune_duplicate_entries()."
        if len(indices) > 1:
            num_conflicts += 1
    print(
        f"Found {num_conflicts} conflicting passwords.",
        "Choosing first password to resolve." if choose_first else "You will be prompted to choose between them."
    )
    wait_for_user_input('yes')
    clear()

    final_indices = []
    invalid_indices = []
    i_conflict = 0
    for indices in conflicting_indices:
        num_options = len(indices)

        if num_options == 1:
            final_indices.append(indices[0])
            continue

        if choose_first:
            final_indices.append(indices[0])
            # The rest must be reported as removed, otherwise they would
            # silently vanish from every output file.
            invalid_indices.extend(indices[1:])
            continue

        # Sanity check: everything in one group belongs to the same login.
        curr_uri = entries[indices[0]][uri_idx]
        curr_user = entries[indices[0]][user_idx]
        for index in indices:
            assert curr_uri == entries[index][uri_idx]
            assert curr_user == entries[index][user_idx]

        i_conflict += 1
        print(f"Resolving conflict no. {i_conflict} of {num_conflicts}")
        print(f"Duplicate entries for site '{curr_uri}' and username '{curr_user}':")
        for i, index in enumerate(indices):
            print(f"{i}: {entries[index][pw_idx]}")

        # Ask until the answer is an integer in [-2, num_options).
        while True:
            try:
                user_input = int(input( "Enter the number corresponding to the password"
                                        " you want to keep (Alternatively, -1 to keep none,"
                                        " -2 to keep all, Ctrl+C to stop): "))
            except ValueError:
                print("Invalid input. Please enter a valid integer.")
                continue
            if -2 <= user_input < num_options:
                break
            print(f"Invalid input. Please choose a number between -2 and {num_options - 1}.")
        clear()

        if user_input == -2:
            final_indices.extend(indices)
        else:
            # With -1 no position matches, so every entry is rejected.
            for i, index in enumerate(indices):
                if i == user_input:
                    final_indices.append(index)
                else:
                    invalid_indices.append(index)

    return final_indices, invalid_indices


def write_out(in_path: str, indices: list[int], entries: list[list[str]], written_entries: set[int], suffix: str = 'out') -> str:
    """ Exports entries to a Bitwarden CSV file.

    The output path is the input path with '_<suffix>' inserted before a
    trailing FILE_EXT (or with '_<suffix>' + FILE_EXT appended when the
    input path does not end in FILE_EXT), so it never equals the input path.
    Fields are joined with ',' exactly as they are stored in `entries`.

    Args:
        in_path (str): path to input file.
        indices (list[int]): indices to `entries` to export.
        entries (list[list[str]]): each entry is a list of values corresponding to the fields in the header.
        written_entries (set[int]): only for bookkeeping. set of indices that have already been written
            to an output file; updated in place.
        suffix (str, optional): string to append to filename. Defaults to 'out'.

    Returns:
        str: path to the output file.

    Raises:
        AssertionError: if an index in `indices` was already written before.
    """
    # Only the trailing extension is touched; '.csv' elsewhere in the path
    # (e.g. in a directory name) must stay as it is.
    out_path = in_path.removesuffix(FILE_EXT) + '_' + suffix + FILE_EXT
    with open(out_path, 'w', encoding='utf8') as out_file:
        out_file.write(HEADER + '\n')
        for i in indices:
            # Checked before writing, and raised explicitly so the guarantee
            # also holds when Python runs with -O.
            if i in written_entries:
                raise AssertionError(f"Entry {i} has already been written to an output file.")
            written_entries.add(i)
            out_file.write(','.join(entries[i]) + '\n')
    return out_path


def main(in_path: str) -> None:
    """ Runs the whole tool on one Bitwarden CSV export.

    Shows the module documentation, parses the export, separates exact
    duplicates, optionally lets the user resolve password conflicts, and
    writes the results to '_final', '_duplicates' and (when conflicts are
    handled) '_conflicts' files next to the input.

    Args:
        in_path (str): path to the Bitwarden export file.
    """
    clear()
    print(__doc__)
    wait_for_user_input('yes')
    clear()

    entries = parse_bitwarden_export(in_path)
    n_input = len(entries)
    print(f"Found {n_input} entries in the input file.")

    groups, duplicates = prune_duplicate_entries(entries)
    print(f"Finished scanning for duplicates. Found {len(duplicates)}.")

    if not HANDLE_PASSWORD_CONFLICTS:
        # No conflict handling: keep every entry of every group.
        kept = [index for group in groups for index in group]
        rejected = []
    else:
        for banner_line in (
            "",
            "************ WARNING ************",
            "This script will now help you choose between multiple conflicting passwords",
            "assigned to the same website and user.",
            "To do so, the script will print your passwords in plaintext.",
            "Make sure you are using a secure device and that you are in",
            "a secure location where no one can see your screen.",
            "*********************************",
        ):
            print(banner_line)
        wait_for_user_input('yes')
        kept, rejected = prune_conflicting_passwords(groups, entries)

    # Shared between the write_out calls so that no entry is exported twice.
    already_written = set()
    final_path = write_out(in_path, kept, entries, already_written, 'final')
    dup_path = write_out(in_path, duplicates, entries, already_written, 'duplicates')
    conflicts_path = None
    if HANDLE_PASSWORD_CONFLICTS:
        conflicts_path = write_out(in_path, rejected, entries, already_written, 'conflicts')

    n_processed = len(kept) + len(duplicates) + len(rejected)
    assert n_processed == n_input, f"Entries lost during processing: {n_processed}/{n_input}"

    print(f'{len(kept)} unique entries saved to {final_path}.')
    print(f'{len(duplicates)} duplicate entries saved to {dup_path}.')
    if HANDLE_PASSWORD_CONFLICTS:
        print(f'{len(rejected)} invalid entries saved to {conflicts_path}.')


if __name__ == "__main__":
    # The export path is taken from the first command-line argument.
    if len(sys.argv) >= 2:
        main(sys.argv[1])
    else:
        sys.exit('Supply input file path as command argument')
	#!/usr/bin/env python3
	"""
	BITWARDEN PASSWORD CONFLICT RESOLVER AND DUPLICATE REMOVER
	----------------------------------------------------------

	PREREQUISITES:
	-------------
	- a working Python 3 installation
	- A CSV file export of your Bitwarden Data. Bitwarden Web -> Tools -> Export Data format: .csv

	DISCLAIMER:
	----------
	I am not responsible for any data loss or security breaches.
	I have used this script myself and it worked flawlessly for me.
	For your peace of mind:
	1. Make sure you have a backup of your Bitwarden data.
	2. This script DOES NOT access the internet.
	3. This script DOES NOT overwrite your export.
	4. This script is open source and it contains various assertions throughout
	the code to validate it working correctly. It confirms whether:
	- the csv header and number of fields in each row/entry are valid
	- the website-username combinations are consistent each time a
	conflict is processed
	- the number of entries exported is equal to the number of entries
	in the input file
	- each entry from the input is written exactly ONCE to an output file
	"""

	import os
	import sys
	import hashlib
	from urllib.parse import urlparse
	from collections import defaultdict


	# When True, main() asks the user to choose between entries that share a site
	# and username but differ in password; when False all such entries are kept.
	HANDLE_PASSWORD_CONFLICTS = True

	# Extension the input file must have; every output file gets it as well.
	FILE_EXT = '.csv'

	# Exact header line of a Bitwarden CSV export (column order matters).
	HEADER = 'folder,favorite,type,name,notes,fields,reprompt,login_uri,login_username,login_password,login_totp'

	# Column names in file order, plus the lookup from column name to position.
	HEADER_LIST = HEADER.split(',')
	KEY2IDX = dict(zip(HEADER_LIST, range(len(HEADER_LIST))))


	def parse_bitwarden_export(in_path: str) -> list[list[str]]:
		""" Parses a Bitwarden export file and returns a list of entries.

		The first line must equal HEADER exactly. Every other non-empty line
		is split on ',' without interpreting CSV quoting, so a line whose
		fields contain commas or line breaks fails the field-count check
		instead of being parsed.

		Args:
			in_path (str): path to the Bitwarden export file, ending in FILE_EXT.

		Returns:
			list[list[str]]: each entry is a list of values corresponding to the fields in the header.

		Raises:
			AssertionError: on a wrong file extension, an unexpected header, or
				a line whose number of fields differs from the header's.
		"""
		# The checks raise explicitly instead of using `assert` so that they are
		# not stripped when Python runs with -O.
		if not in_path.endswith(FILE_EXT):
			raise AssertionError(f'Invalid file extension: {in_path}, must be a {FILE_EXT} file.')

		entries = []
		# 'utf-8-sig' also reads plain UTF-8; it only drops a leading byte-order
		# mark, which would otherwise make a valid header differ from HEADER.
		with open(in_path, 'r', encoding='utf-8-sig') as f:
			for i, line in enumerate(f):
				line = line.rstrip()

				if i == 0:
					# Compare the header itself. Wrapping the condition and the
					# message in one parenthesised tuple would always be truthy
					# and the header would never actually be validated.
					if line != HEADER:
						raise AssertionError(
							f"Invalid header: {line}, should be: '{HEADER}'. "
							"Bitwarden's export format may have changed. "
							"Consider updating the script, checking the input file, or contacting author."
						)
					continue

				if not line:
					print(f"WARNING: Empty line at index {i}.")
					continue

				entry = line.split(',')
				if len(entry) != len(HEADER_LIST):
					raise AssertionError(f"Invalid number of fields in entry {i}:\n{line}")

				entries.append(entry)

		return entries


	def prune_duplicate_entries(entries: list[list[str]]) -> tuple[list[list[int]], list[int]]:
		""" Prunes duplicate entries from the list of entries.

		Two entries are duplicates when site, username and password are all
		equal; entries sharing only site and username are grouped as a conflict.
		The fields are compared as a tuple rather than as one concatenated
		string, so e.g. ('bob', '1pw') and ('bob1', 'pw') stay distinct.

		Side effect: when a login URI has a network location, the entry's
		'login_uri' field is replaced IN PLACE by that location (scheme and path
		are dropped), so later steps see the shortened value.

		Args:
			entries (list[list[str]]): each entry is a list of values corresponding to the fields in the header.

		Returns:
			list[list[int]]: each element lists the indices to `entries` that share
				one site and username (more than one index means conflicting passwords).
			list[int]: indices to `entries` of duplicate entries.
		"""
		name_idx = KEY2IDX['name']
		uri_idx = KEY2IDX['login_uri']
		user_idx = KEY2IDX['login_username']
		pw_idx = KEY2IDX['login_password']

		seen_logins: set[tuple[str, str, str]] = set()
		site_user_dict: dict[tuple[str, str], list[int]] = defaultdict(list)
		duplicate_indices = []

		for i, entry in enumerate(entries):
			try:
				domain = urlparse(entry[uri_idx]).netloc
			except ValueError:
				# urlparse rejects some malformed URIs (e.g. unmatched brackets);
				# treat them like any other URI without a network location.
				domain = ''

			if domain:
				entry[uri_idx] = domain
			else:
				# Only name and URI are shown: the full entry would put the
				# password on screen before the user has agreed to that.
				print(f"Invalid URI in entry {i}: name='{entry[name_idx]}', uri='{entry[uri_idx]}'")

			site_user = (entry[uri_idx], entry[user_idx])
			login = site_user + (entry[pw_idx],)

			if login in seen_logins:
				duplicate_indices.append(i)
			else:
				# First time this exact login is seen: remember it and file it
				# under its site-username pair.
				seen_logins.add(login)
				site_user_dict[site_user].append(i)

		return list(site_user_dict.values()), duplicate_indices


	def clear() -> None:
		"""
		Clears the terminal screen to present the user with a clean, new
		screen. Useful for managing menu screens in terminal applications.

		On Windows this runs the `cls` command. Elsewhere it writes the
		terminal reset sequence ESC c directly, so no shell command (and no
		shell-specific escaping of `||` or `echo -e`) is involved.
		"""
		if os.name == 'nt':
			os.system('cls')
		else:
			# ESC c asks the terminal for a full reset; whether the scroll-back
			# buffer is cleared as well depends on the terminal emulator.
			print('\033c', end='', flush=True)


	def wait_for_user_input(user_input: str) -> None:
		""" Prompts for `user_input` and exits the program on any other answer.

		Args:
			user_input (str): the exact text the user has to type to continue.
		"""
		if input(f"Type '{user_input}' to continue. ") == user_input:
			# Blank line to separate the prompt from whatever is printed next.
			print()
		else:
			sys.exit("Different response provided. Exiting.")


	def prune_conflicting_passwords(conflicting_indices: list[list[int]], entries: list[list[str]], choose_first: bool = False) -> tuple[list[int], list[int]]:
		""" Resolves conflicting passwords in entries from user input.

		Every index in `conflicting_indices` ends up in exactly one of the two
		returned lists, including the non-chosen entries when `choose_first`
		is set.

		Args:
			conflicting_indices (list[list[int]]): each element is a list of indices to `entries` sharing one
				site and username; more than one index means conflicting passwords.
			entries (list[list[str]]): each entry is a list of values corresponding to the fields in the header.
			choose_first (bool): if True, the first password of every conflict is kept without asking.

		Returns:
			list[int]: indices to `entries` of entries that were kept.
			list[int]: indices to `entries` of conflicting entries that were removed.
		"""
		uri_idx = KEY2IDX['login_uri']
		user_idx = KEY2IDX['login_username']
		pw_idx = KEY2IDX['login_password']

		num_conflicts = 0
		for indices in conflicting_indices:
			assert len(indices) > 0, "This should not be possible, likely a bug in prune_duplicate_entries()."
			if len(indices) > 1:
				num_conflicts += 1
		print(
			f"Found {num_conflicts} conflicting passwords.",
			"Choosing first password to resolve." if choose_first else "You will be prompted to choose between them."
		)
		wait_for_user_input('yes')
		clear()

		final_indices = []
		invalid_indices = []
		i_conflict = 0
		for indices in conflicting_indices:
			num_options = len(indices)

			if num_options == 1:
				final_indices.append(indices[0])
				continue

			if choose_first:
				final_indices.append(indices[0])
				# The rest must be reported as removed, otherwise they would
				# silently vanish from every output file.
				invalid_indices.extend(indices[1:])
				continue

			# Sanity check: everything in one group belongs to the same login.
			curr_uri = entries[indices[0]][uri_idx]
			curr_user = entries[indices[0]][user_idx]
			for index in indices:
				assert curr_uri == entries[index][uri_idx]
				assert curr_user == entries[index][user_idx]

			i_conflict += 1
			print(f"Resolving conflict no. {i_conflict} of {num_conflicts}")
			print(f"Duplicate entries for site '{curr_uri}' and username '{curr_user}':")
			for i, index in enumerate(indices):
				print(f"{i}: {entries[index][pw_idx]}")

			# Ask until the answer is an integer in [-2, num_options).
			while True:
				try:
					user_input = int(input( "Enter the number corresponding to the password"
											" you want to keep (Alternatively, -1 to keep none,"
											" -2 to keep all, Ctrl+C to stop): "))
				except ValueError:
					print("Invalid input. Please enter a valid integer.")
					continue
				if -2 <= user_input < num_options:
					break
				print(f"Invalid input. Please choose a number between -2 and {num_options - 1}.")
			clear()

			if user_input == -2:
				final_indices.extend(indices)
			else:
				# With -1 no position matches, so every entry is rejected.
				for i, index in enumerate(indices):
					if i == user_input:
						final_indices.append(index)
					else:
						invalid_indices.append(index)

		return final_indices, invalid_indices


	def write_out(in_path: str, indices: list[int], entries: list[list[str]], written_entries: set[int], suffix: str = 'out') -> str:
		""" Exports entries to a Bitwarden CSV file.

		The output path is the input path with '_<suffix>' inserted before a
		trailing FILE_EXT (or with '_<suffix>' + FILE_EXT appended when the
		input path does not end in FILE_EXT), so it never equals the input path.
		Fields are joined with ',' exactly as they are stored in `entries`.

		Args:
			in_path (str): path to input file.
			indices (list[int]): indices to `entries` to export.
			entries (list[list[str]]): each entry is a list of values corresponding to the fields in the header.
			written_entries (set[int]): only for bookkeeping. set of indices that have already been written
				to an output file; updated in place.
			suffix (str, optional): string to append to filename. Defaults to 'out'.

		Returns:
			str: path to the output file.

		Raises:
			AssertionError: if an index in `indices` was already written before.
		"""
		# Only the trailing extension is touched; '.csv' elsewhere in the path
		# (e.g. in a directory name) must stay as it is.
		out_path = in_path.removesuffix(FILE_EXT) + '_' + suffix + FILE_EXT
		with open(out_path, 'w', encoding='utf8') as out_file:
			out_file.write(HEADER + '\n')
			for i in indices:
				# Checked before writing, and raised explicitly so the guarantee
				# also holds when Python runs with -O.
				if i in written_entries:
					raise AssertionError(f"Entry {i} has already been written to an output file.")
				written_entries.add(i)
				out_file.write(','.join(entries[i]) + '\n')
		return out_path


	def main(in_path: str) -> None:
		""" Runs the whole tool on one Bitwarden CSV export.

		Shows the module documentation, parses the export, separates exact
		duplicates, optionally lets the user resolve password conflicts, and
		writes the results to '_final', '_duplicates' and (when conflicts are
		handled) '_conflicts' files next to the input.

		Args:
			in_path (str): path to the Bitwarden export file.
		"""
		clear()
		print(__doc__)
		wait_for_user_input('yes')
		clear()

		entries = parse_bitwarden_export(in_path)
		total_number_entries = len(entries)
		print(f"Found {total_number_entries} entries in the input file.")

		conflicting_indices, duplicate_indices = prune_duplicate_entries(entries)
		print(f"Finished scanning for duplicates. Found {len(duplicate_indices)}.")

		if HANDLE_PASSWORD_CONFLICTS:
			print()
			print("********** WARNING **********")
			print("This script will now help you choose between multiple conflicting passwords")
			print("assigned to the same website and user.")
			print("To do so, the script will print your passwords in plaintext.")
			print("Make sure you are using a secure device and that you are in")
			print("a secure location where no one can see your screen.")
			print("*********************************")
			wait_for_user_input('yes')
			final_indices, invalid_indices = prune_conflicting_passwords(conflicting_indices, entries)
		else:
			# No conflict handling: keep every entry of every group.
			final_indices = [index for group in conflicting_indices for index in group]
			invalid_indices = []

		# Shared between the write_out calls so that no entry is exported twice.
		written_entries = set()
		final_path = write_out(in_path, final_indices, entries, written_entries, 'final')
		dup_path = write_out(in_path, duplicate_indices, entries, written_entries, 'duplicates')
		if HANDLE_PASSWORD_CONFLICTS:
			inv_path = write_out(in_path, invalid_indices, entries, written_entries, 'conflicts')

		num_processed_entries = len(final_indices) + len(duplicate_indices) + len(invalid_indices)
		assert num_processed_entries == total_number_entries, f"Entries lost during processing: {num_processed_entries}/{total_number_entries}"
		print(f'{len(final_indices)} unique entries saved to {final_path}.')
		print(f'{len(duplicate_indices)} duplicate entries saved to {dup_path}.')
		if HANDLE_PASSWORD_CONFLICTS:
			print(f'{len(invalid_indices)} invalid entries saved to {inv_path}.')


	if __name__ == "__main__":
		# The export path is taken from the first command-line argument.
		if len(sys.argv) < 2:
			sys.exit('Supply input file path as command argument')
		main(sys.argv[1])