Last active
June 19, 2024 19:46
-
-
Save howird/d0a2afe845f5c10d9f71c7b9ed6a3e60 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
""" | |
BITWARDEN PASSWORD CONFLICT RESOLVER AND DUPLICATE REMOVER | |
---------------------------------------------------------- | |
PREREQUISITES: | |
------------- | |
- a working Python 3 installation | |
- A CSV file export of your Bitwarden Data. Bitwarden Web -> Tools -> Export Data format: .csv | |
DISCLAIMER: | |
---------- | |
I am not responsible for any data loss or security breaches. | |
I have used this script myself and it worked flawlessly for me. | |
For your peace of mind: | |
1. Make sure you have a backup of your Bitwarden data. | |
2. This script DOES NOT access the internet. | |
3. This script DOES NOT overwrite your export. | |
4. This script is open source and it contains various assertions throughout | |
the code to validate it working correctly. It confirms whether: | |
- the csv header and number of fields in each row/entry are valid | |
- the website-username combinations are consistent each time a | |
conflict is processed | |
- the number of entries exported is equal to the number of entries | |
in the input file | |
- each entry from the input is written exactly ONCE to an output file | |
""" | |
import os | |
import sys | |
import hashlib | |
from urllib.parse import urlparse | |
from collections import defaultdict | |
# When True, main() interactively resolves entries that share a site and
# username but have different passwords; when False, all of them are kept.
HANDLE_PASSWORD_CONFLICTS = True

# Only files with this extension are accepted as input / produced as output.
FILE_EXT = '.csv'

# Column names of the Bitwarden CSV export, in file order.
HEADER_LIST = [
    'folder',
    'favorite',
    'type',
    'name',
    'notes',
    'fields',
    'reprompt',
    'login_uri',
    'login_username',
    'login_password',
    'login_totp',
]

# The exact header line expected at the top of the export.
HEADER = ','.join(HEADER_LIST)

# Column name -> position within a parsed entry.
KEY2IDX = dict(zip(HEADER_LIST, range(len(HEADER_LIST))))
def parse_bitwarden_export(in_path: str) -> list[list[str]]:
    """ Parses Bitwarden export file and returns a list of entries.

    The first line must equal HEADER exactly. Every following non-empty line
    is split on ',' into its raw fields; the raw text of each field is kept
    untouched so that it can be written back verbatim later on.

    NOTE: this is a plain comma split, not a full CSV parser. A row whose
    quoted field contains a comma or a line break ends up with the wrong
    number of fields and is rejected by the field-count assertion below.

    Args:
        in_path (str): path to the Bitwarden export file.

    Returns:
        list[list[str]]: each entry is a list of values corresponding to the
            fields in the header.

    Raises:
        AssertionError: if the file extension, the header line, or the number
            of fields in any row is not what is expected.
    """
    assert in_path.endswith(FILE_EXT), f'Invalid file extension: {in_path}, must be a {FILE_EXT} file.'

    entries = []
    # 'utf-8-sig' reads plain UTF-8 and also drops a leading byte-order mark,
    # which would otherwise make the header comparison fail.
    with open(in_path, 'r', encoding='utf-8-sig') as f:
        for i, line in enumerate(f):
            line = line.rstrip()

            if i == 0:
                # The message must follow the comma of the assert statement;
                # wrapping condition and message together in parentheses
                # builds a tuple, which is always truthy.
                assert line == HEADER, (
                    f"Invalid header: {line}, should be: '{HEADER}'. "
                    "Bitwarden's export format may have changed. "
                    "Consider updating the script, checking the input file, or contacting author."
                )
                continue

            if not line:
                print(f"WARNING: Empty line at index {i}.")
                continue

            entry = line.split(',')
            assert len(entry) == len(HEADER_LIST), f"Invalid number of fields in entry {i}:\n{line}"
            entries.append(entry)

    return entries
def prune_duplicate_entries(entries: list[list[str]]) -> tuple[list[list[int]], list[int]]:
    """ Prunes duplicate entries from the list of entries.

    Entries are grouped by (site, username). An entry whose
    (site, username, password) combination has been seen before is reported
    as a duplicate instead of being added to its group.

    Side effect: when the login URI of an entry has a network location
    (e.g. 'https://example.com/login' -> 'example.com'), the entry's
    'login_uri' field is overwritten IN PLACE with that network location.
    Grouping is done on the overwritten value.

    Args:
        entries (list[list[str]]): each entry is a list of values corresponding
            to the fields in the header. Modified in place (see above).

    Returns:
        list[list[int]]: each element is a list of indices to `entries` that
            share the same site and username. A list with more than one index
            means those entries have conflicting passwords.
        list[int]: indices to `entries` of duplicate entries.
    """
    name_idx = KEY2IDX['name']
    uri_idx = KEY2IDX['login_uri']
    user_idx = KEY2IDX['login_username']
    pw_idx = KEY2IDX['login_password']

    site_user_pw_set: set = set()
    site_user_dict: dict = defaultdict(list)
    duplicate_indices = []

    for i, entry in enumerate(entries):
        domain = urlparse(entry[uri_idx]).netloc
        if domain:
            entry[uri_idx] = domain
        else:
            # Only show the name and the URI: the full row would also reveal
            # the password and TOTP secret on screen.
            print(f"Invalid URI in entry {i}: name '{entry[name_idx]}', uri '{entry[uri_idx]}'")

        # Tuples keep the individual fields apart. Gluing the strings together
        # would make e.g. user 'bob' + password 'x' indistinguishable from
        # user 'bo' + password 'bx'.
        site_user = (entry[uri_idx], entry[user_idx])
        site_user_pw = site_user + (entry[pw_idx],)

        if site_user_pw in site_user_pw_set:
            duplicate_indices.append(i)
            continue

        # First time this exact site-user-password combination shows up.
        site_user_pw_set.add(site_user_pw)
        site_user_dict[site_user].append(i)

    return list(site_user_dict.values()), duplicate_indices
def clear() -> None:
    """
    Clears the terminal screen and scroll back to present
    the user with a nice clean, new screen. Useful for managing
    menu screens in terminal applications.

    On Windows the `cls` command is run. Everywhere else the terminal reset
    sequence (ESC followed by 'c') is written straight to standard output, so
    no shell and no particular `echo` implementation is involved.
    """
    if os.name == 'nt':
        os.system('cls')
    else:
        sys.stdout.write('\033c')
        sys.stdout.flush()
def wait_for_user_input(user_input: str) -> None:
    """ Blocks until the user has typed a response.

    Args:
        user_input (str): the exact text the user has to type to go on.

    Any other response terminates the program via sys.exit().
    """
    response = input(f"Type '{user_input}' to continue. ")
    if response != user_input:
        sys.exit("Different response provided. Exiting.")
    print()
def prune_conflicting_passwords(conflicting_indices: list[list[int]], entries: list[list[str]], choose_first: bool = False) -> tuple[list[int], list[int]]:
    """ Resolves conflicting passwords in entries from user input.

    Every index in `conflicting_indices` ends up in exactly one of the two
    returned lists.

    Args:
        conflicting_indices (list[list[int]]): each element is a list of indices
            to `entries` that share the same site and username. A list with
            more than one index is a password conflict.
        entries (list[list[str]]): each entry is a list of values corresponding
            to the fields in the header.
        choose_first (bool): if True, no user input is required and the first
            password is chosen by default; the remaining entries of each
            conflict are reported as removed.

    Returns:
        list[int]: indices to `entries` of conflicting entries that were kept.
        list[int]: indices to `entries` of conflicting entries that were removed.

    Raises:
        AssertionError: if a group is empty, or if the entries of one group do
            not all have the same site and username.
    """
    uri_idx = KEY2IDX['login_uri']
    user_idx = KEY2IDX['login_username']
    pw_idx = KEY2IDX['login_password']

    num_conflicts = 0
    for indices in conflicting_indices:
        assert len(indices) > 0, "This should not be possible, likely a bug in prune_duplicate_entries()."
        if len(indices) > 1:
            num_conflicts += 1

    print(
        f"Found {num_conflicts} conflicting passwords.",
        "Choosing first password to resolve." if choose_first else "You will be prompted to choose between them."
    )
    if not choose_first:
        # The automatic mode is documented as needing no user input, so the
        # confirmation (and the screen wipe that follows it) is interactive-only.
        wait_for_user_input('yes')
        clear()

    final_indices = []
    invalid_indices = []
    i_conflict = 0
    for indices in conflicting_indices:
        num_options = len(indices)

        if num_options == 1:
            final_indices.append(indices[0])
            continue

        if choose_first:
            final_indices.append(indices[0])
            # The entries that lost must be reported as removed; otherwise
            # they would silently vanish from every output.
            invalid_indices.extend(indices[1:])
            continue

        # Sanity check: all entries of a conflict belong to the same login.
        curr_uri = entries[indices[0]][uri_idx]
        curr_user = entries[indices[0]][user_idx]
        for index in indices:
            assert curr_uri == entries[index][uri_idx]
            assert curr_user == entries[index][user_idx]

        i_conflict += 1
        print(f"Resolving conflict no. {i_conflict} of {num_conflicts}")
        print(f"Duplicate entries for site '{curr_uri}' and username '{curr_user}':")
        for i, index in enumerate(indices):
            print(f"{i}: {entries[index][pw_idx]}")

        while True:
            try:
                user_input = int(input("Enter the number corresponding to the password"
                                       " you want to keep (Alternatively, -1 to keep none,"
                                       " -2 to keep all, Ctrl+C to stop): "))
            except ValueError:
                print("Invalid input. Please enter a valid integer.")
                continue
            if -2 <= user_input < num_options:
                break
            print(f"Invalid input. Please choose a number between -2 and {num_options - 1}.")
        clear()

        if user_input == -2:
            # keep all
            final_indices.extend(indices)
        elif user_input == -1:
            # keep none
            invalid_indices.extend(indices)
        else:
            for i, index in enumerate(indices):
                if i == user_input:
                    final_indices.append(index)
                else:
                    invalid_indices.append(index)

    return final_indices, invalid_indices
def write_out(in_path: str, indices: list[int], entries: list[list[str]], written_entries: set[int], suffix: str = 'out') -> str:
    """ Exports entries to a Bitwarden CSV file.

    The output file is placed next to the input file. Its name is the input
    file name with its extension removed, followed by '_<suffix>' and
    FILE_EXT, e.g. 'export.csv' -> 'export_out.csv'. Only the extension at the
    very end of the path is touched, so the result is never the input path
    itself. Fields are joined with ',' exactly as they are stored in `entries`.

    Args:
        in_path (str): path to input file.
        indices (list[int]): indices to `entries` to export.
        entries (list[list[str]]): each entry is a list of values corresponding
            to the fields in the header.
        written_entries (set[int]): only for bookkeeping. set of indices that
            have already been written to an output file. Updated in place.
        suffix (str, optional): string to append to filename. Defaults to 'out'.

    Returns:
        str: path to the output file.

    Raises:
        AssertionError: if an index in `indices` is already in `written_entries`.
    """
    root, _ = os.path.splitext(in_path)
    out_path = f"{root}_{suffix}{FILE_EXT}"

    with open(out_path, 'w', encoding='utf8') as out_file:
        out_file.write(HEADER + '\n')
        for i in indices:
            # Check before writing so that a violation never results in the
            # same entry being present in two output files.
            assert i not in written_entries, f"Entry {i} has already been written to an output file."
            out_file.write(','.join(entries[i]) + '\n')
            written_entries.add(i)

    return out_path
def main(in_path: str) -> None:
    """ Runs the whole clean-up on the export file at `in_path`.

    Steps: show the module documentation and ask for confirmation, parse the
    export, split off exact duplicates, optionally let the user resolve
    password conflicts, write the result files and report what was written.

    Args:
        in_path (str): path to the Bitwarden export file.
    """
    # Introduction and confirmation.
    clear()
    print(__doc__)
    wait_for_user_input('yes')
    clear()

    # Read the export.
    entries = parse_bitwarden_export(in_path)
    total_number_entries = len(entries)
    print(f"Found {total_number_entries} entries in the input file.")

    # Exact duplicates.
    conflicting_indices, duplicate_indices = prune_duplicate_entries(entries)
    print(f"Finished scanning for duplicates. Found {len(duplicate_indices)}.")

    # Password conflicts.
    if not HANDLE_PASSWORD_CONFLICTS:
        # Keep every entry of every group; nothing is discarded.
        final_indices = [index for group in conflicting_indices for index in group]
        invalid_indices = []
    else:
        print()
        warning_banner = (
            "************ WARNING ************",
            "This script will now help you choose between multiple conflicting passwords",
            "assigned to the same website and user.",
            "To do so, the script will print your passwords in plaintext.",
            "Make sure you are using a secure device and that you are in",
            "a secure location where no one can see your screen.",
            "*********************************",
        )
        for banner_line in warning_banner:
            print(banner_line)
        wait_for_user_input('yes')
        final_indices, invalid_indices = prune_conflicting_passwords(conflicting_indices, entries)

    # Write the result files; `written_entries` tracks what has been written
    # across all of them.
    written_entries = set()
    final_path = write_out(in_path, final_indices, entries, written_entries, 'final')
    dup_path = write_out(in_path, duplicate_indices, entries, written_entries, 'duplicates')
    if HANDLE_PASSWORD_CONFLICTS:
        inv_path = write_out(in_path, invalid_indices, entries, written_entries, 'conflicts')

    # Every input entry has to be accounted for.
    num_processed_entries = sum(map(len, (final_indices, duplicate_indices, invalid_indices)))
    assert num_processed_entries == total_number_entries, f"Entries lost during processing: {num_processed_entries}/{total_number_entries}"

    # Summary.
    print(f'{len(final_indices)} unique entries saved to {final_path}.')
    print(f'{len(duplicate_indices)} duplicate entries saved to {dup_path}.')
    if HANDLE_PASSWORD_CONFLICTS:
        print(f'{len(invalid_indices)} invalid entries saved to {inv_path}.')
if __name__ == "__main__":
    # The export file path is the first command-line argument.
    try:
        export_path = sys.argv[1]
    except IndexError:
        sys.exit('Supply input file path as command argument')
    main(export_path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment