@chumaumenze
Created July 5, 2021 19:54
## Bitwarden Export Parser
Parses and reorganises exported credentials from Bitwarden.
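
The script assumes the layout of a standard Bitwarden CSV export; it only reads or rewrites the columns listed below and carries any other columns through unchanged. A small sanity-check sketch (the input file name matches the one hard-coded further down):

```python
import csv

# Columns the parser below reads or rewrites; the rest pass through untouched.
EXPECTED = {"folder", "favorite", "type", "name", "notes", "fields",
            "login_uri", "login_username", "login_password"}

with open("./bitwarden_export_20210704104034.csv", newline="") as f:
    header = set(csv.DictReader(f).fieldnames or [])

print("missing columns:", EXPECTED - header or "none")
```
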
import csv
import typing as t
from collections import OrderedDict
from urllib.parse import urlparse

import tldextract

T_data = t.OrderedDict[str, str]
T_htable = t.Dict[str, t.Union[t.List[T_data], T_data]]

def main(in_csv_path: str, out_csv_path: str, duplicate_csv_path: str):
    htable: T_htable = OrderedDict()
    dup_htable = []
    invalid_bucket = []
    notes_bucket = []

    with open(in_csv_path, newline='') as f_in:
        in_csv = csv.DictReader(f_in)
        field_names = in_csv.fieldnames
        for r in in_csv:
            # Secure notes have no URI to key on; pass them through untouched.
            if r['type'] == 'note':
                notes_bucket.append(r)
                continue
            try:
                key, data = parse_row(r)
            except IndexError:
                # Rows whose URI cannot be split into a usable key are set aside.
                invalid_bucket.append(r)
                continue
            # Check duplicates & multi-values.
            if key in htable:
                # handle_duplicate stores genuinely new rows under the existing
                # key itself and returns True only for real duplicates.
                if handle_duplicate(htable, key, data):
                    dup_htable.append(data)
            else:
                htable[key] = data

    # Values may be single rows or lists of rows sharing a key; flatten them
    # before writing.
    processed = []
    for value in htable.values():
        processed.extend(value if isinstance(value, list) else [value])

    write_csv(out_csv_path, [*processed, *notes_bucket], field_names)
    write_csv(duplicate_csv_path, dup_htable, field_names)

def write_csv(file_name: str, rows, field_names: t.Sequence[str]):
    with open(file_name, 'w', newline='') as f:
        out_csv = csv.DictWriter(f, field_names)
        out_csv.writeheader()
        out_csv.writerows(rows)

def parse_row(data: T_data) -> t.Tuple[str, T_data]:
    if data['login_uri'].startswith('http://android://'):
        # Android-app URIs carry the package name after '==@', e.g.
        # '...==@com.vendor.app/'; key the entry on the package name with the
        # leading 'com'/'org' segment dropped.
        key = data['login_uri'].split('==@')[-1].rstrip('/').split('.')
        key_len = len(key)
        if key_len == 2:
            key = key[1]
        elif key_len == 3:
            key = ' '.join(key[1:])
        else:
            key = ' '.join(key[1:4])
    else:
        # Key web logins on the registered domain, e.g. accounts.google.com -> google.
        key = tldextract.extract(data['login_uri']).domain
        # key = urlparse(data['login_uri']).netloc.split('.')[-2]
    data['name'] = key.title()
    data['type'] = data['type'] or 'login'
    data['favorite'] = ''
    return key.title(), data
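
# Illustration only, with hypothetical rows, of the keys parse_row derives:
#   login_uri = 'http://android://Abc123==@com.twitter.android/'
#       -> package parts ['com', 'twitter', 'android'] -> key 'Twitter Android'
#   login_uri = 'https://accounts.google.com/signin'
#       -> registered domain 'google' -> key 'Google'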

def handle_duplicate(htable: T_htable, key: str,
                     data: T_data, check_values=None) -> bool:
    """Return True if `data` duplicates an entry already stored under `key`.

    Non-duplicate rows that happen to share a key are kept, stored as a
    list of rows under that key.
    """
    def do_compare(old, new):
        old_loc = urlparse(old['login_uri']).netloc
        new_loc = urlparse(new['login_uri']).netloc
        if old_loc == new_loc:
            return True
        return all(old[k] == new[k] for k in check_values)

    check_values = check_values or ['type', 'notes', 'fields',
                                    'login_username', 'login_password',
                                    'folder']
    entry = htable[key]
    if isinstance(entry, list):
        for d in entry:
            if do_compare(d, data):
                return True
        entry.append(data)
        return False
    if isinstance(entry, dict):  # covers OrderedDict as well
        if do_compare(entry, data):
            return True
        htable[key] = [entry, data]
        return False
    raise ValueError(f"Unexpected type found: {type(entry).__name__}\nData: {entry}")
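
# Illustration only, with hypothetical rows sharing the key 'Google':
#   first row seen                    -> htable['Google'] = row1
#   row2 with the same netloc, or the
#   same credentials/notes/folder     -> returns True; main sends row2 to the
#                                        duplicates CSV
#   row2 that differs on those checks -> returns False; htable['Google'] becomes
#                                        [row1, row2] and both rows are written
#                                        to the processed CSV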

if __name__ == '__main__':
    in_csv_path = "./bitwarden_export_20210704104034.csv"
    out_csv_path = "./bitwarden_export_20210704104034_processed.csv"
    duplicate_csv_path = "./bitwarden_export_20210704104034_duplicates.csv"
    main(in_csv_path, out_csv_path, duplicate_csv_path)
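
# Possible variation (sketch only): rather than hard-coding the paths above,
# they could be read from the command line, e.g.
#     python parse_bitwarden.py export.csv processed.csv duplicates.csv
# by adding `import sys` and replacing the three assignments with
#     in_csv_path, out_csv_path, duplicate_csv_path = sys.argv[1:4]
# (the script and file names here are just examples).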