ypcrts/1password_to_keepass_compatible_csv.py

## 1password_to_keepass_compatible_csv.py
"""
@author ypcrts
@date 2018-05-01

tested on win 10, python 3.6.4
uses only python standard library

KeepassXC has parser some CSV parser bugs / difficulties
because it supports only a subset of the CSV format
spectrum. This script is an attempt to port 1Password
CSV exports to be usable with KeepassXC CSV import.

Features
- fill up username and password fields regardless of
  case in the exported field names
- identify TOTP secrets and list them high in the notes
  section for manual handling
  (KeepassXC cannot import these directly via CSV now,
  alternative would be using lib to create a Keepass 1
  database setting the `totp` attribute on entries)
- delete garbage fieleds including uuids
- dump the full data minus garbage into the notes section
  to provide a log trace
"""

import csv
import datetime
import re
import urllib.parse
import copy
from pprint import pprint


fp_r = open('1password_export.csv', 'r', newline='')
dialect = csv.Sniffer().sniff(fp_r.read(1024))
fp_r.seek(0)
cr = csv.DictReader(fp_r, dialect=dialect)

FILE_MAX_ROWS = 100
rowcounter = 0
fp_o = None
cw = None

def new_fp_o():
    global rowcounter, fp_o, cw
    filenum = int(rowcounter / FILE_MAX_ROWS) * FILE_MAX_ROWS
    if fp_o:
        fp_o.flush()
        fp_o.close()
    fp_o = open('parser_output_{}.csv'.format(filenum), 'w')
    cw = csv.DictWriter(fp_o, fieldnames=fields, dialect='www')

    # if using KeepassXC because we have exact default field alignment
    # so omit the header
    # cw.writeheader()

fields = [
    'group',
    'title',
    'username',
    'password',
    'url',
    'notes',
    'last_modified',
    'created']
csv.register_dialect(
    'www',
    delimiter=',',
    quotechar='"',
    escapechar='\\',
    doublequote=False,
    quoting=csv.QUOTE_ALL,
    lineterminator='\r\n',
)
new_fp_o()

now = datetime.datetime.utcnow().isoformat('T', 'minutes')

for r in cr:
    parsed = dict(
        group=now,
        notes='>>> BEGIN KEEPASS PARSER {} >>>>>>>>>>>>>\n'.format(now)
    )

    # PRINCIPAL FIELDS
    for n in ('username', 'password', 'title',):
        val = r.get(n, r.get(n[0].upper() + n[1:]))
        parsed[n] = val
        parsed['notes'] += "{}: {}\n".format(n, val or 'MISSING', '\n')
        del r[n]

    # URLS->URL FIELD
    if r['urls']:
        parsed['url'] = r['urls']
        del r['urls']

    # DELETE TRASH FIELDS
    for n in ('autosubmit', 'grouping', 'tags', 'scope', 'uuid'):
        del r[n]

    for k in list(r.keys()):
        if k.startswith('section:'):
            del r[k]

    if r.get('ainfo') == parsed['username']:
        del r['ainfo']

    # ALL REMAINING FIELDS
    # caution: DictReader collects unnamed fields into terminal list
    collitems = list(r.items())
    if collitems[-1][0] is None:
        sentinel = collitems.pop()
        for s in sentinel[1]:
            collitems.append((None, s))

    for item in collitems:
        if len(item) == 2:
            k = item[0]
            v = item[1]
        else:
            k = ''
            v = item

        v = v.strip() if v else v
        if not v:
            continue
        has_totp_uri = 'otpauth://' in v.lower()
        is_totp = (
            has_totp_uri or
            'totp' in k.lower() or
            'totp/' in v.lower()
        )
        if not is_totp:
            continue

        parsed['notes'] += 'totp: {} ({})'.format(v, k)

        del r[k]

    parsed['notes'] += "\n>>> UNPARSABLE FIELDS >>>>>>>>>>>>\n"
    for k, v in r.items():
        if v and not isinstance(v, str):
            v = "\n".join(v)
        if not v:
            continue
        parsed['notes'] += "{}: {}\n".format(k, v, '\n')

    parsed['notes'] += "\n>>> END "\
                       "KEEPASS PARSER {} >>>>>>>>>>>>\n".format(now)

    # example: how to skip an entry that provokes a Keepass CSV parser bug
    #if rowcounter < 200 and any(map(lambda x: x and 'SOME_PARSER_BUG_STRING' in x, parsed.values())):
    #    continue

    # example: how to skip an entry that provokes a Keepass CSV parser bug
    #if 325 < rowcounter < 350 and 'SOME_TITLE_SUBSTRING' in parsed['title']:
    #    continue


    rowcounter += 1
    if rowcounter % FILE_MAX_ROWS == 0:
        new_fp_o()

    pprint(parsed)
    cw.writerow(parsed)

    # example: manually verify parsing into dicts row by row
    # q = input()
else:
    # pythonic (ugly) way to not bother flushing unless success
    # not really brill, but hey
    fp_o.flush()
    fp_o.close()
	"""
	@author ypcrts
	@date 2018-05-01

	tested on win 10, python 3.6.4
	uses only python standard library

	KeepassXC has parser some CSV parser bugs / difficulties
	because it supports only a subset of the CSV format
	spectrum. This script is an attempt to port 1Password
	CSV exports to be usable with KeepassXC CSV import.

	Features
	- fill up username and password fields regardless of
	case in the exported field names
	- identify TOTP secrets and list them high in the notes
	section for manual handling
	(KeepassXC cannot import these directly via CSV now,
	alternative would be using lib to create a Keepass 1
	database setting the `totp` attribute on entries)
	- delete garbage fieleds including uuids
	- dump the full data minus garbage into the notes section
	to provide a log trace
	"""

	import csv
	import datetime
	import re
	import urllib.parse
	import copy
	from pprint import pprint


	fp_r = open('1password_export.csv', 'r', newline='')
	dialect = csv.Sniffer().sniff(fp_r.read(1024))
	fp_r.seek(0)
	cr = csv.DictReader(fp_r, dialect=dialect)

	FILE_MAX_ROWS = 100
	rowcounter = 0
	fp_o = None
	cw = None

	def new_fp_o():
	global rowcounter, fp_o, cw
	filenum = int(rowcounter / FILE_MAX_ROWS) * FILE_MAX_ROWS
	if fp_o:
	fp_o.flush()
	fp_o.close()
	fp_o = open('parser_output_{}.csv'.format(filenum), 'w')
	cw = csv.DictWriter(fp_o, fieldnames=fields, dialect='www')

	# if using KeepassXC because we have exact default field alignment
	# so omit the header
	# cw.writeheader()

	fields = [
	'group',
	'title',
	'username',
	'password',
	'url',
	'notes',
	'last_modified',
	'created']
	csv.register_dialect(
	'www',
	delimiter=',',
	quotechar='"',
	escapechar='\\',
	doublequote=False,
	quoting=csv.QUOTE_ALL,
	lineterminator='\r\n',
	)
	new_fp_o()

	now = datetime.datetime.utcnow().isoformat('T', 'minutes')

	for r in cr:
	parsed = dict(
	group=now,
	notes='>>> BEGIN KEEPASS PARSER {} >>>>>>>>>>>>>\n'.format(now)
	)

	# PRINCIPAL FIELDS
	for n in ('username', 'password', 'title',):
	val = r.get(n, r.get(n[0].upper() + n[1:]))
	parsed[n] = val
	parsed['notes'] += "{}: {}\n".format(n, val or 'MISSING', '\n')
	del r[n]

	# URLS->URL FIELD
	if r['urls']:
	parsed['url'] = r['urls']
	del r['urls']

	# DELETE TRASH FIELDS
	for n in ('autosubmit', 'grouping', 'tags', 'scope', 'uuid'):
	del r[n]

	for k in list(r.keys()):
	if k.startswith('section:'):
	del r[k]

	if r.get('ainfo') == parsed['username']:
	del r['ainfo']

	# ALL REMAINING FIELDS
	# caution: DictReader collects unnamed fields into terminal list
	collitems = list(r.items())
	if collitems[-1][0] is None:
	sentinel = collitems.pop()
	for s in sentinel[1]:
	collitems.append((None, s))

	for item in collitems:
	if len(item) == 2:
	k = item[0]
	v = item[1]
	else:
	k = ''
	v = item

	v = v.strip() if v else v
	if not v:
	continue
	has_totp_uri = 'otpauth://' in v.lower()
	is_totp = (
	has_totp_uri or
	'totp' in k.lower() or
	'totp/' in v.lower()
	)
	if not is_totp:
	continue

	parsed['notes'] += 'totp: {} ({})'.format(v, k)

	del r[k]

	parsed['notes'] += "\n>>> UNPARSABLE FIELDS >>>>>>>>>>>>\n"
	for k, v in r.items():
	if v and not isinstance(v, str):
	v = "\n".join(v)
	if not v:
	continue
	parsed['notes'] += "{}: {}\n".format(k, v, '\n')

	parsed['notes'] += "\n>>> END "\
	"KEEPASS PARSER {} >>>>>>>>>>>>\n".format(now)

	# example: how to skip an entry that provokes a Keepass CSV parser bug
	#if rowcounter < 200 and any(map(lambda x: x and 'SOME_PARSER_BUG_STRING' in x, parsed.values())):
	# continue

	# example: how to skip an entry that provokes a Keepass CSV parser bug
	#if 325 < rowcounter < 350 and 'SOME_TITLE_SUBSTRING' in parsed['title']:
	# continue


	rowcounter += 1
	if rowcounter % FILE_MAX_ROWS == 0:
	new_fp_o()

	pprint(parsed)
	cw.writerow(parsed)

	# example: manually verify parsing into dicts row by row
	# q = input()
	else:
	# pythonic (ugly) way to not bother flushing unless success
	# not really brill, but hey
	fp_o.flush()
	fp_o.close()