theepicsnail/deckToWords.py

## deckToWords.py
# Notes: This does not generate a valid seed.
# Theres a handful of endianness problems.
# This does not compute checksums
# This is a proof of concept for converting a permutation into a base 2048 number.
# With a few more tweaks and some precise memory management, it should be compatable with proper bep-39 tools.


from math import perm, log
DECK_52 = [face+suit for suit in "HCDS" for face in "A234567890JQK"]

def factorial(x): # human meaningful name.
    return perm(x)

def lehmerEncodeInplace(permutation:list[int])->list[int]:
    """Compute the lehmer encoding of a permutation inplace. Also returns it.
    https://en.wikipedia.org/wiki/Lehmer_code

    Lehmer codes are a list of numbers that encode a permutation of the integers [0,n)
    Without explaining too detailed, it's effectively building a permutation by removing numbers from a list of possible numbers

    Example:
    to encode [2,3,0,1] we first have an empty array (output) and an array with [0,n) (available)

    output [] available [0,1,2,3]
    the first digit in the permutation is a 2, which in the available list is at position [2]
    append 2 to the output, remove [2] from available

    output [2] available [0,1,3]
    the second digit in the permutation is a 3, which is at position [2] in the available array
    append 2 to the output, remove [2] from available

    output [2, 2] available [0,1]
    the third digit in the permutation is a 0, which is at position [0] in the available array
    append 0 to the output, remove [0] from available

    output [2, 2, 0], available [1]
    the fourth digit in the permutation is a 1, which is at position [0] in the available array
    append 0 to the output, remove [0] from available

    output [2, 2, 0, 0]

    >>> lehmerEncodeInplace([2, 3, 0, 1])
    [2, 2, 0, 0]

    In order we always select the first available number
    >>> lehmerEncodeInplace([0, 1, 2, 3, 4])
    [0, 0, 0, 0, 0]

    Reverse order we always select the last available number
    >>> lehmerEncodeInplace([4, 3, 2, 1, 0])
    [4, 3, 2, 1, 0]

    Wiki example
    >>> lehmerEncodeInplace([1, 5, 0, 6, 3, 4, 2])
    [1, 4, 0, 3, 1, 1, 0]
    """
    for i in range(len(permutation)):
        for j in range(i+1, len(permutation)):
            if permutation[j] > permutation[i]:
                permutation[j] -= 1
    return permutation

def lehmerToNumber(lehmer: list[int]):
    """
    lehmer codes double as a factorial numbering system
    https://en.wikipedia.org/wiki/Factorial_number_system

    each place value has a maximum value as:
    [n, n-1, n-2, ..., 2, 1]

    The last number only has 1 possible value [0]
    The second to last number only has 2 possible values [0,1]
    and so on.

    For example from above: [2,3,0,1] has a lehmer code of [2,2,0,0]
    We can read this as a factorial number system
    code:        2,   2,   0,   0
    place value: 3!   2!   1!   0!
            = 2*3! + 2*2! + 0*1! + 0*0!
            = 2*6  + 2*2  + 0*1  + 0*1
            = 16

    Listing all permutations of 0,1,2,3 in order we can see the permutation [2,3,0,1] has index 16
    0) 0,1,2,3      1) 0,1,3,2      2) 0,2,1,3      3) 0,2,3,1
    4) 0,3,1,2      5) 0,3,2,1      6) 1,0,2,3      7) 1,0,3,2
    8) 1,2,0,3      9) 1,2,3,0     10) 1,3,0,2     11) 1,3,2,0
    12) 2,0,1,3    13) 2,0,3,1     14) 2,1,0,3     15) 2,1,3,0
    16) 2,3,0,1

    >>> lehmerToNumber(lehmerEncodeInplace([2, 3, 0, 1]))
    16

    Permutation 0 is just in order
    >>> lehmerToNumber(lehmerEncodeInplace([0, 1, 2, 3]))
    0

    Permutation n!-1 is the last permutation, reverse order
    >>> lehmerToNumber(lehmerEncodeInplace([3, 2, 1, 0]))
    23
    """
    # iterate them in reverse so the last digit has place=0
    total = 0
    for place, value in enumerate(lehmer[::-1]):
        total += value * factorial(place)
    return total

def numberToBase(number:int, base:int):
    """We rewrite the number in base {base}
    This is done via the simple division and reminder loop.

    1s 2s 4s 8s
    >>> numberToBase(8, 2)
    [0, 0, 0, 1]

    >>> numberToBase(1, 2048)
    [1]

    >>> numberToBase(2047, 2048)
    [2047]

    >>> numberToBase(2048, 2048)
    [0, 1]
    """
    if number == 0:
        # returning [] (which is technically right) is confusing
        return [0]

    out = []
    while number > 0:
        d, m = divmod(number, base)
        out.append(m)
        number = d
    return out


def inputAndDeckToPerm(user_input:list[str], deck:list[str])->list[int]:
    return [deck.index(token) for token in user_input]

def fullExample(user_input, deck, base):
    """
    Use a provided {user_input} from a possible {deck} and encode that permutation
    into an array representation of base {base}.

    >>> DECK_4 = ["A", "B", "C", "D"]

    Sorted deck returns the first word
    >>> fullExample(["A", "B", "C", "D"], DECK_4, 2)
    [0]

    >>> fullExample(["D", "C", "B", "A"], DECK_4, 2)
    [1, 1, 1, 0, 1]

    DCBA is permutation 23
    1 + 2 + 4 + 0 + 16 = 23

    For cases where the user input does not consume the whole deck, we still need a full
    permutation, so we we append the rest of the deck in order to the users permutation.
    This is most easily done by extending the lehmer encoding with 0s until it is the length of the deck.

    AH 2H 3H ... KH AC ... KS
    >>> DECK_52 = [face+suit for suit in "HCDS" for face in "A234567890JQK"]
    >>> import random
    >>> random.seed(0)
    >>> EXAMPLE_25_CARDS = random.sample(DECK_52, 25)
    >>> print(" ".join(EXAMPLE_25_CARDS))
    QC 0S AD 3H 4C 7D 6D KC 7C 5D 0C QD AC 8S 9H 6C 2S 7H 9S 0H 5S 4H JC QS 2D
    >>> fullExample(EXAMPLE_25_CARDS, DECK_52, 2048)
    [125, 1395, 671, 714, 1556, 1154, 939, 1961, 1698, 1785, 1876, 1335]

    Unshuffled deck returns 0, which if you wanted to extend to 12 words it's [0,0,0,0, 0,0,0,0, 0,0,0,0]
    >>> fullExample(DECK_52[:25], DECK_52, 2048)
    [0]


    2S = 40
    9H = 8
    has a padding of 50
    has a lehmer code of [40, 8, 0, 0, 0, 0 ..50 0s..]
    has a value of 40*51! + 8*50!
    after compensating for padding
    has a value of 40*51!/50! + 8*50!/50!
    has a value of 40*51 + 8
    has a value of 2048
    has an encoding of [0,1] in base 2048

    >>> fullExample(["2S",  "9H"], DECK_52, 2048)
    [0, 1]
    """

    # Convert user input into a permutation.
    perm = inputAndDeckToPerm(user_input, deck)

    # Convert permutation into a complete lehmer encoding.
    lehmerEncodeInplace(perm)
    padding = (len(deck) - len(perm))
    perm += [0] * padding

    # Convert the encoding to it numeric value.
    perm_id = lehmerToNumber(perm)

    # remove the effect of the padding added above
    perm_id //= factorial(padding)

    # Export that in the requested base.
    return numberToBase(perm_id, base)


def main_test():
    import doctest
    doctest.testmod()

def main_52(card_string:str, word_list_file:str):
    # Validate that the user input is sane.
    # 25 cards, all from the DECK_52 set, with no duplicates.
    cards = card_string.split(" ")
    assert len(cards) == 25, "Expected exactly 25 cards, got %s" % len(cards)

    bad_cards = set(cards) - set(DECK_52)
    assert not bad_cards, "Invalid cards: %s" % bad_cards

    duplicate_cards = list(cards)
    for c in set(cards):
        duplicate_cards.remove(c)
    assert not duplicate_cards, "Found duplicate cards: %s" % duplicate_cards


    word_list_content = b''
    with open(word_list_file,"rb") as f:
        word_list_content = f.read()
    words = word_list_content.decode().splitlines()

    import hashlib
    s = hashlib.sha256()
    s.update(word_list_content)
    print("Wordlist sha256: " + s.hexdigest())
    # english.txt sha256 2f5eed53a4727b4bf8880d8f3f199efc90e58503646d9ff8eff3a2ed3b24dbda


    base = len(words)
    phrase_indexes = fullExample(cards, DECK_52, base)
    print(phrase_indexes)
    print("Phrase:")
    for i, w in enumerate(phrase_indexes):
        print("%2i) %s" % (i, words[w]))


if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    actions = parser.add_mutually_exclusive_group(required=True)
    actions.add_argument("--test", help="Run tests", action="store_true")
    actions.add_argument("--cards_52", help="Generate a 12 word phrase from 25 cards of a 52 card deck.", metavar="cards")
    parser.add_argument("--word_list_file", help="Word list to use. Likely bip-39 list.")

    args = parser.parse_args()
    if args.test:
        main_test()
    elif args.cards_52:
        assert args.word_list_file, "--word_list_file required with --cards_52"
        main_52(args.cards_52, args.word_list_file)

# Save
# https://raw.githubusercontent.com/bitcoin/bips/master/bip-0039/english.txt
# as english.txt
#
# python .\deckToWords.py --cards_52 "QC 0S AD 3H 4C 7D 6D KC 7C 5D 0C QD AC 8S 9H 6C 2S 7H 9S 0H 5S 4H JC QS 2D" --word_list_file=english.txt
# Word list contains 2048 words. From abandon to zoo
# Phrase:
#  0) autumn
#  1) purpose
#  2) fault
#  3) flock
#  4) secret
#  5) motion
#  6) install
#  7) vivid
#  8) stairs
#  9) tennis
# 10) tunnel
# 11) plunge
	# Notes: This does not generate a valid seed.
	# Theres a handful of endianness problems.
	# This does not compute checksums
	# This is a proof of concept for converting a permutation into a base 2048 number.
	# With a few more tweaks and some precise memory management, it should be compatable with proper bep-39 tools.


	from math import perm, log
	DECK_52 = [face+suit for suit in "HCDS" for face in "A234567890JQK"]

	def factorial(x): # human meaningful name.
	return perm(x)

	def lehmerEncodeInplace(permutation:list[int])->list[int]:
	"""Compute the lehmer encoding of a permutation inplace. Also returns it.
	https://en.wikipedia.org/wiki/Lehmer_code

	Lehmer codes are a list of numbers that encode a permutation of the integers [0,n)
	Without explaining too detailed, it's effectively building a permutation by removing numbers from a list of possible numbers

	Example:
	to encode [2,3,0,1] we first have an empty array (output) and an array with [0,n) (available)

	output [] available [0,1,2,3]
	the first digit in the permutation is a 2, which in the available list is at position [2]
	append 2 to the output, remove [2] from available

	output [2] available [0,1,3]
	the second digit in the permutation is a 3, which is at position [2] in the available array
	append 2 to the output, remove [2] from available

	output [2, 2] available [0,1]
	the third digit in the permutation is a 0, which is at position [0] in the available array
	append 0 to the output, remove [0] from available

	output [2, 2, 0], available [1]
	the fourth digit in the permutation is a 1, which is at position [0] in the available array
	append 0 to the output, remove [0] from available

	output [2, 2, 0, 0]

	>>> lehmerEncodeInplace([2, 3, 0, 1])
	[2, 2, 0, 0]

	In order we always select the first available number
	>>> lehmerEncodeInplace([0, 1, 2, 3, 4])
	[0, 0, 0, 0, 0]

	Reverse order we always select the last available number
	>>> lehmerEncodeInplace([4, 3, 2, 1, 0])
	[4, 3, 2, 1, 0]

	Wiki example
	>>> lehmerEncodeInplace([1, 5, 0, 6, 3, 4, 2])
	[1, 4, 0, 3, 1, 1, 0]
	"""
	for i in range(len(permutation)):
	for j in range(i+1, len(permutation)):
	if permutation[j] > permutation[i]:
	permutation[j] -= 1
	return permutation

	def lehmerToNumber(lehmer: list[int]):
	"""
	lehmer codes double as a factorial numbering system
	https://en.wikipedia.org/wiki/Factorial_number_system

	each place value has a maximum value as:
	[n, n-1, n-2, ..., 2, 1]

	The last number only has 1 possible value [0]
	The second to last number only has 2 possible values [0,1]
	and so on.

	For example from above: [2,3,0,1] has a lehmer code of [2,2,0,0]
	We can read this as a factorial number system
	code: 2, 2, 0, 0
	place value: 3! 2! 1! 0!
	= 23! + 22! + 01! + 00!
	= 26 + 22 + 01 + 01
	= 16

	Listing all permutations of 0,1,2,3 in order we can see the permutation [2,3,0,1] has index 16
	0) 0,1,2,3 1) 0,1,3,2 2) 0,2,1,3 3) 0,2,3,1
	4) 0,3,1,2 5) 0,3,2,1 6) 1,0,2,3 7) 1,0,3,2
	8) 1,2,0,3 9) 1,2,3,0 10) 1,3,0,2 11) 1,3,2,0
	12) 2,0,1,3 13) 2,0,3,1 14) 2,1,0,3 15) 2,1,3,0
	16) 2,3,0,1

	>>> lehmerToNumber(lehmerEncodeInplace([2, 3, 0, 1]))
	16

	Permutation 0 is just in order
	>>> lehmerToNumber(lehmerEncodeInplace([0, 1, 2, 3]))
	0

	Permutation n!-1 is the last permutation, reverse order
	>>> lehmerToNumber(lehmerEncodeInplace([3, 2, 1, 0]))
	23
	"""
	# iterate them in reverse so the last digit has place=0
	total = 0
	for place, value in enumerate(lehmer[::-1]):
	total += value * factorial(place)
	return total

	def numberToBase(number:int, base:int):
	"""We rewrite the number in base {base}
	This is done via the simple division and reminder loop.

	1s 2s 4s 8s
	>>> numberToBase(8, 2)
	[0, 0, 0, 1]

	>>> numberToBase(1, 2048)
	[1]

	>>> numberToBase(2047, 2048)
	[2047]

	>>> numberToBase(2048, 2048)
	[0, 1]
	"""
	if number == 0:
	# returning [] (which is technically right) is confusing
	return [0]

	out = []
	while number > 0:
	d, m = divmod(number, base)
	out.append(m)
	number = d
	return out




	def inputAndDeckToPerm(user_input:list[str], deck:list[str])->list[int]:
	return [deck.index(token) for token in user_input]

	def fullExample(user_input, deck, base):
	"""
	Use a provided {user_input} from a possible {deck} and encode that permutation
	into an array representation of base {base}.

	>>> DECK_4 = ["A", "B", "C", "D"]

	Sorted deck returns the first word
	>>> fullExample(["A", "B", "C", "D"], DECK_4, 2)
	[0]

	>>> fullExample(["D", "C", "B", "A"], DECK_4, 2)
	[1, 1, 1, 0, 1]

	DCBA is permutation 23
	1 + 2 + 4 + 0 + 16 = 23

	For cases where the user input does not consume the whole deck, we still need a full
	permutation, so we we append the rest of the deck in order to the users permutation.
	This is most easily done by extending the lehmer encoding with 0s until it is the length of the deck.

	AH 2H 3H ... KH AC ... KS
	>>> DECK_52 = [face+suit for suit in "HCDS" for face in "A234567890JQK"]
	>>> import random
	>>> random.seed(0)
	>>> EXAMPLE_25_CARDS = random.sample(DECK_52, 25)
	>>> print(" ".join(EXAMPLE_25_CARDS))
	QC 0S AD 3H 4C 7D 6D KC 7C 5D 0C QD AC 8S 9H 6C 2S 7H 9S 0H 5S 4H JC QS 2D
	>>> fullExample(EXAMPLE_25_CARDS, DECK_52, 2048)
	[125, 1395, 671, 714, 1556, 1154, 939, 1961, 1698, 1785, 1876, 1335]

	Unshuffled deck returns 0, which if you wanted to extend to 12 words it's [0,0,0,0, 0,0,0,0, 0,0,0,0]
	>>> fullExample(DECK_52[:25], DECK_52, 2048)
	[0]


	2S = 40
	9H = 8
	has a padding of 50
	has a lehmer code of [40, 8, 0, 0, 0, 0 ..50 0s..]
	has a value of 4051! + 850!
	after compensating for padding
	has a value of 4051!/50! + 850!/50!
	has a value of 40*51 + 8
	has a value of 2048
	has an encoding of [0,1] in base 2048

	>>> fullExample(["2S", "9H"], DECK_52, 2048)
	[0, 1]
	"""

	# Convert user input into a permutation.
	perm = inputAndDeckToPerm(user_input, deck)

	# Convert permutation into a complete lehmer encoding.
	lehmerEncodeInplace(perm)
	padding = (len(deck) - len(perm))
	perm += [0] * padding

	# Convert the encoding to it numeric value.
	perm_id = lehmerToNumber(perm)

	# remove the effect of the padding added above
	perm_id //= factorial(padding)

	# Export that in the requested base.
	return numberToBase(perm_id, base)



	def main_test():
	import doctest
	doctest.testmod()

	def main_52(card_string:str, word_list_file:str):
	# Validate that the user input is sane.
	# 25 cards, all from the DECK_52 set, with no duplicates.
	cards = card_string.split(" ")
	assert len(cards) == 25, "Expected exactly 25 cards, got %s" % len(cards)

	bad_cards = set(cards) - set(DECK_52)
	assert not bad_cards, "Invalid cards: %s" % bad_cards

	duplicate_cards = list(cards)
	for c in set(cards):
	duplicate_cards.remove(c)
	assert not duplicate_cards, "Found duplicate cards: %s" % duplicate_cards


	word_list_content = b''
	with open(word_list_file,"rb") as f:
	word_list_content = f.read()
	words = word_list_content.decode().splitlines()

	import hashlib
	s = hashlib.sha256()
	s.update(word_list_content)
	print("Wordlist sha256: " + s.hexdigest())
	# english.txt sha256 2f5eed53a4727b4bf8880d8f3f199efc90e58503646d9ff8eff3a2ed3b24dbda


	base = len(words)
	phrase_indexes = fullExample(cards, DECK_52, base)
	print(phrase_indexes)
	print("Phrase:")
	for i, w in enumerate(phrase_indexes):
	print("%2i) %s" % (i, words[w]))


	if __name__ == "__main__":
	import argparse
	parser = argparse.ArgumentParser()
	actions = parser.add_mutually_exclusive_group(required=True)
	actions.add_argument("--test", help="Run tests", action="store_true")
	actions.add_argument("--cards_52", help="Generate a 12 word phrase from 25 cards of a 52 card deck.", metavar="cards")
	parser.add_argument("--word_list_file", help="Word list to use. Likely bip-39 list.")

	args = parser.parse_args()
	if args.test:
	main_test()
	elif args.cards_52:
	assert args.word_list_file, "--word_list_file required with --cards_52"
	main_52(args.cards_52, args.word_list_file)

	# Save
	# https://raw.githubusercontent.com/bitcoin/bips/master/bip-0039/english.txt
	# as english.txt
	#
	# python .\deckToWords.py --cards_52 "QC 0S AD 3H 4C 7D 6D KC 7C 5D 0C QD AC 8S 9H 6C 2S 7H 9S 0H 5S 4H JC QS 2D" --word_list_file=english.txt
	# Word list contains 2048 words. From abandon to zoo
	# Phrase:
	# 0) autumn
	# 1) purpose
	# 2) fault
	# 3) flock
	# 4) secret
	# 5) motion
	# 6) install
	# 7) vivid
	# 8) stairs
	# 9) tennis
	# 10) tunnel
	# 11) plunge