theepicsnail/deckmath.py

## deckmath.py
"""Math behind entropy of shuffled decks and card pulling strategies.

This file contains 3 parts:
1) Helper methods intended to make the rest of the code more human readable.
2) An exploration on the math behind different deck sizes and usage strategies.
3) A conclusion at the bottom with my personal opinion on the topic.

Some assumptions for this to hold:
An end user can shuffle a deck of cards into a random state.
    Note: this is more complicated than 7 riffle shuffles. http://frans.us/research/pubs/NewAgeS.pdf?i=1
    Casinos use washing, and have great incentive to unbias their deck ordering. Probably should explore that.
Cards are identifiable and unique.
"""
import itertools
from math import perm, log

# Helper methods
def entropy(comb):
    """Number of bits of entropy from a system that can be in {comb} combinations.

    >>> entropy(2)
    1.0
    >>> entropy(1024)
    10.0
    """
    return log(comb, 2)

def deck(size):
    """human meaningful name for number of combinations a deck of size {size} can be in
    Note this is just an alias for factorial. https://en.wikipedia.org/wiki/Factorial

    3 cards can be ordered in 6 ways.
    >>> deck(3)
    6

    Standard 52 card deck
    >>> deck(52)
    80658175170943878571660636856403766975289505440883277824000000000000
    """
    return perm(size)

def subdeck(size, n):
    """human meaningful name for number of combinations the the top N cards from a deck of size {size} can be in
    Note this is just an alias for permutations. https://en.wikipedia.org/wiki/Permutation

    taking 2 cards from a 3 card deck can be
    AB AC BA BC CA CB
    >>> subdeck(3,2)
    6

    Take 1 card from a 52 card deck, there are 52 possible outcomes.
    >>> subdeck(52,1)
    52
    """
    return perm(size, n)


def maxPhraseLength(bits):
    """Returns the maximum length a phrase could be given {bits} entropy of input.

    See https://github.com/bitcoin/bips/blob/master/bip-0039.mediawiki
    |  ENT  | CS | ENT+CS |  MS  |
    +-------+----+--------+------+
    |  128  |  4 |   132  |  12  |
    |  160  |  5 |   165  |  15  |
    |  192  |  6 |   198  |  18  |
    |  224  |  7 |   231  |  21  |
    |  256  |  8 |   264  |  24  |
    In this table, ENT refers to the minimum value from entropy for a particular phrase length
    MS is the phrases length.

    >>> maxPhraseLength(10)
    0
    >>> maxPhraseLength(127.999)
    0
    >>> maxPhraseLength(128)
    12
    >>> maxPhraseLength(300)
    24
    """
    for ent, ms in [(256, 24), (224, 21), (192, 18), (160, 15), (128, 12)]:
        if bits >= ent:
            return ms
    return 0


#
# Entropy from a full shuffled deck
#

def entropyFromShuffledDeck(size):
    """
    Entropy from a fully shuffled deck of the given size:

    a 'deck' with 1 card has 0 entropy
    >>> entropyFromShuffledDeck(1)
    0.0

    >>> entropyFromShuffledDeck(2)
    1.0

    >>> entropyFromShuffledDeck(52)
    225.5810031237028
    """
    return entropy(deck(size))

def phraseLengthFromShuffledDeck(size):
    """
    A single shuffled deck can safely generate a 21 word phrase directly
    >>> phraseLengthFromShuffledDeck(52)
    21

    Minimums required for 12 and 24 length phrases
    >>> phraseLengthFromShuffledDeck(35)
    12
    >>> phraseLengthFromShuffledDeck(58)
    24
    """
    return maxPhraseLength(entropyFromShuffledDeck(size))

#
# Entropy from selecting cards from a shuffled deck
#
def phraseLengthFromSubdeck(deckSize, sampleSize):
    """Using a portion of a larger deck can decrease the number of inputs a user provides.

    We can ask the user 25 cards from a deck of 52, to generate a 12 word phrase.
    >>> phraseLengthFromSubdeck(52,25)
    12

    Interesting note:
    25 cards * 2 characters per card = 50 user input characters.
    12 word seed phrase * 4 characters per word = 48 user input characters.
    This is almost the same input from the user.


    Using two different decks to make a larger deck (ensure that cards are still unique)
    >>> phraseLengthFromSubdeck(104,20)
    12
    >>> phraseLengthFromSubdeck(104,41)
    24


    Knowing that a 52 card deck is insufficient, exploring larger decks is of interest.
    A tarot card deck consists of 78 uniquely identifyable cards.
    >>> phraseLengthFromSubdeck(78, 22)
    12
    >>> phraseLengthFromSubdeck(78, 45)
    24
    """
    return maxPhraseLength(entropy(subdeck(deckSize, sampleSize)))

#
# Tarot card side quest
#
def orientableSubdeckCombinations(deckSize, sampleSize, orientations):
    """Computes the number of combinations drawing {sampleSize} cards from a deck of size {deckSize} can be done, taking into account each card has {orientations} orientations.

    This is equal to normal deck sampling, but multiplied by each card being able to take on {orientations} orientations.

    deck with 1 card, drawing 1 card which has 4 orientations, results in 4 possible outcomes
    >>> orientableSubdeckCombinations(1, 1, 4)
    4

    if there are 2 cards in the deck, and we draw 1, and each card has 4 possible outcomes
    >>> orientableSubdeckCombinations(2, 1, 4)
    8

    if there are 2 cards in the deck, and we draw both, and they can each have 4 outcomes
    e.g: 1NSWE 2NSWE
    # either draw (1,2) or (2,1), and each of those can independently be NSWE oriented.
    (1N,2N) (1N,2S) (1N,2W) (1N,2E)    (1S,2N) (1S,2S) (1S,2W) (1S,2E)    (1W,2N) (1W,2S) (1W,2W) (1W,2E)    (1E,2N) (1E,2S) (1E,2W) (1E,2E)
    (2N,1N) (2N,1S) (2N,1W) (2N,1E)    (2S,1N) (2S,1S) (2S,1W) (2S,1E)    (2W,1N) (2W,1S) (2W,1W) (2W,1E)    (2E,1N) (2E,1S) (2E,1W) (2E,1E)
    >>> orientableSubdeckCombinations(2, 2, 4)
    32

    Drawing all cards from a tarot card deck
    >>> orientableSubdeckCombinations(78, 78, 2)
    3422553976227391787888581738394426502980434653592428106796226472128649261689311538503375508282750142128151214977645019136000000000000000000
    """
    return subdeck(deckSize, sampleSize) * (orientations**sampleSize)

def phraseLengthFromOrientableSubdeck(deckSize, sampleSize, orientations):
    """Length of phrase that can be generated by sampling from a deck that's orientable.

    Tarot cards have 78 cards, and 2 orientations. For 12 and 24 phrases, you must draw at least 18 or 38 cards respectively.
    >>> phraseLengthFromOrientableSubdeck(78, 18, 2)
    12
    >>> phraseLengthFromOrientableSubdeck(78, 38, 2)
    24
    """
    return maxPhraseLength(entropy(orientableSubdeckCombinations(deckSize, sampleSize, orientations)))

#
# 'Extending' a deck by shuffling cards back into the deck.
#
def chainedDeckCombinations(deckSamples):
    """Computes the number of combinations for a series of (deckSize, sampleSize) samples that are independent.
    NOTE: Independent requires fully shuffling the deck, this can be done by reinserting used cards and shuffling, or using a separately shuffled deck.

    Draw 1 card from a deck of 52, then reshuffle and select 2 cards
      52 combinations for the first card
      52 combinations for the second card
      51 combinations for the third card
      = 52*52*51 = 137904
    >>> chainedDeckCombinations([(52,1), (52,2)])
    137904


    Bip39-diceware
    a coin is just a deck with 2 cards where we pick 1
    a dice is a deck with 6 cards where we pick 1
    Flip a coin, and roll 4 dice.

    >>> chainedDeckCombinations([(2,1),(6,1),(6,1),(6,1),(6,1)])
    2592

    To clip the outputs to 2048 total outputs, dicewear has 544 outputs that result in reroll [t4363-t6666]
    count = cases
        4 = t4363, t4364, t4365, t4366
       36 = t44**
       36 = t45**
       36 = t46**
      216 = t5***
      216 = t6***
      ---
      544
    Thus dicewear properly maps to 2048 outputs.
    """
    total = 1
    for sample in deckSamples:
        total *= subdeck(*sample)
    return total

def phraseLengthFromChainedDecks(deckSamples):
    """
    Choosing 6 cards, then shuffling them back in, then using all 52 cards will give you a 24 word phrase.
    This requires 58 cards being input
    >>> phraseLengthFromChainedDecks([(52,6), (52,52)])
    24


    For 49 inputs (16+16+17 cards) and 3 total shuffles, we can get 24 words.
    >>> phraseLengthFromChainedDecks([(52,16), (52,16), (52,17)])
    24
    """
    return maxPhraseLength(entropy(chainedDeckCombinations(deckSamples)))

def computeBestSplitsForShuffles(deckSize, shuffles, phraseLength):
    """
    One idea is to take a

    The fewest user inputs for generating a 24 word phrase using a 52 card deck and 3 shuffles is
    to shuffle, take 10, shuffle, take 16, shuffle, take 21
    Total: 47
    >>> computeBestSplitsForShuffles(52, 3, 24)
    (10, 16, 21)

    It's not possible to get a 24 word phrase out of 1 shuffled deck
    >>> computeBestSplitsForShuffles(52, 1, 24)
    ()

    These tests take a lot of cpu power to compute. Uncomment to verify, but these are left commented for shortened test time.
    2 shuffles has a total of 49 inputs
    # DISABLED TEST
    # >>> computeBestSplitsForShuffles(52, 2, 24)
    # (16, 33)

    # DISABLED TEST
    # >>> computeBestSplitsForShuffles(52, 4, 24)
    # (0, 10, 16, 21)

    # DISABLED TEST
    # >>> computeBestSplitsForShuffles(52, 5, 24)
    # (4, 9, 10, 11, 12)

    # DISABLED TEST
    # >>> computeBestSplitsForShuffles(52, 6, 24)
    # (0, 4, 9, 10, 11, 12)
    """
    for splits in sorted(itertools.combinations_with_replacement(range(deckSize), shuffles), key=sum):
        if phraseLengthFromChainedDecks(itertools.zip_longest([], splits, fillvalue=deckSize)) == phraseLength:
            return splits
    return ()


def conclusions():
    """
    A shuffled standard deck of 52 playing cards can generate a 12 word phrase
    >>> phraseLengthFromSubdeck(52,25)
    12

    A shuffled tarot card deck of 78 cards can generate both 12 and 24 word phrases
    >>> phraseLengthFromSubdeck(78, 22)
    12
    >>> phraseLengthFromSubdeck(78, 45)
    24

    At 2 characters a card, a 12 word tarot card is the most user input efficient solution.
    44 characters input from the user, 1 shuffle of the deck.
    Compared to 96 characters to input the seed phrase directly. this is over twice as efficient.
    """

# doctest: Running this file as a script will verify all the numbers listed in the comments.
#
# Note: some tests are disabled as they require a lot of cpu time. these are prefixed by "# DISABLED TEST"
if __name__ == "__main__":
    import doctest
    doctest.testmod()
	"""Math behind entropy of shuffled decks and card pulling strategies.

	This file contains 3 parts:
	1) Helper methods intended to make the rest of the code more human readable.
	2) An exploration on the math behind different deck sizes and usage strategies.
	3) A conclusion at the bottom with my personal opinion on the topic.

	Some assumptions for this to hold:
	An end user can shuffle a deck of cards into a random state.
	Note: this is more complicated than 7 riffle shuffles. http://frans.us/research/pubs/NewAgeS.pdf?i=1
	Casinos use washing, and have great incentive to unbias their deck ordering. Probably should explore that.
	Cards are identifiable and unique.
	"""
	import itertools
	from math import perm, log

	# Helper methods
	def entropy(comb):
	"""Number of bits of entropy from a system that can be in {comb} combinations.

	>>> entropy(2)
	1.0
	>>> entropy(1024)
	10.0
	"""
	return log(comb, 2)

	def deck(size):
	"""human meaningful name for number of combinations a deck of size {size} can be in
	Note this is just an alias for factorial. https://en.wikipedia.org/wiki/Factorial

	3 cards can be ordered in 6 ways.
	>>> deck(3)
	6

	Standard 52 card deck
	>>> deck(52)
	80658175170943878571660636856403766975289505440883277824000000000000
	"""
	return perm(size)

	def subdeck(size, n):
	"""human meaningful name for number of combinations the the top N cards from a deck of size {size} can be in
	Note this is just an alias for permutations. https://en.wikipedia.org/wiki/Permutation

	taking 2 cards from a 3 card deck can be
	AB AC BA BC CA CB
	>>> subdeck(3,2)
	6

	Take 1 card from a 52 card deck, there are 52 possible outcomes.
	>>> subdeck(52,1)
	52
	"""
	return perm(size, n)


	def maxPhraseLength(bits):
	"""Returns the maximum length a phrase could be given {bits} entropy of input.

	See https://github.com/bitcoin/bips/blob/master/bip-0039.mediawiki
	\| ENT \| CS \| ENT+CS \| MS \|
	+-------+----+--------+------+
	\| 128 \| 4 \| 132 \| 12 \|
	\| 160 \| 5 \| 165 \| 15 \|
	\| 192 \| 6 \| 198 \| 18 \|
	\| 224 \| 7 \| 231 \| 21 \|
	\| 256 \| 8 \| 264 \| 24 \|
	In this table, ENT refers to the minimum value from entropy for a particular phrase length
	MS is the phrases length.

	>>> maxPhraseLength(10)
	0
	>>> maxPhraseLength(127.999)
	0
	>>> maxPhraseLength(128)
	12
	>>> maxPhraseLength(300)
	24
	"""
	for ent, ms in [(256, 24), (224, 21), (192, 18), (160, 15), (128, 12)]:
	if bits >= ent:
	return ms
	return 0


	#
	# Entropy from a full shuffled deck
	#

	def entropyFromShuffledDeck(size):
	"""
	Entropy from a fully shuffled deck of the given size:

	a 'deck' with 1 card has 0 entropy
	>>> entropyFromShuffledDeck(1)
	0.0

	>>> entropyFromShuffledDeck(2)
	1.0

	>>> entropyFromShuffledDeck(52)
	225.5810031237028
	"""
	return entropy(deck(size))

	def phraseLengthFromShuffledDeck(size):
	"""
	A single shuffled deck can safely generate a 21 word phrase directly
	>>> phraseLengthFromShuffledDeck(52)
	21

	Minimums required for 12 and 24 length phrases
	>>> phraseLengthFromShuffledDeck(35)
	12
	>>> phraseLengthFromShuffledDeck(58)
	24
	"""
	return maxPhraseLength(entropyFromShuffledDeck(size))

	#
	# Entropy from selecting cards from a shuffled deck
	#
	def phraseLengthFromSubdeck(deckSize, sampleSize):
	"""Using a portion of a larger deck can decrease the number of inputs a user provides.

	We can ask the user 25 cards from a deck of 52, to generate a 12 word phrase.
	>>> phraseLengthFromSubdeck(52,25)
	12

	Interesting note:
	25 cards * 2 characters per card = 50 user input characters.
	12 word seed phrase * 4 characters per word = 48 user input characters.
	This is almost the same input from the user.


	Using two different decks to make a larger deck (ensure that cards are still unique)
	>>> phraseLengthFromSubdeck(104,20)
	12
	>>> phraseLengthFromSubdeck(104,41)
	24


	Knowing that a 52 card deck is insufficient, exploring larger decks is of interest.
	A tarot card deck consists of 78 uniquely identifyable cards.
	>>> phraseLengthFromSubdeck(78, 22)
	12
	>>> phraseLengthFromSubdeck(78, 45)
	24
	"""
	return maxPhraseLength(entropy(subdeck(deckSize, sampleSize)))

	#
	# Tarot card side quest
	#
	def orientableSubdeckCombinations(deckSize, sampleSize, orientations):
	"""Computes the number of combinations drawing {sampleSize} cards from a deck of size {deckSize} can be done, taking into account each card has {orientations} orientations.

	This is equal to normal deck sampling, but multiplied by each card being able to take on {orientations} orientations.

	deck with 1 card, drawing 1 card which has 4 orientations, results in 4 possible outcomes
	>>> orientableSubdeckCombinations(1, 1, 4)
	4

	if there are 2 cards in the deck, and we draw 1, and each card has 4 possible outcomes
	>>> orientableSubdeckCombinations(2, 1, 4)
	8

	if there are 2 cards in the deck, and we draw both, and they can each have 4 outcomes
	e.g: 1NSWE 2NSWE
	# either draw (1,2) or (2,1), and each of those can independently be NSWE oriented.
	(1N,2N) (1N,2S) (1N,2W) (1N,2E) (1S,2N) (1S,2S) (1S,2W) (1S,2E) (1W,2N) (1W,2S) (1W,2W) (1W,2E) (1E,2N) (1E,2S) (1E,2W) (1E,2E)
	(2N,1N) (2N,1S) (2N,1W) (2N,1E) (2S,1N) (2S,1S) (2S,1W) (2S,1E) (2W,1N) (2W,1S) (2W,1W) (2W,1E) (2E,1N) (2E,1S) (2E,1W) (2E,1E)
	>>> orientableSubdeckCombinations(2, 2, 4)
	32

	Drawing all cards from a tarot card deck
	>>> orientableSubdeckCombinations(78, 78, 2)
	3422553976227391787888581738394426502980434653592428106796226472128649261689311538503375508282750142128151214977645019136000000000000000000
	"""
	return subdeck(deckSize, sampleSize) * (orientations**sampleSize)

	def phraseLengthFromOrientableSubdeck(deckSize, sampleSize, orientations):
	"""Length of phrase that can be generated by sampling from a deck that's orientable.

	Tarot cards have 78 cards, and 2 orientations. For 12 and 24 phrases, you must draw at least 18 or 38 cards respectively.
	>>> phraseLengthFromOrientableSubdeck(78, 18, 2)
	12
	>>> phraseLengthFromOrientableSubdeck(78, 38, 2)
	24
	"""
	return maxPhraseLength(entropy(orientableSubdeckCombinations(deckSize, sampleSize, orientations)))

	#
	# 'Extending' a deck by shuffling cards back into the deck.
	#
	def chainedDeckCombinations(deckSamples):
	"""Computes the number of combinations for a series of (deckSize, sampleSize) samples that are independent.
	NOTE: Independent requires fully shuffling the deck, this can be done by reinserting used cards and shuffling, or using a separately shuffled deck.

	Draw 1 card from a deck of 52, then reshuffle and select 2 cards
	52 combinations for the first card
	52 combinations for the second card
	51 combinations for the third card
	= 525251 = 137904
	>>> chainedDeckCombinations([(52,1), (52,2)])
	137904


	Bip39-diceware
	a coin is just a deck with 2 cards where we pick 1
	a dice is a deck with 6 cards where we pick 1
	Flip a coin, and roll 4 dice.

	>>> chainedDeckCombinations([(2,1),(6,1),(6,1),(6,1),(6,1)])
	2592

	To clip the outputs to 2048 total outputs, dicewear has 544 outputs that result in reroll [t4363-t6666]
	count = cases
	4 = t4363, t4364, t4365, t4366
	36 = t44**
	36 = t45**
	36 = t46**
	216 = t5***
	216 = t6***
	---
	544
	Thus dicewear properly maps to 2048 outputs.
	"""
	total = 1
	for sample in deckSamples:
	total = subdeck(sample)
	return total

	def phraseLengthFromChainedDecks(deckSamples):
	"""
	Choosing 6 cards, then shuffling them back in, then using all 52 cards will give you a 24 word phrase.
	This requires 58 cards being input
	>>> phraseLengthFromChainedDecks([(52,6), (52,52)])
	24


	For 49 inputs (16+16+17 cards) and 3 total shuffles, we can get 24 words.
	>>> phraseLengthFromChainedDecks([(52,16), (52,16), (52,17)])
	24
	"""
	return maxPhraseLength(entropy(chainedDeckCombinations(deckSamples)))

	def computeBestSplitsForShuffles(deckSize, shuffles, phraseLength):
	"""
	One idea is to take a

	The fewest user inputs for generating a 24 word phrase using a 52 card deck and 3 shuffles is
	to shuffle, take 10, shuffle, take 16, shuffle, take 21
	Total: 47
	>>> computeBestSplitsForShuffles(52, 3, 24)
	(10, 16, 21)

	It's not possible to get a 24 word phrase out of 1 shuffled deck
	>>> computeBestSplitsForShuffles(52, 1, 24)
	()

	These tests take a lot of cpu power to compute. Uncomment to verify, but these are left commented for shortened test time.
	2 shuffles has a total of 49 inputs
	# DISABLED TEST
	# >>> computeBestSplitsForShuffles(52, 2, 24)
	# (16, 33)

	# DISABLED TEST
	# >>> computeBestSplitsForShuffles(52, 4, 24)
	# (0, 10, 16, 21)

	# DISABLED TEST
	# >>> computeBestSplitsForShuffles(52, 5, 24)
	# (4, 9, 10, 11, 12)

	# DISABLED TEST
	# >>> computeBestSplitsForShuffles(52, 6, 24)
	# (0, 4, 9, 10, 11, 12)
	"""
	for splits in sorted(itertools.combinations_with_replacement(range(deckSize), shuffles), key=sum):
	if phraseLengthFromChainedDecks(itertools.zip_longest([], splits, fillvalue=deckSize)) == phraseLength:
	return splits
	return ()


	def conclusions():
	"""
	A shuffled standard deck of 52 playing cards can generate a 12 word phrase
	>>> phraseLengthFromSubdeck(52,25)
	12

	A shuffled tarot card deck of 78 cards can generate both 12 and 24 word phrases
	>>> phraseLengthFromSubdeck(78, 22)
	12
	>>> phraseLengthFromSubdeck(78, 45)
	24

	At 2 characters a card, a 12 word tarot card is the most user input efficient solution.
	44 characters input from the user, 1 shuffle of the deck.
	Compared to 96 characters to input the seed phrase directly. this is over twice as efficient.
	"""

	# doctest: Running this file as a script will verify all the numbers listed in the comments.
	#
	# Note: some tests are disabled as they require a lot of cpu time. these are prefixed by "# DISABLED TEST"
	if __name__ == "__main__":
	import doctest
	doctest.testmod()