miraculixx/README.txt

## README.txt
Question by @nntaleb

  Throw a coin, H=head, T=tail. Which one of these sequences is more likely?

  A={H,T,H,T,H,T,H,T,H,T,T,H,T,H,H,T,T,T,H,H}

  B={H,H,H,H,H,H,H,H,H,H,H,H,H,H,H,H,H,H,H,H}

  C={H,H,H,H,H,H,H,H,H,H,T,T,T,T,T,T,T,T,T,T}

cointoss.py is a Python implementation that evaluates this question empirically.

Use as follows:

  $ python cointoss.py [<N>] [<T>]
  Of 10000 trials with 100 samples:
  - overall counts Counter({'A': 1, 'C': 1, 'B': 0})
  - winner counts Counter({'A': 9999, 'C': 1})


  N = number of samples for each trial, defaults to 1000
  T = number of trials, defaults to 1000

  Overall counts are the number of times each sequence was found, over all trials
  Winner counts are the number of trials in which each sequence was the most frequently found one; '*' counts the trials in which none of the sequences was found at all

  Reading example for above results: In 10000 trials with 100 samples each, the sequences A and C
  were found exactly once each. Sequence B was never found (out of a total of 1'000'000 samples).


Interesting results:

$ python cointoss.py 1000 1000
Of 1000 trials with 1000 samples:
- overall counts Counter({'B': 1, 'A': 0, 'C': 0})
- winner counts Counter({'*': 999, 'B': 1})

$ python cointoss.py 10000 1000
Of 1000 trials with 10000 samples:
- overall counts Counter({'C': 13, 'B': 12, 'A': 11})
- winner counts Counter({'*': 964, 'C': 13, 'B': 12, 'A': 11})

$ python cointoss.py 100000 1000
Of 1000 trials with 100000 samples:
- overall counts Counter({'A': 114, 'C': 104, 'B': 90})
- winner counts Counter({'*': 734, 'A': 107, 'C': 85, 'B': 74})

$ python cointoss.py 1000000 1000
Of 1000 trials with 1000000 samples:
- overall counts Counter({'B': 1009, 'C': 988, 'A': 986})
- winner counts Counter({'A': 441, 'B': 294, 'C': 209, '*': 56})


Beware of increasing runtime O(N * T)


## cointoss.py
import sys
from collections import Counter
import numpy as np

def more_likely(N=1000):
    """Draw N random 20-toss sequences and count exact matches of A, B and C.

    Every toss is encoded as 0 (head) or 1 (tail) and drawn with
    ``np.random.randint``; each row of the sample is one 20-toss sequence.

    Args:
        N: number of 20-toss sequences to draw.

    Returns:
        A dict mapping 'A', 'B' and 'C' to the number of sampled rows that
        are identical to the respective reference sequence.
    """
    H, T = 0, 1
    patterns = {
        'A': [H, T, H, T, H, T, H, T, H, T, T, H, T, H, H, T, T, T, H, H],
        'B': [H] * 20,
        'C': [H] * 10 + [T] * 10,
    }
    tosses = np.random.randint(H, T + 1, (N, 20))
    hits = {}
    for label, pattern in patterns.items():
        # A row counts only if all 20 positions agree with the pattern.
        row_matches = np.all(tosses == np.array(pattern), axis=1)
        hits[label] = int(np.count_nonzero(row_matches))
    return hits


def trials(N=1000, T=1000):
    """Run T trials of N samples each and print the aggregated results.

    For every trial ``more_likely(N)`` is called once. Two tallies are kept:

    * overall counts: how often each sequence was found, summed over all
      trials;
    * winner counts: in how many trials each sequence was the most
      frequently found one. Sequences tied for the highest non-zero count
      are all credited, so the winner counts can add up to more than T.
      Trials in which no sequence was found at all are tallied under '*'.

    Args:
        N: number of samples per trial.
        T: number of trials.

    Returns:
        The tuple ``(counts, winners)`` of the two ``Counter`` objects that
        were printed.
    """
    counts = Counter()
    winners = Counter()
    for _ in range(T):
        ex = more_likely(N)
        counts.update(ex)
        best = max(ex.values())
        if best > 0:
            # Credit every sequence that reaches the top count. Selecting a
            # single key with max() resolves ties by dict order, which
            # systematically favours 'A' over 'B' over 'C'.
            winners.update(name for name, found in ex.items() if found == best)
        else:
            # None of the sequences showed up in this trial.
            winners['*'] += 1
    print(f"Of {T} trials with {N} samples:")
    print("- overall counts", counts)
    print("- winner counts", winners)
    return counts, winners

# Script entry: N (samples per trial) and T (number of trials) both default to
# 1000; the first and second command-line arguments, when given, override them
# in that order. Any further arguments are ignored.
kwargs = {'N': 1000, 'T': 1000}
kwargs.update(zip('NT', map(int, sys.argv[1:3])))
trials(**kwargs)
	Question by @nntaleb

	Throw a coin, H=head, T=tail. Which one of these sequences is more likely?

	A={H,T,H,T,H,T,H,T,H,T,T,H,T,H,H,T,T,T,H,H}

	B={H,H,H,H,H,H,H,H,H,H,H,H,H,H,H,H,H,H,H,H}

	C={H,H,H,H,H,H,H,H,H,H,T,T,T,T,T,T,T,T,T,T}

	cointoss.py is a Python implementation that evaluates this question empirically.

	Use as follows:

	$ python cointoss.py [<N>] [<T>]
	Of 10000 trials with 100 samples:
	- overall counts Counter({'A': 1, 'C': 1, 'B': 0})
	- winner counts Counter({'A': 9999, 'C': 1})


	N = number of samples for each trial, defaults to 1000
	T = number of trials, defaults to 1000

	Overall counts are the number of times each sequence was found, over all trials
	Winner counts are the number of trials in which each sequence was the most frequently found one; '*' counts the trials in which none of the sequences was found at all

	Reading example for above results: In 10000 trials with 100 samples each, the sequences A and C
	were found exactly once each. Sequence B was never found (out of a total of 1'000'000 samples).



	Interesting results:

	$ python cointoss.py 1000 1000
	Of 1000 trials with 1000 samples:
	- overall counts Counter({'B': 1, 'A': 0, 'C': 0})
	- winner counts Counter({'*': 999, 'B': 1})

	$ python cointoss.py 10000 1000
	Of 1000 trials with 10000 samples:
	- overall counts Counter({'C': 13, 'B': 12, 'A': 11})
	- winner counts Counter({'*': 964, 'C': 13, 'B': 12, 'A': 11})

	$ python cointoss.py 100000 1000
	Of 1000 trials with 100000 samples:
	- overall counts Counter({'A': 114, 'C': 104, 'B': 90})
	- winner counts Counter({'*': 734, 'A': 107, 'C': 85, 'B': 74})

	$ python cointoss.py 1000000 1000
	Of 1000 trials with 1000000 samples:
	- overall counts Counter({'B': 1009, 'C': 988, 'A': 986})
	- winner counts Counter({'A': 441, 'B': 294, 'C': 209, '*': 56})


	Beware of increasing runtime O(N * T)
	import sys
	from collections import Counter
	import numpy as np

	def more_likely(N=1000):
	    """Draw N random 20-toss sequences and count exact matches of A, B and C.

	    Every toss is encoded as 0 (head) or 1 (tail) and drawn with
	    ``np.random.randint``; each row of the sample is one 20-toss sequence.

	    Args:
	        N: number of 20-toss sequences to draw.

	    Returns:
	        A dict mapping 'A', 'B' and 'C' to the number of sampled rows that
	        are identical to the respective reference sequence.
	    """
	    H, T = 0, 1
	    patterns = {
	        'A': [H, T, H, T, H, T, H, T, H, T, T, H, T, H, H, T, T, T, H, H],
	        'B': [H] * 20,
	        'C': [H] * 10 + [T] * 10,
	    }
	    tosses = np.random.randint(H, T + 1, (N, 20))
	    hits = {}
	    for label, pattern in patterns.items():
	        # A row counts only if all 20 positions agree with the pattern.
	        row_matches = np.all(tosses == np.array(pattern), axis=1)
	        hits[label] = int(np.count_nonzero(row_matches))
	    return hits


	def trials(N=1000, T=1000):
	    """Run T trials of N samples each and print the aggregated results.

	    For every trial ``more_likely(N)`` is called once. Two tallies are kept:

	    * overall counts: how often each sequence was found, summed over all
	      trials;
	    * winner counts: in how many trials each sequence was the most
	      frequently found one. Sequences tied for the highest non-zero count
	      are all credited, so the winner counts can add up to more than T.
	      Trials in which no sequence was found at all are tallied under '*'.

	    Args:
	        N: number of samples per trial.
	        T: number of trials.

	    Returns:
	        The tuple ``(counts, winners)`` of the two ``Counter`` objects that
	        were printed.
	    """
	    counts = Counter()
	    winners = Counter()
	    for _ in range(T):
	        ex = more_likely(N)
	        counts.update(ex)
	        best = max(ex.values())
	        if best > 0:
	            # Credit every sequence that reaches the top count. Selecting a
	            # single key with max() resolves ties by dict order, which
	            # systematically favours 'A' over 'B' over 'C'.
	            winners.update(name for name, found in ex.items() if found == best)
	        else:
	            # None of the sequences showed up in this trial.
	            winners['*'] += 1
	    print(f"Of {T} trials with {N} samples:")
	    print("- overall counts", counts)
	    print("- winner counts", winners)
	    return counts, winners

	# Script entry: N (samples per trial) and T (number of trials) both default to
	# 1000; the first and second command-line arguments, when given, override them
	# in that order. Any further arguments are ignored.
	kwargs = {'N': 1000, 'T': 1000}
	kwargs.update(zip('NT', map(int, sys.argv[1:3])))
	trials(**kwargs)