mlzxy/outcome_sampling.py

## outcome_sampling.py
# in cfr, we simulate all actions: every action a in A[I] gets its own
# recursive cfr call, so v[I] ends up with one entry per action.
# Each call receives the history extended by a (h + [a]) and a fresh dict
# copied from π_i in which only the P(h) entry is rescaled by σ[t][I][a];
# π_i itself is left untouched.
# (presumably π_i holds per-player reach probabilities and P(h) is the player
# acting at h -- TODO confirm against the full cfr definition)
v[I] = {a: cfr(h + [a], {**π_i, P(h): π_i[P(h)] * σ[t][I][a]}, i, t)
    						  for a in A[I]}

# in outcome sampling mccfr, we only need to sample one a from A[I],
# so only the v[I][a] entry of the sampled action is written below.
a = sample(A[I], σ[t][I]) # or use `ϵ * uniform + (1-ϵ) * σ[t][I]`
# Same history extension and π_i rescaling as in the cfr call above, but for
# the single sampled action only.
# NOTE(review): unlike the cfr call, no `i, t` arguments are passed here --
# confirm whether mccfr takes them.
v[I][a] = mccfr(h + [a], {**π_i, P(h): π_i[P(h)] * σ[t][I][a]})
	# in cfr, we simulate all actions: one recursive cfr call per action in A[I].
	# Each call gets the extended history h + [a] and a fresh dict copied from
	# π_i whose P(h) entry is multiplied by σ[t][I][a] (π_i itself is unchanged).
	v[I] = {a: cfr(h + [a], {**π_i, P(h): π_i[P(h)] * σ[t][I][a]}, i, t)
		for a in A[I]}

	# in outcome sampling mccfr, we only need to sample one a from A[I],
	# so only the entry for the sampled action is written.
	a = sample(A[I], σ[t][I]) # or use `ϵ * uniform + (1-ϵ) * σ[t][I]`
	v[I][a] = mccfr(h + [a], {**π_i, P(h): π_i[P(h)] * σ[t][I][a]})