gr33ndata/Captain Tsubasa Passing Accuracy

## Captain Tsubasa Passing Accuracy
# Better run this in a Jupyter notebook
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Simulate a player whose per-pass completion probability is p.
# Every element of `passes` is one Bernoulli trial: 1 = completed, 0 = missed.
p = 0.75
passes = np.random.binomial(1, p, 1000)

# Sanity check on the simulated data: the mean should be close to p and the
# standard deviation close to sqrt(p * (1 - p)).
np.round(passes.mean(), 3), np.round(passes.std(), 3)

# Draw 1000 samples of 10 passes each (with replacement) and record the
# accuracy, i.e. the fraction of completed passes, of every sample.
passes10 = pd.Series([
    np.mean(np.random.choice(passes, 10))
    for _ in range(1000)
])

# Show how the 10-pass accuracies are distributed: a kernel density estimate
# restricted to [0, 1], plus a dashed vertical line at their mean.
fig, ax = plt.subplots()
passes10.plot.kde(
    ax=ax,
    xlim=(0, 1),
    title='Distribution of 10 Passes Accuracies',
)
ax.axvline(passes10.mean(), alpha=0.7, linestyle='--', color='k')

# Fraction of the 10-pass samples with an accuracy of 50% or lower
# (about 9% in the original author's run).
passes10.le(0.5).mean()

# Repeat the experiment with 30 passes per sample instead of 10
# (it can be plotted the same way as the 10-pass case).
passes30 = pd.Series([
    np.mean(np.random.choice(passes, 30))
    for _ in range(1000)
])

# Fraction of the 30-pass samples with an accuracy of 50% or lower
# (about 2% in the original author's run).
passes30.le(0.5).mean()

# Do the same, with 300 passes per sample this time
# (it can be plotted the same way as the 10-pass case).
# The result is stored in passes300: the check below reads that name, and
# reusing passes30 here would both raise a NameError and overwrite the
# 30-pass results.
passes300 = pd.Series(
    [
        np.random.choice(passes, size=300).mean()
        for _ in range(1000)
    ]
)

# Check what percentage of the samples has 50% accuracy or less
# (about 0% in the original author's run).
(passes300 <= 0.5).mean()
	# Better run this in a Jupyter notebook
	import numpy as np

	# Simulate a player whose per-pass completion probability is p.
	# Every element of `passes` is one Bernoulli trial: 1 = completed, 0 = missed.
	p = 0.75
	passes = np.random.binomial(1, p, 1000)

	# Sanity check on the simulated data: the mean should be close to p and
	# the standard deviation close to sqrt(p * (1 - p)).
	np.round(passes.mean(), 3), np.round(passes.std(), 3)

	# Draw 1000 samples of 10 passes each (with replacement) and record the
	# accuracy, i.e. the fraction of completed passes, of every sample.
	passes10 = pd.Series([
		np.mean(np.random.choice(passes, 10))
		for _ in range(1000)
	])

	# Show how the 10-pass accuracies are distributed: a kernel density
	# estimate restricted to [0, 1], plus a dashed vertical line at their mean.
	fig, ax = plt.subplots()
	passes10.plot.kde(
		ax=ax,
		xlim=(0, 1),
		title='Distribution of 10 Passes Accuracies',
	)
	ax.axvline(passes10.mean(), alpha=0.7, linestyle='--', color='k')

	# Fraction of the 10-pass samples with an accuracy of 50% or lower
	# (about 9% in the original author's run).
	passes10.le(0.5).mean()

	# Repeat the experiment with 30 passes per sample instead of 10
	# (it can be plotted the same way as the 10-pass case).
	passes30 = pd.Series([
		np.mean(np.random.choice(passes, 30))
		for _ in range(1000)
	])

	# Fraction of the 30-pass samples with an accuracy of 50% or lower
	# (about 2% in the original author's run).
	passes30.le(0.5).mean()

	# Do the same, with 300 passes per sample this time
	# (it can be plotted the same way as the 10-pass case).
	# The result is stored in passes300: the check below reads that name, and
	# reusing passes30 here would both raise a NameError and overwrite the
	# 30-pass results.
	passes300 = pd.Series(
		[
			np.random.choice(passes, size=300).mean()
			for _ in range(1000)
		]
	)

	# Check what percentage of the samples has 50% accuracy or less
	# (about 0% in the original author's run).
	(passes300 <= 0.5).mean()