# codecademydev/script.py Secret

## script.py
# AB_Test - HYPOTHESIS TESTING - Analyzing Farmburg's Jan 2021 Ping
# Import libraries
import codecademylib3
import pandas as pd
import numpy as np

# Load the click log from `clicks.csv` into `abdata` and show its first rows.
abdata = pd.read_csv('clicks.csv')
print(abdata.head())

# T2. Chi-square test: is there an association between the two categorical
# columns `group` and `is_purchase`?
import pandas as pd
from scipy.stats import chi2_contingency

# Contingency table: one row per group, one column per purchase outcome.
contingency_group_purch = pd.crosstab(index=abdata['group'], columns=abdata['is_purchase'])
print("contingency_group_purchase:", contingency_group_purch, sep="\n")
# Recorded output:
#   is_purchase    No  Yes
#   group
#   A            1350  316
#   B            1483  183
#   C            1583   83
# Group A has the highest number of purchases (316).

# Run the chi-square test on the observed counts.
chi2, pval, dof, expected = chi2_contingency(observed=contingency_group_purch)
print(f" pval for contingency_group_purch is : {pval!s}")
# Recorded p-value: 2.4126213546684264e-35, below the 0.05 significance
# threshold, so the null hypothesis is rejected: the purchase rate differs
# significantly between groups A, B and C.
# T5. Number of visitors and the purchase rate each price point must reach.
# Every distinct user_id counts as one visitor (len(abdata) would count rows
# instead).
num_visits = (abdata.user_id).nunique()
print("Number of visitors each week: " + str(num_visits))

# For each price: the whole number of sales needed to bring in 1000 in revenue
# (rounded up, because a fraction of a sale is impossible) and that count as a
# share of the visitors.
#
# The p_sales_needed_* variables keep full precision because they are reused
# as the null-hypothesis proportions of the binomial tests further down;
# rounding them first (e.g. 503/4998 = 0.1006 -> 0.1) shifts the p-values.
# Only the printed text is rounded to two decimals, so the console output is
# unchanged.  P-values recorded in comments later in this script were produced
# with the earlier 2-decimal rates and may differ slightly on a fresh run.

# for $0.99
num_sales_needed_099 = np.ceil(1000 / 0.99)
print("num_sales_needed_099: " + str(num_sales_needed_099))
p_sales_needed_099 = num_sales_needed_099 / num_visits
print("p_sales_needed_099: " + str(round(p_sales_needed_099, 2)))

# for $1.99
num_sales_needed_199 = np.ceil(1000 / 1.99)
print("num_sales_needed_199: " + str(num_sales_needed_199))
p_sales_needed_199 = num_sales_needed_199 / num_visits
print("p_sales_needed_199: " + str(round(p_sales_needed_199, 2)))

# for $4.99
num_sales_needed_499 = np.ceil(1000 / 4.99)
print("num_sales_needed_499: " + str(num_sales_needed_499))
p_sales_needed_499 = num_sales_needed_499 / num_visits
print("p_sales_needed_499: " + str(round(p_sales_needed_499, 2)))

# Recorded output:
#   Number of visitors each week: 4998
#   num_sales_needed_099: 1011.0
#   p_sales_needed_099: 0.2
#   num_sales_needed_199: 503.0
#   p_sales_needed_199: 0.1
#   num_sales_needed_499: 201.0
#   p_sales_needed_499: 0.04
# T8. Sample size, number of purchases and observed purchase rate per group.
# The observed rates are rounded to two decimals for display.

# Group A ($0.99)
samp_size_099 = (abdata['group'] == 'A').sum()
sales_099 = ((abdata['group'] == 'A') & (abdata['is_purchase'] == 'Yes')).sum()
observed_purchase_rate_A = round(sales_099 / samp_size_099, 2)
print(f'Total number of visitors in group A is: {samp_size_099!s}')
print(f'The number of visitors in group A who made a purchase is: {sales_099!s}')
print(f"observed_purchase_rate_A {observed_purchase_rate_A!s}")

# Group B ($1.99)
samp_size_199 = (abdata['group'] == 'B').sum()
sales_199 = ((abdata['group'] == 'B') & (abdata['is_purchase'] == 'Yes')).sum()
observed_purchase_rate_B = round(sales_199 / samp_size_199, 2)
print(f'Total number of visitors in group B is: {samp_size_199!s}')
print(f'The number of visitors in group B who made a purchase is: {sales_199!s}')
print(f"observed_purchase_rate_B {observed_purchase_rate_B!s}")

# Group C ($4.99)
samp_size_499 = (abdata['group'] == 'C').sum()
sales_499 = ((abdata['group'] == "C") & (abdata['is_purchase'] == 'Yes')).sum()
observed_purchase_rate_C = round(sales_499 / samp_size_499, 2)
print(f'Total number of visitors in group C is: {samp_size_499!s}')
print(f'The number of visitors in group C who made a purchase is: {sales_499!s}')
print(f"observed_purchase_rate_C {observed_purchase_rate_C!s}")

# Recorded output:
#   Total number of visitors in group A is: 1666
#   The number of visitors in group A who made a purchase is: 316
#   Total number of visitors in group B is: 1666
#   The number of visitors in group B who made a purchase is: 183
#   Total number of visitors in group C is: 1666
#   The number of visitors in group C who made a purchase is: 83
# T10. Binomial tests: compare each group's observed purchase count with the
# purchase rate that price needs to reach the revenue target.
#
# `scipy.stats.binom_test` was deprecated in SciPy 1.10 and removed in 1.12.
# Import it when it exists; otherwise provide a same-named wrapper around its
# replacement `binomtest`, so the calls below work on old and new SciPy alike.
try:
    from scipy.stats import binom_test
except ImportError:
    from scipy.stats import binomtest

    def binom_test(x, n=None, p=0.5, alternative='two-sided'):
        """Return the p-value of a binomial test, like the removed SciPy function.

        Args:
            x: number of successes.
            n: number of trials (required; the legacy form that passed a
                pair of counts as `x` is not supported by this wrapper).
            p: success probability under the null hypothesis.
            alternative: 'two-sided', 'greater' or 'less'.

        Returns:
            The test's p-value as a float.

        Raises:
            TypeError: if `n` is not given.
        """
        if n is None:
            raise TypeError("binom_test wrapper requires the number of trials n")
        return binomtest(x, n, p, alternative=alternative).pvalue

# Group A ($0.99)
pval_A_2sided = binom_test(sales_099, samp_size_099, p_sales_needed_099)
pval_A_1sided = binom_test(sales_099, samp_size_099, p_sales_needed_099, alternative='greater')
print("pval_A_2sided is: " + str(pval_A_2sided))
print("pval_A_1sided is: " + str(pval_A_1sided))
# Recorded output:
#   pval_A_2sided is: 0.29791642311457833
#   pval_A_1sided is: 0.861100905910942
# Both p-values are above the 0.05 significance threshold, so we fail to
# reject the null hypothesis: the observed purchase rate is not significantly
# greater than (or different from) the rate needed for the revenue target.

# Group B ($1.99)
pval_B_2sided = binom_test(sales_199, samp_size_199, p_sales_needed_199)
pval_B_1sided = binom_test(sales_199, samp_size_199, p_sales_needed_199, alternative='greater')
print("pval_B_2sided is: " + str(pval_B_2sided))
print("pval_B_1sided is: " + str(pval_B_1sided))
# Recorded output:
#   pval_B_2sided is: 0.1778866596062784
#   pval_B_1sided is: 0.0982588983603735
# Both p-values are above 0.05, so we fail to reject the null hypothesis: the
# observed purchase rate is not significantly greater than (or different
# from) the rate needed for the revenue target.

# Group C ($4.99)
pval_C_2sided = binom_test(sales_499, samp_size_499, p_sales_needed_499)
pval_C_1sided = binom_test(sales_499, samp_size_499, p_sales_needed_499, alternative='greater')
print("pval_C_2sided is: " + str(pval_C_2sided))
print("pval_C_1sided is: " + str(pval_C_1sided))
# Recorded output (observed_purchase_rate_C 0.05, p_sales_needed_499: 0.04):
#   pval_C_2sided is: 0.04517298955409145
#   pval_C_1sided is: 0.02663954665996981
# Both p-values are below 0.05, so the null hypothesis is rejected: the
# observed purchase rate is significantly greater than the rate needed for
# the revenue target.

# T13. Conclusion:
# Group C's purchase rate is significantly higher than its target. Based on
# this information, Brian should charge $4.99 for the upgrade package.

## solution.py
# Import libraries
import codecademylib3
import pandas as pd
import numpy as np

# Read in the `clicks.csv` file as `abdata`
abdata = pd.read_csv('clicks.csv')

# Inspect the first rows of the dataframe
print(abdata.head())

# Cross-tabulate price group (rows) against purchase outcome (columns)
Xtab = pd.crosstab(abdata.group, abdata.is_purchase)

# Print the contingency table
print(Xtab)

# Import chi2_contingency
from scipy.stats import chi2_contingency

# Chi-square test on the table; only the p-value is used below
chi2, pval, dof, expected = chi2_contingency(Xtab)

# Print the p-value
print(pval)

# Derive significance from the computed p-value (0.05 threshold) instead of
# hard-coding the answer, so the flag cannot disagree with the data.
is_significant = bool(pval < 0.05)

# Number of visits: one per row of `abdata`.
num_visits = abdata.shape[0]
print(num_visits)

# Sales needed to bring in 1000 at each price point (0.99, 1.99, 4.99).
num_sales_needed_099 = 1000 / 0.99
num_sales_needed_199 = 1000 / 1.99
num_sales_needed_499 = 1000 / 4.99

# The same targets expressed as a share of all visits, i.e. the purchase rate
# each price point has to achieve.
p_sales_needed_099 = num_sales_needed_099 / num_visits
p_sales_needed_199 = num_sales_needed_199 / num_visits
p_sales_needed_499 = num_sales_needed_499 / num_visits

# Print the three required purchase rates, one per line (0.99, 1.99, 4.99).
print(p_sales_needed_099, p_sales_needed_199, p_sales_needed_499, sep='\n')

# For each price point: the number of visitors assigned to its group and how
# many of them purchased. Each pair is printed as size, then sales.

# 0.99 price point (group A)
samp_size_099 = (abdata['group'] == 'A').sum()
sales_099 = ((abdata['group'] == 'A') & (abdata['is_purchase'] == 'Yes')).sum()
print(samp_size_099, sales_099, sep='\n')

# 1.99 price point (group B)
samp_size_199 = (abdata['group'] == 'B').sum()
sales_199 = ((abdata['group'] == 'B') & (abdata['is_purchase'] == 'Yes')).sum()
print(samp_size_199, sales_199, sep='\n')

# 4.99 price point (group C)
samp_size_499 = (abdata['group'] == 'C').sum()
sales_499 = ((abdata['group'] == 'C') & (abdata['is_purchase'] == 'Yes')).sum()
print(samp_size_499, sales_499, sep='\n')

# Import the binomial test.
# `scipy.stats.binom_test` was deprecated in SciPy 1.10 and removed in 1.12.
# Import it when it exists; otherwise provide a same-named wrapper around its
# replacement `binomtest`, so the calls below work on old and new SciPy alike.
try:
    from scipy.stats import binom_test
except ImportError:
    from scipy.stats import binomtest

    def binom_test(x, n=None, p=0.5, alternative='two-sided'):
        """Return the p-value of a binomial test, like the removed SciPy function.

        Args:
            x: number of successes.
            n: number of trials (required; the legacy form that passed a
                pair of counts as `x` is not supported by this wrapper).
            p: success probability under the null hypothesis.
            alternative: 'two-sided', 'greater' or 'less'.

        Returns:
            The test's p-value as a float.

        Raises:
            TypeError: if `n` is not given.
        """
        if n is None:
            raise TypeError("binom_test wrapper requires the number of trials n")
        return binomtest(x, n, p, alternative=alternative).pvalue

# One-sided test for Group A: is the observed purchase rate greater than the
# rate required at 0.99?
pvalueA = binom_test(sales_099, n=samp_size_099, p=p_sales_needed_099, alternative='greater')

# Print the p-value for Group A
print(pvalueA)

# Same one-sided test for Group B against the rate required at 1.99
pvalueB = binom_test(sales_199, n=samp_size_199, p=p_sales_needed_199, alternative='greater')

# Print the p-value for Group B
print(pvalueB)

# Same one-sided test for Group C against the rate required at 4.99
pvalueC = binom_test(sales_499, n=samp_size_499, p=p_sales_needed_499, alternative='greater')

# Print the p-value for Group C
print(pvalueC)

# Chosen price point, set by hand from the p-values printed above
final_answer = '4.99'

# Print the chosen price group
print(final_answer)
	# AB_Test - HYPOTHESIS TESTING - Analyzing Farmburg's Jan 2021 Ping
	# Import libraries
	import codecademylib3
	import pandas as pd
	import numpy as np

	# Read in the `clicks.csv` file as `abdata` and print its first rows
	abdata = pd.read_csv('clicks.csv')
	print(abdata.head())
	# T2. Chi-square test - is there a relationship between the two categorical
	# variables `group` and `is_purchase`?
	import pandas as pd
	from scipy.stats import chi2_contingency
	# Create a contingency table: one row per group, one column per purchase outcome
	contingency_group_purch = pd.crosstab(abdata.group, abdata.is_purchase)
	print("contingency_group_purchase:")
	print(contingency_group_purch)
	# Recorded output:
	#   is_purchase    No  Yes
	#   group
	#   A            1350  316
	#   B            1483  183
	#   C            1583   83
	# Group A has the highest number of purchases, which is 316.
	# Run a chi-square test on the table; only `pval` is used afterwards
	chi2, pval, dof, expected = chi2_contingency(contingency_group_purch)
	print(" pval for contingency_group_purch is : " + str(pval))
	# Recorded p-value: 2.4126213546684264e-35, below the 0.05 significance
	# threshold, so the null hypothesis is rejected.
	# Yes, there is a significant difference in the purchase rate for groups A, B and C.
	# T5. Number of visitors: every distinct user_id counts once
	num_visits = (abdata.user_id).nunique()
	# Alternative that counts rows instead of distinct users: num_visits = len(abdata)
	print("Number of visitors each week: " + str(num_visits))
	# For each price: sales needed to bring in 1000 (rounded up to a whole sale)
	# and that count as a share of the visitors.
	# NOTE(review): the p_sales_needed_* rates are rounded to 2 decimals here and
	# are later passed to binom_test as the null-hypothesis proportion, so the
	# rounding (e.g. 503/4998 = 0.1006 -> 0.1) affects those p-values.
	# for $0.99
	num_sales_needed_099 = np.ceil(1000/ 0.99)
	print("num_sales_needed_099: " + str(num_sales_needed_099))
	p_sales_needed_099 = round(num_sales_needed_099/ num_visits, 2)
	print("p_sales_needed_099: " + str(p_sales_needed_099))
	# for $1.99
	num_sales_needed_199 = np.ceil(1000/ 1.99)
	print("num_sales_needed_199: " + str(num_sales_needed_199))
	p_sales_needed_199 = round(num_sales_needed_199/ num_visits, 2)
	print("p_sales_needed_199: " + str(p_sales_needed_199))

	# for $4.99
	num_sales_needed_499 = np.ceil(1000/ 4.99)
	print("num_sales_needed_499: " + str(num_sales_needed_499))
	p_sales_needed_499 = round(num_sales_needed_499/ num_visits, 2)
	print("p_sales_needed_499: " + str(p_sales_needed_499))
	# Recorded output:
	#   Number of visitors each week: 4998
	#   num_sales_needed_099: 1011.0
	#   p_sales_needed_099: 0.2
	#   num_sales_needed_199: 503.0
	#   p_sales_needed_199: 0.1
	#   num_sales_needed_499: 201.0
	#   p_sales_needed_499: 0.04
	# T8. Sample size, number of purchases and observed purchase rate - Group A
	# (the observed rate is rounded to 2 decimals and only printed)
	samp_size_099 = np.sum(abdata.group == 'A')
	sales_099 = np.sum((abdata.group == 'A') & (abdata.is_purchase == 'Yes'))
	observed_purchase_rate_A = round(sales_099/ samp_size_099, 2)
	print('Total number of visitors in group A is: ' + str(samp_size_099))
	print('The number of visitors in group A who made a purchase is: ' + str(sales_099))
	print("observed_purchase_rate_A " + str(observed_purchase_rate_A))
	# Recorded output:
	#   Total number of visitors in group A is: 1666
	#   The number of visitors in group A who made a purchase is: 316
	# T8. Sample size, number of purchases and observed purchase rate - Group B $1.99
	samp_size_199 = np.sum(abdata.group == 'B')
	sales_199 = np.sum((abdata.group == 'B') & (abdata.is_purchase == 'Yes'))
	observed_purchase_rate_B = round(sales_199/ samp_size_199, 2)
	print('Total number of visitors in group B is: ' + str(samp_size_199))
	print('The number of visitors in group B who made a purchase is: ' + str(sales_199))
	print("observed_purchase_rate_B " + str(observed_purchase_rate_B))
	# T8. Sample size, number of purchases and observed purchase rate - Group C $4.99
	samp_size_499 = np.sum(abdata.group == 'C')
	sales_499 = np.sum((abdata.group == "C") & (abdata.is_purchase == 'Yes'))
	observed_purchase_rate_C = round(sales_499/ samp_size_499, 2)
	print('Total number of visitors in group C is: ' + str(samp_size_499))
	print('The number of visitors in group C who made a purchase is: ' + str(sales_499))
	print("observed_purchase_rate_C " + str(observed_purchase_rate_C))
	# Recorded output for all three groups:
	#   Total number of visitors in group A is: 1666
	#   The number of visitors in group A who made a purchase is: 316
	#   Total number of visitors in group B is: 1666
	#   The number of visitors in group B who made a purchase is: 183
	#   Total number of visitors in group C is: 1666
	#   The number of visitors in group C who made a purchase is: 83
	# T10. Binomial test for group A: observed purchases vs. the required rate.
	# NOTE(review): scipy.stats.binom_test was deprecated in SciPy 1.10 and
	# removed in 1.12; this import fails on newer SciPy (binomtest replaces it).
	from scipy.stats import binom_test
	pval_A_2sided = binom_test(sales_099, samp_size_099, p_sales_needed_099)
	pval_A_1sided = binom_test(sales_099, samp_size_099, p_sales_needed_099, alternative = 'greater')
	print("pval_A_2sided is: " + str(pval_A_2sided))
	print("pval_A_1sided is: " + str(pval_A_1sided))
	# Recorded output:
	#   pval_A_2sided is: 0.29791642311457833
	#   pval_A_1sided is: 0.861100905910942

	# pval_A > 0.05 (significance threshold): fail to reject H_null; there is no significant difference.
	# The observed purchase rate is not significantly greater or different than the purchase rate that results in the minimum revenue target.
	# T10. Binomial test for group B $1.99
	from scipy.stats import binom_test
	pval_B_2sided = binom_test(sales_199, samp_size_199, p_sales_needed_199)
	pval_B_1sided = binom_test(sales_199, samp_size_199, p_sales_needed_199, alternative = 'greater')
	print("pval_B_2sided is: " + str(pval_B_2sided))
	print("pval_B_1sided is: " + str(pval_B_1sided))
	# Recorded output:
	#   pval_B_2sided is: 0.1778866596062784
	#   pval_B_1sided is: 0.0982588983603735

	# pval_B > 0.05 (significance threshold): fail to reject H_null; there is no significant difference.
	# The observed purchase rate is not significantly greater or different than the purchase rate that results in the minimum revenue target.
	# T10. Binomial test for group C $4.99
	from scipy.stats import binom_test
	pval_C_2sided = binom_test(sales_499, samp_size_499, p_sales_needed_499)
	pval_C_1sided = binom_test(sales_499, samp_size_499, p_sales_needed_499, alternative = 'greater')
	print("pval_C_2sided is: " + str(pval_C_2sided))
	print("pval_C_1sided is: " + str(pval_C_1sided))
	# Recorded output (observed_purchase_rate_C 0.05, p_sales_needed_499: 0.04):
	#   pval_C_2sided is: 0.04517298955409145
	#   pval_C_1sided is: 0.02663954665996981

	# pval_C < 0.05 (significance threshold): reject H_null; there is a significant difference.
	# The observed purchase rate is significantly greater or different than the purchase rate that results in the minimum revenue target.
	# T13. Conclusion:
	# Group C purchase rate is significantly higher than the target. Based on this information, Brian should charge $4.99 for the upgrade package.