Skip to content

Instantly share code, notes, and snippets.

@codecademydev
Created January 31, 2021 03:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save codecademydev/6f7540e9827c117bf7494e963ee3332f to your computer and use it in GitHub Desktop.
Save codecademydev/6f7540e9827c117bf7494e963ee3332f to your computer and use it in GitHub Desktop.
Codecademy export
# A/B Test (Hypothesis Testing): Analyzing Farmburg's Jan 2021 Ping
# Import libraries
import codecademylib3
import pandas as pd
import numpy as np
#T2. Chi-square test: is making a purchase associated with the price group?
import pandas as pd
from scipy.stats import chi2_contingency

# Load the click data from `clicks.csv` into `abdata` and preview the first rows
abdata = pd.read_csv('clicks.csv')
print(abdata.head())

# Contingency table: one row per group, one column per is_purchase value
contingency_group_purch = pd.crosstab(abdata['group'], abdata['is_purchase'])
print("contingency_group_purchase:")
print(contingency_group_purch)
# Recorded output:
#   is_purchase    No  Yes
#   group
#   A            1350  316
#   B            1483  183
#   C            1583   83
# Group A has the highest number of purchases (316).

# Chi-square test of independence on the table above
chi2, pval, dof, expected = chi2_contingency(contingency_group_purch)
print(f" pval for contingency_group_purch is : {pval!s}")
# Recorded p-value: 2.4126213546684264e-35, far below the 0.05 significance
# threshold, so the null hypothesis (no association) is rejected.
# Conclusion: the purchase rate differs significantly between groups A, B and C.
#T5. Number of visitors and the purchase rate each price point needs
# Visitors are counted as distinct user_id values in the data.
num_visits = abdata.user_id.nunique()
print("Number of visitors each week: " + str(num_visits))

# For each price: the whole number of sales needed to reach 1000 in revenue
# (np.ceil, because a fraction of a sale cannot happen) and the share of
# visitors that number represents.
#
# The rates are kept at full precision because they are reused as the
# null-hypothesis proportion in the binomial tests further down. Rounding them
# first (e.g. 0.2023 -> 0.2 and 0.1006 -> 0.1 with the recorded 4998 visitors)
# would lower the target being tested. Only the printed value is rounded.
# NOTE: p-values recorded in comments elsewhere in this script came from a run
# that used the rounded rates, so re-running may give slightly different values.

# for $0.99
num_sales_needed_099 = np.ceil(1000 / 0.99)
print("num_sales_needed_099: " + str(num_sales_needed_099))
p_sales_needed_099 = num_sales_needed_099 / num_visits
print("p_sales_needed_099: " + str(round(p_sales_needed_099, 2)))

# for $1.99
num_sales_needed_199 = np.ceil(1000 / 1.99)
print("num_sales_needed_199: " + str(num_sales_needed_199))
p_sales_needed_199 = num_sales_needed_199 / num_visits
print("p_sales_needed_199: " + str(round(p_sales_needed_199, 2)))

# for $4.99
num_sales_needed_499 = np.ceil(1000 / 4.99)
print("num_sales_needed_499: " + str(num_sales_needed_499))
p_sales_needed_499 = num_sales_needed_499 / num_visits
print("p_sales_needed_499: " + str(round(p_sales_needed_499, 2)))

# Recorded output:
#   Number of visitors each week: 4998
#   num_sales_needed_099: 1011.0
#   p_sales_needed_099: 0.2
#   num_sales_needed_199: 503.0
#   p_sales_needed_199: 0.1
#   num_sales_needed_499: 201.0
#   p_sales_needed_499: 0.04
#T8. Sample size, number of purchases and observed purchase rate per group
# Row mask shared by all three groups: visits that ended in a purchase
made_purchase = abdata.is_purchase == 'Yes'

# Group A
in_group_A = abdata.group == 'A'
samp_size_099 = np.sum(in_group_A)
sales_099 = np.sum(in_group_A & made_purchase)
observed_purchase_rate_A = round(sales_099 / samp_size_099, 2)
print(f'Total number of visitors in group A is: {samp_size_099!s}')
print(f'The number of visitors in group A who made a purchase is: {sales_099!s}')
print(f"observed_purchase_rate_A {observed_purchase_rate_A!s}")

# Group B ($1.99)
in_group_B = abdata.group == 'B'
samp_size_199 = np.sum(in_group_B)
sales_199 = np.sum(in_group_B & made_purchase)
observed_purchase_rate_B = round(sales_199 / samp_size_199, 2)
print(f'Total number of visitors in group B is: {samp_size_199!s}')
print(f'The number of visitors in group B who made a purchase is: {sales_199!s}')
print(f"observed_purchase_rate_B {observed_purchase_rate_B!s}")

# Group C ($4.99)
in_group_C = abdata.group == 'C'
samp_size_499 = np.sum(in_group_C)
sales_499 = np.sum(in_group_C & made_purchase)
observed_purchase_rate_C = round(sales_499 / samp_size_499, 2)
print(f'Total number of visitors in group C is: {samp_size_499!s}')
print(f'The number of visitors in group C who made a purchase is: {sales_499!s}')
print(f"observed_purchase_rate_C {observed_purchase_rate_C!s}")

# Recorded output:
#   Total number of visitors in group A is: 1666
#   The number of visitors in group A who made a purchase is: 316
#   Total number of visitors in group B is: 1666
#   The number of visitors in group B who made a purchase is: 183
#   Total number of visitors in group C is: 1666
#   The number of visitors in group C who made a purchase is: 83
#T10. Binomial tests: does each group's observed purchase rate beat the rate
# needed to hit the revenue target at that group's price?
# NOTE: binom_test was deprecated in SciPy 1.10 and removed in 1.12; newer
# SciPy versions provide binomtest(...).pvalue instead.
from scipy.stats import binom_test

# Group A: two-sided ("different from target") and one-sided ("above target")
pval_A_2sided = binom_test(sales_099, n=samp_size_099, p=p_sales_needed_099)
pval_A_1sided = binom_test(
    sales_099, n=samp_size_099, p=p_sales_needed_099, alternative='greater'
)
print(f"pval_A_2sided is: {pval_A_2sided!s}")
print(f"pval_A_1sided is: {pval_A_1sided!s}")
# Recorded output:
#   pval_A_2sided is: 0.29791642311457833
#   pval_A_1sided is: 0.861100905910942
# Both p-values exceed the 0.05 significance threshold, so we fail to reject
# the null hypothesis: the observed purchase rate is not significantly greater
# than (or different from) the rate needed for the minimum revenue target.

# Group B ($1.99)
pval_B_2sided = binom_test(sales_199, n=samp_size_199, p=p_sales_needed_199)
pval_B_1sided = binom_test(
    sales_199, n=samp_size_199, p=p_sales_needed_199, alternative='greater'
)
print(f"pval_B_2sided is: {pval_B_2sided!s}")
print(f"pval_B_1sided is: {pval_B_1sided!s}")
# Recorded output:
#   pval_B_2sided is: 0.1778866596062784
#   pval_B_1sided is: 0.0982588983603735
# Both p-values exceed 0.05, so we fail to reject the null hypothesis: the
# observed purchase rate is not significantly greater than (or different from)
# the rate needed for the minimum revenue target.

# Group C ($4.99)
pval_C_2sided = binom_test(sales_499, n=samp_size_499, p=p_sales_needed_499)
pval_C_1sided = binom_test(
    sales_499, n=samp_size_499, p=p_sales_needed_499, alternative='greater'
)
print(f"pval_C_2sided is: {pval_C_2sided!s}")
print(f"pval_C_1sided is: {pval_C_1sided!s}")
# Recorded output (observed_purchase_rate_C 0.05, p_sales_needed_499: 0.04):
#   pval_C_2sided is: 0.04517298955409145
#   pval_C_1sided is: 0.02663954665996981
# Both p-values are below 0.05, so the null hypothesis is rejected: the
# observed purchase rate is significantly greater than (and different from)
# the rate needed for the minimum revenue target.

#T13. Conclusion:
# Group C's purchase rate is significantly higher than its target. Based on
# this, Brian should charge $4.99 for the upgrade package.
# Import libraries
import codecademylib3
import pandas as pd
import numpy as np
# Read in the `clicks.csv` file as `abdata`
abdata = pd.read_csv('clicks.csv')
# Inspect the first rows of the dataframe
print(abdata.head())
# Create a contingency table of group (rows) against is_purchase (columns)
Xtab = pd.crosstab(abdata.group, abdata.is_purchase)
# Print the contingency table
print(Xtab)
# Import chi2_contingency
from scipy.stats import chi2_contingency
# Run the chi-square test of independence; only the p-value is used below
chi2, pval, dof, expected = chi2_contingency(Xtab)
# Print the p-value
print(pval)
# Determine if the p-value is significant at the 0.05 level.
# Derived from pval rather than hard-coded so the flag cannot disagree with
# the test result if the data changes; bool() keeps it a plain Python bool.
is_significant = bool(pval < 0.05)
# Number of visits = number of rows in the data
num_visits = abdata.shape[0]
print(num_visits)

# Sales needed to reach 1000 in revenue at each price point (0.99, 1.99, 4.99)
num_sales_needed_099, num_sales_needed_199, num_sales_needed_499 = (
    1000 / price for price in (0.99, 1.99, 4.99)
)

# Purchase rate needed at each price point: required sales as a share of visits
p_sales_needed_099 = num_sales_needed_099 / num_visits
p_sales_needed_199 = num_sales_needed_199 / num_visits
p_sales_needed_499 = num_sales_needed_499 / num_visits

# Print the needed purchase rates in price order: 0.99, 1.99, 4.99
for needed_rate in (p_sales_needed_099, p_sales_needed_199, p_sales_needed_499):
    print(needed_rate)
# Rows where a purchase was made; combined with each group's mask below
bought = abdata.is_purchase == 'Yes'

# Sample size & sales for the 0.99 price point (group A)
group_a = abdata.group == 'A'
samp_size_099 = group_a.sum()
sales_099 = (group_a & bought).sum()
print(samp_size_099)
print(sales_099)

# Sample size & sales for the 1.99 price point (group B)
group_b = abdata.group == 'B'
samp_size_199 = group_b.sum()
sales_199 = (group_b & bought).sum()
print(samp_size_199)
print(sales_199)

# Sample size & sales for the 4.99 price point (group C)
group_c = abdata.group == 'C'
samp_size_499 = group_c.sum()
sales_499 = (group_c & bought).sum()
print(samp_size_499)
print(sales_499)
# Import binom_test
from scipy.stats import binom_test

# One-sided binomial tests: is each group's purchase count greater than
# expected under the purchase rate needed at that group's price point?
pvalueA = binom_test(sales_099, samp_size_099, p_sales_needed_099, alternative='greater')
pvalueB = binom_test(sales_199, samp_size_199, p_sales_needed_199, alternative='greater')
pvalueC = binom_test(sales_499, samp_size_499, p_sales_needed_499, alternative='greater')

# Print the p-values for groups A, B and C, in that order
print(pvalueA)
print(pvalueB)
print(pvalueC)

# Chosen price group for the final answer
final_answer = '4.99'
print(final_answer)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment