-
-
Save codecademydev/6f7540e9827c117bf7494e963ee3332f to your computer and use it in GitHub Desktop.
Codecademy export
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# A/B Test - Hypothesis Testing: Analyzing Farmburg's Jan 2021 Ping
# Import libraries | |
import codecademylib3 | |
import pandas as pd | |
import numpy as np | |
# Load the click log from `clicks.csv` into `abdata` and preview the first rows.
abdata = pd.read_csv('clicks.csv')
print(abdata.head())

# T2. Chi-square test: relationship between two categorical variables
# (`group` and `is_purchase`).
import pandas as pd
from scipy.stats import chi2_contingency

# Cross-tabulate group (rows) against purchase outcome (columns).
contingency_group_purch = pd.crosstab(abdata['group'], abdata['is_purchase'])
print("contingency_group_purchase:")
print(contingency_group_purch)
# Recorded output:
#   is_purchase    No  Yes
#   group
#   A            1350  316
#   B            1483  183
#   C            1583   83
# Group A has the highest number of purchases (316).

# Run the chi-square test on the table; only the p-value is reported.
chi2, pval, dof, expected = chi2_contingency(contingency_group_purch)
print(f" pval for contingency_group_purch is : {pval!s}")
# Recorded output: 2.4126213546684264e-35, which is below the 0.05 significance
# threshold, so the null hypothesis is rejected: the purchase rate differs
# significantly between groups A, B and C.
# T5. Number of visitors, and the purchase rate each price point needs.
# The visitor count is the number of distinct `user_id` values in the data.
num_visits = abdata.user_id.nunique()
print("Number of visitors each week: " + str(num_visits))

# For each price: the number of sales needed to bring in at least 1000 in
# revenue (rounded up to whole sales), and that count as a share of visitors.
#
# The p_sales_needed_* values are kept at full precision because they serve as
# the null-hypothesis proportions of the binomial tests; rounding them first
# (e.g. 0.0402 -> 0.04) shifts the hypothesis being tested. They are rounded
# to two decimals for display only.

# for $0.99
num_sales_needed_099 = np.ceil(1000 / 0.99)
print("num_sales_needed_099: " + str(num_sales_needed_099))
p_sales_needed_099 = num_sales_needed_099 / num_visits
print("p_sales_needed_099: " + str(round(p_sales_needed_099, 2)))

# for $1.99
num_sales_needed_199 = np.ceil(1000 / 1.99)
print("num_sales_needed_199: " + str(num_sales_needed_199))
p_sales_needed_199 = num_sales_needed_199 / num_visits
print("p_sales_needed_199: " + str(round(p_sales_needed_199, 2)))

# for $4.99
num_sales_needed_499 = np.ceil(1000 / 4.99)
print("num_sales_needed_499: " + str(num_sales_needed_499))
p_sales_needed_499 = num_sales_needed_499 / num_visits
print("p_sales_needed_499: " + str(round(p_sales_needed_499, 2)))

# Recorded output:
#   Number of visitors each week: 4998
#   num_sales_needed_099: 1011.0
#   p_sales_needed_099: 0.2
#   num_sales_needed_199: 503.0
#   p_sales_needed_199: 0.1
#   num_sales_needed_499: 201.0
#   p_sales_needed_499: 0.04
# T8. Sample size, number of purchases and observed purchase rate per group.
# The observed rates are rounded to two decimals and are only printed.

# Group A ($0.99)
samp_size_099 = (abdata['group'] == 'A').sum()
sales_099 = ((abdata['group'] == 'A') & (abdata['is_purchase'] == 'Yes')).sum()
observed_purchase_rate_A = round(sales_099 / samp_size_099, 2)
print(f"Total number of visitors in group A is: {samp_size_099!s}")
print(f"The number of visitors in group A who made a purchase is: {sales_099!s}")
print(f"observed_purchase_rate_A {observed_purchase_rate_A!s}")

# Group B ($1.99)
samp_size_199 = (abdata['group'] == 'B').sum()
sales_199 = ((abdata['group'] == 'B') & (abdata['is_purchase'] == 'Yes')).sum()
observed_purchase_rate_B = round(sales_199 / samp_size_199, 2)
print(f"Total number of visitors in group B is: {samp_size_199!s}")
print(f"The number of visitors in group B who made a purchase is: {sales_199!s}")
print(f"observed_purchase_rate_B {observed_purchase_rate_B!s}")

# Group C ($4.99)
samp_size_499 = (abdata['group'] == 'C').sum()
sales_499 = ((abdata['group'] == 'C') & (abdata['is_purchase'] == 'Yes')).sum()
observed_purchase_rate_C = round(sales_499 / samp_size_499, 2)
print(f"Total number of visitors in group C is: {samp_size_499!s}")
print(f"The number of visitors in group C who made a purchase is: {sales_499!s}")
print(f"observed_purchase_rate_C {observed_purchase_rate_C!s}")

# Recorded output:
#   Total number of visitors in group A is: 1666
#   The number of visitors in group A who made a purchase is: 316
#   Total number of visitors in group B is: 1666
#   The number of visitors in group B who made a purchase is: 183
#   Total number of visitors in group C is: 1666
#   The number of visitors in group C who made a purchase is: 83
# T10. Binomial tests: compare each group's observed purchases with the
# purchase rate its price point needs (p_sales_needed_*).
#
# scipy.stats.binom_test was deprecated in SciPy 1.10 and removed in 1.12.
# Prefer its replacement, binomtest, wrapped so that it returns only the
# p-value like the legacy function did; fall back to the legacy function on
# SciPy versions that predate binomtest.
try:
    from scipy.stats import binomtest
except ImportError:
    from scipy.stats import binom_test
else:
    def binom_test(x, n, p=0.5, alternative='two-sided'):
        """Return the binomial-test p-value for x successes in n trials.

        Covers only the scalar (x, n) call form of the legacy
        scipy.stats.binom_test, which is the form this script uses.
        """
        return binomtest(int(x), int(n), p=p, alternative=alternative).pvalue

# Group A ($0.99)
pval_A_2sided = binom_test(sales_099, samp_size_099, p_sales_needed_099)
pval_A_1sided = binom_test(sales_099, samp_size_099, p_sales_needed_099, alternative='greater')
print("pval_A_2sided is: " + str(pval_A_2sided))
print("pval_A_1sided is: " + str(pval_A_1sided))
# Recorded output from an earlier run:
#   pval_A_2sided is: 0.29791642311457833
#   pval_A_1sided is: 0.861100905910942
# Both exceed the 0.05 significance threshold, so we fail to reject the null
# hypothesis: the observed purchase rate is not significantly greater than (or
# different from) the rate needed to reach the minimum revenue target.

# Group B ($1.99)
pval_B_2sided = binom_test(sales_199, samp_size_199, p_sales_needed_199)
pval_B_1sided = binom_test(sales_199, samp_size_199, p_sales_needed_199, alternative='greater')
print("pval_B_2sided is: " + str(pval_B_2sided))
print("pval_B_1sided is: " + str(pval_B_1sided))
# Recorded output from an earlier run:
#   pval_B_2sided is: 0.1778866596062784
#   pval_B_1sided is: 0.0982588983603735
# Both exceed 0.05, so again we fail to reject the null hypothesis: the
# observed purchase rate is not significantly greater than (or different from)
# the rate needed to reach the minimum revenue target.

# Group C ($4.99)
pval_C_2sided = binom_test(sales_499, samp_size_499, p_sales_needed_499)
pval_C_1sided = binom_test(sales_499, samp_size_499, p_sales_needed_499, alternative='greater')
print("pval_C_2sided is: " + str(pval_C_2sided))
print("pval_C_1sided is: " + str(pval_C_1sided))
# Recorded output from an earlier run
# (observed_purchase_rate_C 0.05, p_sales_needed_499: 0.04):
#   pval_C_2sided is: 0.04517298955409145
#   pval_C_1sided is: 0.02663954665996981
# Both are below 0.05, so the null hypothesis is rejected: the observed
# purchase rate is significantly greater than the rate needed to reach the
# minimum revenue target.

# T13. Conclusion:
# Group C's purchase rate is significantly higher than its target. Based on
# this information, Brian should charge $4.99 for the upgrade package.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import libraries | |
import codecademylib3 | |
import pandas as pd | |
import numpy as np | |
# Read in the `clicks.csv` file as `abdata`
abdata = pd.read_csv('clicks.csv')
# Inspect the first rows of the dataframe
print(abdata.head())

# Create a contingency table of group (rows) vs. purchase outcome (columns)
Xtab = pd.crosstab(abdata.group, abdata.is_purchase)
# Print the contingency table
print(Xtab)

# Import chi2_contingency
from scipy.stats import chi2_contingency

# Run the chi-square test on the table; only the p-value is used below
chi2, pval, dof, expected = chi2_contingency(Xtab)
# Print the p-value
print(pval)

# Determine if the p-value is significant at the 0.05 level.
# Derived from the test result rather than hard-coded, so it cannot drift out
# of sync with `pval` if the data changes.
is_significant = bool(pval < 0.05)
# Number of visits = number of rows in `abdata`
num_visits = abdata.shape[0]
print(num_visits)

# Sales needed at each price point to bring in 1000 in revenue
num_sales_needed_099 = 1000 / 0.99
num_sales_needed_199 = 1000 / 1.99
num_sales_needed_499 = 1000 / 4.99

# The same targets expressed as a fraction of visits (required purchase rate)
p_sales_needed_099 = num_sales_needed_099 / num_visits
p_sales_needed_199 = num_sales_needed_199 / num_visits
p_sales_needed_499 = num_sales_needed_499 / num_visits

# Print the required purchase rates for 0.99, 1.99 and 4.99, in that order
print(p_sales_needed_099)
print(p_sales_needed_199)
print(p_sales_needed_499)
# Sample size (rows in the group) and sales (rows in the group whose
# is_purchase is 'Yes') for each price point, printed as they are computed.

# 0.99 price point: group A
samp_size_099 = abdata['group'].eq('A').sum()
sales_099 = (abdata['group'].eq('A') & abdata['is_purchase'].eq('Yes')).sum()
print(samp_size_099)
print(sales_099)

# 1.99 price point: group B
samp_size_199 = abdata['group'].eq('B').sum()
sales_199 = (abdata['group'].eq('B') & abdata['is_purchase'].eq('Yes')).sum()
print(samp_size_199)
print(sales_199)

# 4.99 price point: group C
samp_size_499 = abdata['group'].eq('C').sum()
sales_499 = (abdata['group'].eq('C') & abdata['is_purchase'].eq('Yes')).sum()
print(samp_size_499)
print(sales_499)
# Import the binomial test.
# scipy.stats.binom_test was deprecated in SciPy 1.10 and removed in 1.12.
# Prefer its replacement, binomtest, wrapped so that it returns only the
# p-value like the legacy function did; fall back to the legacy function on
# SciPy versions that predate binomtest.
try:
    from scipy.stats import binomtest
except ImportError:
    from scipy.stats import binom_test
else:
    def binom_test(x, n, p=0.5, alternative='two-sided'):
        """Return the binomial-test p-value for x successes in n trials.

        Covers only the scalar (x, n) call form of the legacy
        scipy.stats.binom_test, which is the form this script uses.
        """
        return binomtest(int(x), int(n), p=p, alternative=alternative).pvalue

# One-sided tests: is each group's purchase rate greater than the rate its
# price point needs?

# Calculate and print the p-value for Group A
pvalueA = binom_test(sales_099, n=samp_size_099, p=p_sales_needed_099, alternative='greater')
print(pvalueA)

# Calculate and print the p-value for Group B
pvalueB = binom_test(sales_199, n=samp_size_199, p=p_sales_needed_199, alternative='greater')
print(pvalueB)

# Calculate and print the p-value for Group C
pvalueC = binom_test(sales_499, n=samp_size_499, p=p_sales_needed_499, alternative='greater')
print(pvalueC)

# Set the final answer variable to the chosen price
final_answer = '4.99'
# Print the chosen price group
print(final_answer)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment