Skip to content

Instantly share code, notes, and snippets.

@jing-jin-mc
Created December 16, 2018 13:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jing-jin-mc/225057913131d9aa928dd8a5f8d3d9e7 to your computer and use it in GitHub Desktop.
Save jing-jin-mc/225057913131d9aa928dd8a5f8d3d9e7 to your computer and use it in GitHub Desktop.
Get AB test data and do a simple AB test Analysis on the result
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"metadata": {},
"cell_type": "markdown",
"source": "# A Simple AB Test Analysis "
},
{
"metadata": {
"code_folding": [
0
],
"trusted": true
},
"cell_type": "code",
"source": "# Imports the Google Cloud client library\nfrom google.cloud import bigquery\nimport pandas as pd\nimport numpy as np\nimport scipy.stats\nimport statsmodels.stats.power as smp\nimport statsmodels.stats.proportion as sms\nimport matplotlib.pyplot as plt\nfrom IPython.display import display\npd.options.display.float_format = '{:.2f}'.format",
"execution_count": 22,
"outputs": []
},
{
"metadata": {
"code_folding": [],
"trusted": true
},
"cell_type": "code",
"source": "# Instantiate the BigQuery client that the query cell uses to run QUERY.\n# No project or credentials are passed, so the client library's defaults apply.\nbigquery_client = bigquery.Client()",
"execution_count": 2,
"outputs": []
},
{
"metadata": {
"code_folding": [],
"trusted": true
},
"cell_type": "code",
"source": "# Configure the experiment ID and the test date range.\n# The dates are YYYYMMDD strings; QUERY compares them against _TABLE_SUFFIX of the\n# ga_sessions_* tables with BETWEEN, so both bounds are inclusive.\nstart_date = '20181027'\nend_date = '20181116'\nexperiment_id = 'Y_sXp3zqTuS02_sp31o8Pg'",
"execution_count": 3,
"outputs": []
},
{
"metadata": {
"code_folding": [
0
],
"trusted": true
},
"cell_type": "code",
"source": "# BigQuery SQL that collects the experiment data.\n# Inner query (dataset): one row per visitor / session / variant / device, flagging\n# add-to-cart hits (eCommerceAction.action_type = '3') and sessions with a transaction;\n# transactionRevenue is divided by 1,000,000.\n# Outer query: aggregates the sessions to one row per clientId / variant / device.\n# The table name is hard-coded below - change it to point at your own ga_sessions_* tables.\n# @START_DATE, @END_DATE and @EXPERIMENT_ID are named query parameters bound at run time.\nQUERY = \"\"\"\n WITH\n dataset AS(\n SELECT\n fullvisitorId AS clientId,\n visitId AS sessionId,\n test.experimentVariant AS variants,\n device.deviceCategory AS device,\n MAX (IF (h.eCommerceAction.action_type = '3',\n 1,\n 0)\n ) AS add_to_cart,\n IF (MAX(totals.transactions)>0,\n 1,\n 0) AS make_a_trans,\n IF (MAX(totals.transactionRevenue)>0,\n MAX(totals.transactionRevenue/1000000),\n 0) AS trans_rev\n FROM\n `bigquery-176409.109577992.ga_sessions_*`,\n UNNEST (hits) AS h,\n UNNEST(h.experiment) AS test\n WHERE\n _TABLE_SUFFIX BETWEEN @START_DATE\n AND @END_DATE\n AND test.experimentId = @EXPERIMENT_ID\n GROUP BY\n fullvisitorId,\n visitId,\n variants,\n device)\n SELECT\n clientId,\n variants,\n device,\n SUM(add_to_cart) AS num_atc,\n COUNT(sessionId) AS num_sess,\n SUM(make_a_trans) AS num_trans,\n SUM(trans_rev) AS rev\n FROM\n dataset\n GROUP BY\n clientId,\n variants,\n device\n \"\"\"",
"execution_count": 11,
"outputs": []
},
{
"metadata": {
"code_folding": [],
"trusted": true
},
"cell_type": "code",
"source": "# Bind the named query parameters, run QUERY and load the result into a DataFrame.\n# The table name is not a parameter; it is hard-coded inside QUERY.\nquery_params = [\n    bigquery.ScalarQueryParameter(param_name, 'STRING', param_value)\n    for param_name, param_value in (\n        ('START_DATE', start_date),\n        ('END_DATE', end_date),\n        ('EXPERIMENT_ID', experiment_id),\n    )\n]\n\njob_config = bigquery.QueryJobConfig()\njob_config.query_parameters = query_params\nquery_job = bigquery_client.query(QUERY, job_config=job_config)\ndf = query_job.to_dataframe()\ndf.head()",
"execution_count": 12,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>clientId</th>\n <th>variants</th>\n <th>device</th>\n <th>num_atc</th>\n <th>num_sess</th>\n <th>num_trans</th>\n <th>rev</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>3456892674124422100</td>\n <td>0</td>\n <td>mobile</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0.00</td>\n </tr>\n <tr>\n <th>1</th>\n <td>4960553732248368850</td>\n <td>0</td>\n <td>mobile</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0.00</td>\n </tr>\n <tr>\n <th>2</th>\n <td>3369147557328129456</td>\n <td>0</td>\n <td>mobile</td>\n <td>0</td>\n <td>4</td>\n <td>0</td>\n <td>0.00</td>\n </tr>\n <tr>\n <th>3</th>\n <td>2474884608469966745</td>\n <td>0</td>\n <td>mobile</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0.00</td>\n </tr>\n <tr>\n <th>4</th>\n <td>2667782236950514782</td>\n <td>1</td>\n <td>mobile</td>\n <td>0</td>\n <td>2</td>\n <td>0</td>\n <td>0.00</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": " clientId variants device num_atc num_sess num_trans rev\n0 3456892674124422100 0 mobile 0 1 0 0.00\n1 4960553732248368850 0 mobile 0 1 0 0.00\n2 3369147557328129456 0 mobile 0 4 0 0.00\n3 2474884608469966745 0 mobile 0 1 0 0.00\n4 2667782236950514782 1 mobile 0 2 0 0.00"
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"code_folding": [
0
],
"trusted": true
},
"cell_type": "code",
"source": "######### Function: Get data overview ##########\ndef get_data_overview(df, test_names):\n    \"\"\"Summarise the KPIs of every test variant.\n\n    Parameters\n    ----------\n    df : pandas.DataFrame\n        Rows with the columns 'variants', 'num_sess', 'num_atc', 'num_trans' and 'rev'.\n    test_names : iterable\n        Variant identifiers, compared against df['variants'].\n\n    Returns\n    -------\n    tuple\n        (result_df, result_dic): a DataFrame with one row per variant and a dict\n        mapping each variant to its KPI dict. Ratios with a zero denominator\n        (no rows or no transactions for a variant) are NaN.\n    \"\"\"\n    def ratio(numerator, denominator):\n        # An empty device split or a variant without purchases must not abort the overview.\n        return float(numerator) / float(denominator) if denominator else float('nan')\n\n    column_order = ['Test', '#Sess', '#Users', '#ATC', '#Conveted', '∑ATC', '∑Trans.',\n                    '∑Rev.', '#Sess/User', '#ATC./User', '#Trans/User.', 'Rev./User',\n                    'Rev./Trans', 'CR for User']\n    rows = []\n    result_dic = {}\n    for name in test_names:\n        test_tmp = df[df['variants'] == name]\n        num_users = test_tmp.shape[0]\n        num_converted = int((test_tmp['num_trans'] > 0).sum())\n        num_atc_users = int((test_tmp['num_atc'] > 0).sum())\n        total_rev = test_tmp['rev'].sum()\n        total_trans = test_tmp['num_trans'].sum()\n        # NOTE: the key '#Conveted' is misspelled but z_test_for_cr reads it; keep it as is.\n        tmp = {'Test': name,\n               '#Users': num_users,\n               '#Sess': test_tmp['num_sess'].sum(),\n               '∑Rev.': total_rev,\n               '∑ATC': test_tmp['num_atc'].sum(),\n               '∑Trans.': total_trans,\n               '#ATC./User': test_tmp['num_atc'].mean(),\n               'Rev./Trans': ratio(total_rev, total_trans),\n               'Rev./User': test_tmp['rev'].mean(),\n               '#Trans/User.': test_tmp['num_trans'].mean(),\n               '#Sess/User': test_tmp['num_sess'].mean(),\n               '#ATC': num_atc_users,\n               '#Conveted': num_converted,\n               'CR for User': ratio(num_converted, num_users)}\n        rows.append(tmp)\n        result_dic[name] = tmp\n    # Passing the column list fixes the display order and also works when rows is empty.\n    result_df = pd.DataFrame(rows, columns=column_order)\n    return (result_df, result_dic)",
"execution_count": 6,
"outputs": []
},
{
"metadata": {},
"cell_type": "markdown",
"source": "* ## KPIs for this AB test"
},
{
"metadata": {
"code_folding": [],
"trusted": true
},
"cell_type": "code",
"source": "# Overview of the A/B test KPIs\n# Step 1: all devices together\ntest_names = list(df['variants'].unique())\nprint ('AB test overview for all devices')\nresult_df,result_dic = get_data_overview(df,test_names)\ndisplay(result_df)\n# Step 2: one overview per device category\ndevice_list = list(df['device'].unique())\nfor device in device_list:\n    df_tmp = df[df['device']==device]\n    result_df_tmp,result_dic_tmp = get_data_overview(df_tmp,test_names)\n    print (device + ' : ' + 'AB test overview')\n    display(result_df_tmp)",
"execution_count": 18,
"outputs": [
{
"output_type": "stream",
"text": "AB test overview for all devices\n",
"name": "stdout"
},
{
"output_type": "display_data",
"data": {
"text/plain": " Test #Sess #Users #ATC #Conveted ∑ATC ∑Trans. ∑Rev. \\\n0 0 110697 42534 26442 19062 51377 33506 38398788.37 \n1 1 112065 42221 26172 18895 51458 32732 36974001.34 \n\n #Sess/User #ATC./User #Trans/User. Rev./User Rev./Trans CR for User \n0 2.60 1.21 0.79 902.78 1146.03 0.45 \n1 2.65 1.22 0.78 875.73 1129.60 0.45 ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Test</th>\n <th>#Sess</th>\n <th>#Users</th>\n <th>#ATC</th>\n <th>#Conveted</th>\n <th>∑ATC</th>\n <th>∑Trans.</th>\n <th>∑Rev.</th>\n <th>#Sess/User</th>\n <th>#ATC./User</th>\n <th>#Trans/User.</th>\n <th>Rev./User</th>\n <th>Rev./Trans</th>\n <th>CR for User</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>0</td>\n <td>110697</td>\n <td>42534</td>\n <td>26442</td>\n <td>19062</td>\n <td>51377</td>\n <td>33506</td>\n <td>38398788.37</td>\n <td>2.60</td>\n <td>1.21</td>\n <td>0.79</td>\n <td>902.78</td>\n <td>1146.03</td>\n <td>0.45</td>\n </tr>\n <tr>\n <th>1</th>\n <td>1</td>\n <td>112065</td>\n <td>42221</td>\n <td>26172</td>\n <td>18895</td>\n <td>51458</td>\n <td>32732</td>\n <td>36974001.34</td>\n <td>2.65</td>\n <td>1.22</td>\n <td>0.78</td>\n <td>875.73</td>\n <td>1129.60</td>\n <td>0.45</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
},
{
"output_type": "stream",
"text": "mobile : AB test overview\n",
"name": "stdout"
},
{
"output_type": "display_data",
"data": {
"text/plain": " Test #Sess #Users #ATC #Conveted ∑ATC ∑Trans. ∑Rev. \\\n0 0 46280 15271 9160 5958 19731 10371 11584811.53 \n1 1 46467 15345 9092 5934 19654 10169 11353816.45 \n\n #Sess/User #ATC./User #Trans/User. Rev./User Rev./Trans CR for User \n0 3.03 1.29 0.68 758.62 1117.04 0.39 \n1 3.03 1.28 0.66 739.90 1116.51 0.39 ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Test</th>\n <th>#Sess</th>\n <th>#Users</th>\n <th>#ATC</th>\n <th>#Conveted</th>\n <th>∑ATC</th>\n <th>∑Trans.</th>\n <th>∑Rev.</th>\n <th>#Sess/User</th>\n <th>#ATC./User</th>\n <th>#Trans/User.</th>\n <th>Rev./User</th>\n <th>Rev./Trans</th>\n <th>CR for User</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>0</td>\n <td>46280</td>\n <td>15271</td>\n <td>9160</td>\n <td>5958</td>\n <td>19731</td>\n <td>10371</td>\n <td>11584811.53</td>\n <td>3.03</td>\n <td>1.29</td>\n <td>0.68</td>\n <td>758.62</td>\n <td>1117.04</td>\n <td>0.39</td>\n </tr>\n <tr>\n <th>1</th>\n <td>1</td>\n <td>46467</td>\n <td>15345</td>\n <td>9092</td>\n <td>5934</td>\n <td>19654</td>\n <td>10169</td>\n <td>11353816.45</td>\n <td>3.03</td>\n <td>1.28</td>\n <td>0.66</td>\n <td>739.90</td>\n <td>1116.51</td>\n <td>0.39</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
},
{
"output_type": "stream",
"text": "tablet : AB test overview\n",
"name": "stdout"
},
{
"output_type": "display_data",
"data": {
"text/plain": " Test #Sess #Users #ATC #Conveted ∑ATC ∑Trans. ∑Rev. #Sess/User \\\n0 0 14845 5949 4286 3421 7733 5866 6664929.71 2.50 \n1 1 15851 6004 4260 3443 8219 5907 6607298.65 2.64 \n\n #ATC./User #Trans/User. Rev./User Rev./Trans CR for User \n0 1.30 0.99 1120.34 1136.20 0.58 \n1 1.37 0.98 1100.48 1118.55 0.57 ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Test</th>\n <th>#Sess</th>\n <th>#Users</th>\n <th>#ATC</th>\n <th>#Conveted</th>\n <th>∑ATC</th>\n <th>∑Trans.</th>\n <th>∑Rev.</th>\n <th>#Sess/User</th>\n <th>#ATC./User</th>\n <th>#Trans/User.</th>\n <th>Rev./User</th>\n <th>Rev./Trans</th>\n <th>CR for User</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>0</td>\n <td>14845</td>\n <td>5949</td>\n <td>4286</td>\n <td>3421</td>\n <td>7733</td>\n <td>5866</td>\n <td>6664929.71</td>\n <td>2.50</td>\n <td>1.30</td>\n <td>0.99</td>\n <td>1120.34</td>\n <td>1136.20</td>\n <td>0.58</td>\n </tr>\n <tr>\n <th>1</th>\n <td>1</td>\n <td>15851</td>\n <td>6004</td>\n <td>4260</td>\n <td>3443</td>\n <td>8219</td>\n <td>5907</td>\n <td>6607298.65</td>\n <td>2.64</td>\n <td>1.37</td>\n <td>0.98</td>\n <td>1100.48</td>\n <td>1118.55</td>\n <td>0.57</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
},
{
"output_type": "stream",
"text": "desktop : AB test overview\n",
"name": "stdout"
},
{
"output_type": "display_data",
"data": {
"text/plain": " Test #Sess #Users #ATC #Conveted ∑ATC ∑Trans. ∑Rev. \\\n0 0 49572 21314 12996 9683 23913 17269 20149047.13 \n1 1 49747 20872 12820 9518 23585 16656 19012886.24 \n\n #Sess/User #ATC./User #Trans/User. Rev./User Rev./Trans CR for User \n0 2.33 1.12 0.81 945.34 1166.78 0.45 \n1 2.38 1.13 0.80 910.93 1141.50 0.46 ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Test</th>\n <th>#Sess</th>\n <th>#Users</th>\n <th>#ATC</th>\n <th>#Conveted</th>\n <th>∑ATC</th>\n <th>∑Trans.</th>\n <th>∑Rev.</th>\n <th>#Sess/User</th>\n <th>#ATC./User</th>\n <th>#Trans/User.</th>\n <th>Rev./User</th>\n <th>Rev./Trans</th>\n <th>CR for User</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>0</td>\n <td>49572</td>\n <td>21314</td>\n <td>12996</td>\n <td>9683</td>\n <td>23913</td>\n <td>17269</td>\n <td>20149047.13</td>\n <td>2.33</td>\n <td>1.12</td>\n <td>0.81</td>\n <td>945.34</td>\n <td>1166.78</td>\n <td>0.45</td>\n </tr>\n <tr>\n <th>1</th>\n <td>1</td>\n <td>49747</td>\n <td>20872</td>\n <td>12820</td>\n <td>9518</td>\n <td>23585</td>\n <td>16656</td>\n <td>19012886.24</td>\n <td>2.38</td>\n <td>1.13</td>\n <td>0.80</td>\n <td>910.93</td>\n <td>1141.50</td>\n <td>0.46</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {}
}
]
},
{
"metadata": {
"code_folding": [
0
],
"trusted": true
},
"cell_type": "code",
"source": "######### Function: Data Visualization #########\ndef visionlise_data(df,percentile,test_names,kpi,bin):\n    \"\"\"Print the range of a KPI and plot its histogram for every variant.\n\n    Parameters\n    ----------\n    df : pandas.DataFrame\n        Data with a 'variants' column and the column named by kpi.\n    percentile : int or float\n        Percentile of df[kpi] (0-100) used as the upper cut-off of the plot.\n    test_names : sequence\n        Variant identifiers; one histogram is drawn per variant.\n    kpi : str\n        Name of the column to plot.\n    bin : int\n        Width of one histogram bin. The name shadows the builtin but is kept\n        so that existing callers are not broken.\n\n    Returns\n    -------\n    tuple\n        (lower, pp, upper): the minimum of the KPI, its percentile value\n        truncated to int, and its maximum.\n    \"\"\"\n    plt.figure(figsize=(10,5))\n    upper = df[kpi].max()\n    pp = int(np.percentile(df[kpi],percentile))\n    lower = df[kpi].min()\n    print ('The maximun '+ kpi + ' of a user : %d'%upper)\n    print (str(percentile) + '% users with '+ kpi + ' below: %d'%pp)\n    print ('The minimun '+ kpi + ' of a user : %d'%lower)\n    print ('********** Data Visualization for ' + str(percentile) + '% users ******')\n    df_c = df[df[kpi] <= pp]\n    # The first edge sits one bin width above the minimum, so values in\n    # [lower, lower + bin) fall outside the histogram and are not drawn.\n    bins = range(int(lower+bin),int(pp+bin),bin)\n    for name in test_names:\n        data_tmp = df_c[df_c['variants'] == name]\n        data_tmp[kpi].plot(kind = 'hist',bins = bins,alpha = 0.5,label = name)\n    plt.ylabel('Frequency' ,\n               fontsize = 14)\n    plt.title('AB Test: Distribution of '+kpi,\n              fontsize = 14)\n    plt.grid()\n    plt.legend()\n    plt.show()\n    return (lower,pp,upper)",
"execution_count": 13,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "######## Function: Z-test for Conversion Rate #######\ndef z_test_for_cr(result_dic,Hypothesis,alpha,test_names):\n    \"\"\"Two-proportion z-test on the user conversion rate of two variants.\n\n    Parameters\n    ----------\n    result_dic : dict\n        Per-variant KPI dicts as returned by get_data_overview; the keys\n        '#Conveted' and '#Users' are read.\n    Hypothesis : str\n        Passed unchanged as `alternative` to proportions_ztest and to\n        NormalIndPower.solve_power.\n    alpha : float\n        Significance level.\n    test_names : sequence\n        Variant identifiers. test_names[0] is the first sample and\n        test_names[1] the second; further variants are ignored.\n\n    Prints the z-score, the p-value, the verdict and the power; returns None.\n    \"\"\"\n    print ('The hypothes is: CR in the variant part is ' + Hypothesis + ' than the control part.')\n    first = result_dic[test_names[0]]\n    second = result_dic[test_names[1]]\n    count = np.array([first['#Conveted'], second['#Conveted']])\n    nobs = np.array([first['#Users'], second['#Users']])\n    # NOTE(review): the samples are ordered (test_names[0], test_names[1]) and\n    # `alternative` describes the first sample relative to the second - confirm this\n    # is the intended direction for the hypothesis printed above.\n    z_score,p_value = sms.proportions_ztest(count,nobs,value = 0,alternative = Hypothesis)\n    print ('z_score : %.4f'%z_score)\n    print ('p_value : %.4f'%p_value)\n    if p_value > alpha:\n        print ('The result is not significant. The hypothesis can not be nagted!')\n    else:\n        print ('The result is significant. The hypothesis can be nagted! ')\n\n    # Power for the observed effect size and the observed sample sizes.\n    es = sms.proportion_effectsize(float(count[0])/nobs[0], float(count[1])/nobs[1], method='normal')\n    power = smp.NormalIndPower().solve_power(es, nobs1=nobs[0], ratio = float(nobs[1])/nobs[0], alpha = alpha, alternative=Hypothesis)\n    print ('power : %.4f'%power)\n    return",
"execution_count": 25,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Check the conversion rate for the desktop\ndf_tmp = df[df['device']=='desktop']\nresult_df_tmp,result_dic_tmp = get_data_overview(df_tmp,test_names)\n# Significance level\nalpha = 0.1\n# Define hypothesis\nHypothesis = 'smaller'\n# Test the desktop-only KPIs (result_dic_tmp), not the all-device result_dic.\nz_test_for_cr(result_dic_tmp,Hypothesis,alpha,test_names)",
"execution_count": 26,
"outputs": [
{
"output_type": "stream",
"text": "The hypothes is: CR in the variant part is smaller than the control part.\nz_score : 0.1853\np_value : 0.5735\nThe result is not significant. The hypothesis can not be nagted!\npower : 0.0712\n",
"name": "stdout"
}
]
},
{
"metadata": {
"code_folding": [
0
],
"trusted": true
},
"cell_type": "code",
"source": "# Mannwhitney_test\ndef Mannwhitney_test_for_values(df,Hypothesis,alpha,test_names,kpi):\n    \"\"\"Mann-Whitney U test of a KPI: the first variant against every other variant.\n\n    Parameters\n    ----------\n    df : pandas.DataFrame\n        Data with a 'variants' column and the column named by kpi.\n    Hypothesis : str\n        Passed unchanged as `alternative` to scipy.stats.mannwhitneyu, with the\n        test_names[0] sample as the first argument.\n    alpha : float\n        Significance level.\n    test_names : sequence\n        Variant identifiers; test_names[0] is compared with each later entry.\n    kpi : str\n        Name of the column to test.\n\n    Prints the p-value and the verdict for every comparison; returns None.\n    \"\"\"\n    print ('The hypothes is: ' + kpi +' in the variant part is ' + Hypothesis + ' than the control part.')\n    # The baseline sample is the same for every comparison, so slice it once.\n    data_A = df[df['variants'] == test_names[0]]\n    for name in test_names[1:]:\n        data_B = df[df['variants'] == name]\n        _,p_value = scipy.stats.mannwhitneyu(data_A[kpi],data_B[kpi],alternative = Hypothesis)\n        print ('The p-value is: %.4f'%p_value)\n        if p_value > alpha:\n            print ('The result is not significant. The hypothesis can not be nagted!')\n        else:\n            print ('The result is significant. The hypothesis can be nagted! ')\n    return",
"execution_count": 14,
"outputs": []
},
{
"metadata": {},
"cell_type": "markdown",
"source": "* ## Significance Analysis for Revenue per User"
},
{
"metadata": {
"code_folding": [],
"scrolled": false,
"trusted": true
},
"cell_type": "code",
"source": "# Check the KPI (revenue per user) distribution for the test\npercentile = 98\nkpi = 'rev'\nbins = 100  # bin width handed to visionlise_data\n# Significance level\nalpha = 0.1\n# Define hypothesis\nHypothesis = 'greater'  # one of 'less', 'two-sided', 'greater'\n# Step 1: all devices together\nprint ('For all devices')\nlower,pp,upper = visionlise_data(df,percentile,test_names,kpi,bins)\n\ndf_c = df[df[kpi] <= pp] # drop the users above the chosen percentile\nMannwhitney_test_for_values(df_c,Hypothesis,alpha,test_names,kpi)\nprint ()\n\n# Step 2: the same analysis per device category\nfor device in device_list:\n    df_tmp = df[df['device']==device]\n    print ('For ' + device )\n    lower_tmp,pp_tmp,upper_tmp = visionlise_data(df_tmp,percentile,test_names,kpi,bins)\n    # Cut at this device's own percentile (pp_tmp), the value printed and plotted just above,\n    # instead of the all-device cut-off pp.\n    df_c_tmp = df_tmp[df_tmp[kpi] <= pp_tmp]\n    Mannwhitney_test_for_values(df_c_tmp,Hypothesis,alpha,test_names,kpi)\n    print ()",
"execution_count": 16,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "For all devices\nThe maximun rev of a user : 59820\n98% users with rev below: 5427\nThe minimun rev of a user : 0\n********** Data Visualization for 98% users ******\n"
},
{
"data": {
"image/png": "\n",
"text/plain": "<Figure size 720x360 with 1 Axes>"
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": "The hypothes is: rev in the variant part is greater than the control part.\nThe p-value is: 0.2654\nThe result is not significant. The hypothesis can not be nagted!\n\nFor mobile\nThe maximun rev of a user : 33698\n98% users with rev below: 5097\nThe minimun rev of a user : 0\n********** Data Visualization for 98% users ******\n"
},
{
"data": {
"image/png": "\n",
"text/plain": "<Figure size 720x360 with 1 Axes>"
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": "The hypothes is: rev in the variant part is greater than the control part.\nThe p-value is: 0.2909\nThe result is not significant. The hypothesis can not be nagted!\n\nFor tablet\nThe maximun rev of a user : 59820\n98% users with rev below: 5501\nThe minimun rev of a user : 0\n********** Data Visualization for 98% users ******\n"
},
{
"data": {
"image/png": "\n",
"text/plain": "<Figure size 720x360 with 1 Axes>"
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": "The hypothes is: rev in the variant part is greater than the control part.\nThe p-value is: 0.3559\nThe result is not significant. The hypothesis can not be nagted!\n\nFor desktop\nThe maximun rev of a user : 48808\n98% users with rev below: 5650\nThe minimun rev of a user : 0\n********** Data Visualization for 98% users ******\n"
},
{
"data": {
"image/png": "\n",
"text/plain": "<Figure size 720x360 with 1 Axes>"
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": "The hypothes is: rev in the variant part is greater than the control part.\nThe p-value is: 0.4081\nThe result is not significant. The hypothesis can not be nagted!\n\n"
}
]
},
{
"metadata": {},
"cell_type": "markdown",
"source": "* ## Significance Analysis for Every Order Value "
},
{
"metadata": {
"code_folding": [
0
],
"scrolled": false,
"trusted": true
},
"cell_type": "code",
"source": "# Check the KPI (order value) distribution for the test\n# Revenue per transaction for every row; rows without a transaction get 0.\n# Vectorised division replaces the row-wise apply; the where() overwrites the\n# inf/NaN produced by dividing by zero.\ndf['Rev./Trans'] = df['rev'].div(df['num_trans']).where(df['num_trans'] != 0, 0)\npercentile = 98\nkpi = 'Rev./Trans'\nbins = 100  # bin width handed to visionlise_data\n# Significance level\nalpha = 0.1\n# Define hypothesis\nHypothesis = 'less'  # one of 'less', 'two-sided', 'greater'\n# Step 1: all devices together\nprint ('For all devices')\nlower,pp,upper = visionlise_data(df,percentile,test_names,kpi,bins)\n\ndf_c = df[df[kpi] <= pp] # drop the users above the chosen percentile\nMannwhitney_test_for_values(df_c,Hypothesis,alpha,test_names,kpi)\nprint ()\n\n# Step 2: the same analysis per device category\nfor device in device_list:\n    df_tmp = df[df['device']==device]\n    print ('For ' + device )\n    lower_tmp,pp_tmp,upper_tmp = visionlise_data(df_tmp,percentile,test_names,kpi,bins)\n    # Cut at this device's own percentile (pp_tmp), the value printed and plotted just above,\n    # instead of the all-device cut-off pp.\n    df_c_tmp = df_tmp[df_tmp[kpi] <= pp_tmp]\n    Mannwhitney_test_for_values(df_c_tmp,Hypothesis,alpha,test_names,kpi)\n    print ()",
"execution_count": 13,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "For all devices\nThe maximun Rev./Trans of a user : 13999\n98% users with Rev./Trans below: 2306\nThe minimun Rev./Trans of a user : 0\n********** Data Visualization for 98% users ******\n"
},
{
"data": {
"image/png": "\n",
"text/plain": "<Figure size 720x360 with 1 Axes>"
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": "The hypothes is: Rev./Trans in the variant part is less than the control part.\nThe p-value is: 0.7239\nThe result is not significant. The hypothesis can not be nagted!\n\nFor mobile\nThe maximun Rev./Trans of a user : 13999\n98% users with Rev./Trans below: 2180\nThe minimun Rev./Trans of a user : 0\n********** Data Visualization for 98% users ******\n"
},
{
"data": {
"image/png": "\n",
"text/plain": "<Figure size 720x360 with 1 Axes>"
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": "The hypothes is: Rev./Trans in the variant part is less than the control part.\nThe p-value is: 0.6773\nThe result is not significant. The hypothesis can not be nagted!\n\nFor tablet\nThe maximun Rev./Trans of a user : 6321\n98% users with Rev./Trans below: 2410\nThe minimun Rev./Trans of a user : 0\n********** Data Visualization for 98% users ******\n"
},
{
"data": {
"image/png": "\n",
"text/plain": "<Figure size 720x360 with 1 Axes>"
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": "The hypothes is: Rev./Trans in the variant part is less than the control part.\nThe p-value is: 0.7277\nThe result is not significant. The hypothesis can not be nagted!\n\nFor desktop\nThe maximun Rev./Trans of a user : 12348\n98% users with Rev./Trans below: 2367\nThe minimun Rev./Trans of a user : 0\n********** Data Visualization for 98% users ******\n"
},
{
"data": {
"image/png": "\n",
"text/plain": "<Figure size 720x360 with 1 Axes>"
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": "The hypothes is: Rev./Trans in the variant part is less than the control part.\nThe p-value is: 0.5819\nThe result is not significant. The hypothesis can not be nagted!\n\n"
}
]
}
],
"metadata": {
"kernelspec": {
"name": "python3",
"display_name": "Python 3",
"language": "python"
},
"language_info": {
"name": "python",
"version": "3.6.4",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"latex_envs": {
"eqNumInitial": 1,
"eqLabelWithNumbers": true,
"current_citInitial": 1,
"cite_by": "apalike",
"bibliofile": "biblio.bib",
"LaTeX_envs_menu_present": true,
"labels_anchors": false,
"latex_user_defs": false,
"user_envs_cfg": false,
"report_style_numbering": false,
"autoclose": false,
"autocomplete": true,
"hotkeys": {
"equation": "Ctrl-E",
"itemize": "Ctrl-I"
}
},
"gist": {
"id": "b9796cb531aa43564ebba5386b121a17",
"data": {
"description": "Get AB test data and do a simple AB test Analysis on the result",
"public": true
}
},
"_draft": {
"nbviewer_url": "https://gist.github.com/b9796cb531aa43564ebba5386b121a17"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment