Skip to content

Instantly share code, notes, and snippets.

@ANJANIGITHUB
Created August 5, 2020 16:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ANJANIGITHUB/8143e093e3c2bf698508fb0c8381c94c to your computer and use it in GitHub Desktop.
Save ANJANIGITHUB/8143e093e3c2bf698508fb0c8381c94c to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 1-Sample T-test"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# The one-sample t-test checks whether a sample mean differs from a known population mean.\n",
"\n",
"# Null hypothesis: there is no difference between the population mean and the sample mean.\n",
"# Alternative hypothesis: there is a difference between the population mean and the sample mean."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from scipy.stats import ttest_1samp,ttest_ind,norm,ttest_rel\n",
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"ages=[10,20,35,50,28,40,55,18,16,55,30,25,43,18,30,28,14,24,16,17,32,35,26,27,65,18,43,23,21,20,19,70]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"population_mean=np.mean(ages)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"30.34375"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"population_mean"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"sample_population=np.random.choice(ages,10)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([30, 23, 30, 65, 18, 24, 16, 20, 23, 20])"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sample_population"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"26.9"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sample_mean=np.mean(sample_population)\n",
"sample_mean"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"one_sample_ttest,pvalue=ttest_1samp(sample_population,population_mean)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"-0.7694263435329401"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"one_sample_ttest"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.46135557443352215"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pvalue"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fail to reject Null Hypothesis. No significant difference in means\n"
]
}
],
"source": [
"# The null hypothesis of the one-sample t-test is that the sample mean equals the population mean.\n",
"# A small p-value (<= 0.05) is evidence AGAINST that null, i.e. the means differ.\n",
"# A large p-value only means we fail to reject the null; it does not prove the means are equal.\n",
"if pvalue<=0.05:\n",
"    print('Reject Null Hypothesis. There is a difference in means')\n",
"else:\n",
"    print('Fail to reject Null Hypothesis. No significant difference in means')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2-Sample T-Test or Independent T-test"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# Null hypothesis: the means of the two groups are the same.\n",
"# Alternative hypothesis: the means of the two groups are not the same."
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# The two-sample t-test compares the means of two independent groups to check whether they are the same or different."
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"classA_Ages=np.random.randint(12,high=18,size=10)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([17, 14, 12, 17, 13, 12, 15, 14, 13, 16])"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"classA_Ages"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"classB_Ages=np.random.randint(12,high=18,size=10)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([17, 15, 15, 14, 16, 16, 16, 13, 12, 13])"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"classB_Ages"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"classA_Ages_mean=np.mean(classA_Ages)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"classB_Ages_mean=np.mean(classB_Ages)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"14.3"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"classA_Ages_mean"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"14.7"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"classB_Ages_mean"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"two_sample_t_test_stats,p_val=ttest_ind(a=classA_Ages,b=classB_Ages,equal_var =False)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"-0.5061894487315155"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"two_sample_t_test_stats"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.6189904093635026"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"p_val"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Do not reject Null Hypothesis. No significant difference in means\n"
]
}
],
"source": [
"# The null hypothesis of the independent two-sample t-test is that both group means are equal.\n",
"# A small p-value (<= 0.05) is evidence AGAINST that null, i.e. the group means differ.\n",
"# A large p-value only means we cannot reject the null; it does not prove the means are equal.\n",
"if p_val<=0.05:\n",
"    print('Reject Null Hypothesis. Means are different')\n",
"else:\n",
"    print('Do not reject Null Hypothesis. No significant difference in means')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Paired T-test"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"# Use a paired t-test to compare the means of the same group measured at different points in time."
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"weight1=[25,30,28,35,28,34,26,29,30,26,28,32,31,30,45]"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"weight2=weight1 +norm.rvs(scale=5,loc=-1.25,size=15)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[25, 30, 28, 35, 28, 34, 26, 29, 30, 26, 28, 32, 31, 30, 45]"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"weight1"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([24.52960561, 23.83188231, 21.88564266, 31.57268761, 22.60292559,\n",
" 31.20880647, 28.1443266 , 33.60569785, 20.80937386, 27.47027227,\n",
" 31.49986073, 27.67681776, 27.90935884, 28.43787232, 39.15093676])"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"weight2"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create Data Frame"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"weights_df=pd.DataFrame({'weight1':np.array(weight1),\n",
" 'weight2':np.array(weight2),\n",
" 'weight_diff':np.array(weight2)-np.array(weight1)})"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>weight1</th>\n",
" <th>weight2</th>\n",
" <th>weight_diff</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>25</td>\n",
" <td>24.529606</td>\n",
" <td>-0.470394</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>30</td>\n",
" <td>23.831882</td>\n",
" <td>-6.168118</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>28</td>\n",
" <td>21.885643</td>\n",
" <td>-6.114357</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>35</td>\n",
" <td>31.572688</td>\n",
" <td>-3.427312</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>28</td>\n",
" <td>22.602926</td>\n",
" <td>-5.397074</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>34</td>\n",
" <td>31.208806</td>\n",
" <td>-2.791194</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>26</td>\n",
" <td>28.144327</td>\n",
" <td>2.144327</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>29</td>\n",
" <td>33.605698</td>\n",
" <td>4.605698</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>30</td>\n",
" <td>20.809374</td>\n",
" <td>-9.190626</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>26</td>\n",
" <td>27.470272</td>\n",
" <td>1.470272</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>28</td>\n",
" <td>31.499861</td>\n",
" <td>3.499861</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>32</td>\n",
" <td>27.676818</td>\n",
" <td>-4.323182</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>31</td>\n",
" <td>27.909359</td>\n",
" <td>-3.090641</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>30</td>\n",
" <td>28.437872</td>\n",
" <td>-1.562128</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>45</td>\n",
" <td>39.150937</td>\n",
" <td>-5.849063</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" weight1 weight2 weight_diff\n",
"0 25 24.529606 -0.470394\n",
"1 30 23.831882 -6.168118\n",
"2 28 21.885643 -6.114357\n",
"3 35 31.572688 -3.427312\n",
"4 28 22.602926 -5.397074\n",
"5 34 31.208806 -2.791194\n",
"6 26 28.144327 2.144327\n",
"7 29 33.605698 4.605698\n",
"8 30 20.809374 -9.190626\n",
"9 26 27.470272 1.470272\n",
"10 28 31.499861 3.499861\n",
"11 32 27.676818 -4.323182\n",
"12 31 27.909359 -3.090641\n",
"13 30 28.437872 -1.562128\n",
"14 45 39.150937 -5.849063"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"weights_df"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"stats,p_val=ttest_rel(a=weight1,b=weight2)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2.365145888204346"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stats"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.03299626791354203"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"p_val"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Reject Null Hypothesis. Means are different\n"
]
}
],
"source": [
"# The null hypothesis of the paired t-test is that the mean of the paired differences is zero,\n",
"# i.e. the two related measurements have the same mean.\n",
"# A small p-value (<= 0.05) is evidence AGAINST that null, i.e. the means differ.\n",
"if p_val<=0.05:\n",
"    print('Reject Null Hypothesis. Means are different')\n",
"else:\n",
"    print('Do not reject Null Hypothesis. No significant difference in means')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment