tonicanada/20220409_understanding_clt_and_ttest_part7.ipynb

## 20220409_understanding_clt_and_ttest_part7.ipynb
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "First we have to compute ${\\overline{x}_{\\text{men}}}$, ${s_{\\text{men}}}$, ${n_{\\text{men}}}$ and ${\\overline{x}_{\\text{women}}}$, ${s_{\\text{women}}}$, ${n_{\\text{women}}}$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Per group: sample mean, sample standard deviation (ddof=1) and sample size.\n",
    "x_men, s_men, n_men = (\n",
    "    np.mean(example3_data[\"men\"]),\n",
    "    np.std(example3_data[\"men\"], ddof=1),\n",
    "    len(example3_data[\"men\"]),\n",
    ")\n",
    "x_women, s_women, n_women = (\n",
    "    np.mean(example3_data[\"women\"]),\n",
    "    np.std(example3_data[\"women\"], ddof=1),\n",
    "    len(example3_data[\"women\"]),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{\n",
      "    \"x_men\": 14.946153846153846,\n",
      "    \"s_men\": 6.842589103623397,\n",
      "    \"n_men\": 13,\n",
      "    \"x_women\": 22.29,\n",
      "    \"s_women\": 5.319659554687478,\n",
      "    \"n_women\": 10\n",
      "}\n"
     ]
    }
   ],
   "source": [
    "# Gather the six sample statistics (same keys, same order) and pretty-print them as JSON.\n",
    "params_example3 = dict(\n",
    "    x_men=x_men,\n",
    "    s_men=s_men,\n",
    "    n_men=n_men,\n",
    "    x_women=x_women,\n",
    "    s_women=s_women,\n",
    "    n_women=n_women,\n",
    ")\n",
    "print(json.dumps(params_example3, indent=4))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now we compute the t statistic with the formula seen before (its denominator is the standard error):\n",
    "$$\n",
    "t = \\frac{\\overline{x}_1-\\overline{x}_2}{s_p \\ \\sqrt{\\frac{1}{n_1}+\\frac{1}{n_2}}}\n",
    "$$\n",
    "where:\n",
    "$$\n",
    "s_p = \\sqrt{\\frac{(n_1-1) \\ s^2_{x_1}+ (n_2-1) \\ s^2_{x_2}}{n_1 + n_2 -2}}\n",
    "$$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "6.235617003466411\n"
     ]
    }
   ],
   "source": [
    "# Pooled standard deviation: square root of the (n-1)-weighted average of the two sample variances.\n",
    "sp_value_example3 = np.sqrt(\n",
    "    ((n_men - 1) * s_men**2 + (n_women - 1) * s_women**2)\n",
    "    / (n_men + n_women - 2)\n",
    ")\n",
    "print(sp_value_example3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2.622839252120241\n"
     ]
    }
   ],
   "source": [
    "# Standard error of the difference of means: s_p * sqrt(1/n_men + 1/n_women).\n",
    "se_example3 = sp_value_example3 * np.sqrt(n_men**-1 + n_women**-1)\n",
    "print(se_example3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2.7999604428329192\n"
     ]
    }
   ],
   "source": [
    "# Magnitude of the t statistic: difference of the sample means divided by the standard error.\n",
    "t_value_example3 = abs((x_men - x_women) / se_example3)\n",
    "print(t_value_example3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "21\n"
     ]
    }
   ],
   "source": [
    "# Degrees of freedom of the pooled test: (n_men - 1) + (n_women - 1) = n_men + n_women - 2.\n",
    "dof_example3 = (n_men - 1) + (n_women - 1)\n",
    "print(dof_example3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.005365303952098963"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Upper-tail probability P(T > t) under Student's t-distribution with dof_example3 degrees of freedom\n",
    "t(df=dof_example3).sf(t_value_example3)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "$$\n",
    "\\text{one-tailed p-value} = 0.0054 < \\frac{\\alpha}{2} = \\frac{0.05}{2} = 0.025 \\implies \\text{We can reject } H_0\n",
    "$$"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We can reject $H_0$, so we conclude at the 5% significance level that $\\mu_{\\text{men}} \\neq \\mu_{\\text{women}}$."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"First we have to compute ${\\overline{x}_{\\text{men}}}$, ${s_{\\text{men}}}$, ${n_{\\text{men}}}$ and ${\\overline{x}_{\\text{women}}}$, ${s_{\\text{women}}}$, ${n_{\\text{women}}}$"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 38,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Per group: sample mean, sample standard deviation (ddof=1) and sample size.\n",
	"x_men, s_men, n_men = (\n",
	"    np.mean(example3_data[\"men\"]),\n",
	"    np.std(example3_data[\"men\"], ddof=1),\n",
	"    len(example3_data[\"men\"]),\n",
	")\n",
	"x_women, s_women, n_women = (\n",
	"    np.mean(example3_data[\"women\"]),\n",
	"    np.std(example3_data[\"women\"], ddof=1),\n",
	"    len(example3_data[\"women\"]),\n",
	")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 39,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"{\n",
	" \"x_men\": 14.946153846153846,\n",
	" \"s_men\": 6.842589103623397,\n",
	" \"n_men\": 13,\n",
	" \"x_women\": 22.29,\n",
	" \"s_women\": 5.319659554687478,\n",
	" \"n_women\": 10\n",
	"}\n"
	]
	}
	],
	"source": [
	"# Gather the six sample statistics (same keys, same order) and pretty-print them as JSON.\n",
	"params_example3 = dict(\n",
	"    x_men=x_men,\n",
	"    s_men=s_men,\n",
	"    n_men=n_men,\n",
	"    x_women=x_women,\n",
	"    s_women=s_women,\n",
	"    n_women=n_women,\n",
	")\n",
	"print(json.dumps(params_example3, indent=4))"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Now we compute the t statistic with the formula seen before (its denominator is the standard error):\n",
	"$$\n",
	"t = \\frac{\\overline{x}_1-\\overline{x}_2}{s_p \\ \\sqrt{\\frac{1}{n_1}+\\frac{1}{n_2}}}\n",
	"$$\n",
	"where:\n",
	"$$\n",
	"s_p = \\sqrt{\\frac{(n_1-1) \\ s^2_{x_1}+ (n_2-1) \\ s^2_{x_2}}{n_1 + n_2 -2}}\n",
	"$$"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 40,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"6.235617003466411\n"
	]
	}
	],
	"source": [
	"# Pooled standard deviation: square root of the (n-1)-weighted average of the two sample variances.\n",
	"# The multiplication and power operators are required: (n - 1) * s**2 for each group.\n",
	"sp_value_example3 = np.sqrt(((n_men-1)*s_men**2+(n_women-1)*s_women**2)/(n_men+n_women-2))\n",
	"print(sp_value_example3)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 41,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"2.622839252120241\n"
	]
	}
	],
	"source": [
	"# Standard error of the difference of means: s_p * sqrt(1/n_men + 1/n_women).\n",
	"# n**(-1) is the reciprocal 1/n; the product with s_p needs an explicit '*'.\n",
	"se_example3 = (sp_value_example3*np.sqrt(n_men**(-1) + n_women**(-1)))\n",
	"print(se_example3)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 42,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"2.7999604428329192\n"
	]
	}
	],
	"source": [
	"# Magnitude of the t statistic: difference of the sample means divided by the standard error.\n",
	"t_value_example3 = abs((x_men - x_women) / se_example3)\n",
	"print(t_value_example3)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 43,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"21\n"
	]
	}
	],
	"source": [
	"# Degrees of freedom of the pooled test: (n_men - 1) + (n_women - 1) = n_men + n_women - 2.\n",
	"dof_example3 = (n_men - 1) + (n_women - 1)\n",
	"print(dof_example3)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 45,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"0.005365303952098963"
	]
	},
	"execution_count": 45,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# Upper-tail probability P(T > t) under Student's t-distribution with dof_example3 degrees of freedom\n",
	"t(df=dof_example3).sf(t_value_example3)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"$$\n",
	"\\text{one-tailed p-value} = 0.0054 < \\frac{\\alpha}{2} = \\frac{0.05}{2} = 0.025 \\implies \\text{We can reject } H_0\n",
	"$$"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"We can reject $H_0$, so we conclude at the 5% significance level that $\\mu_{\\text{men}} \\neq \\mu_{\\text{women}}$."
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.8.8"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 4
	}