pushkarsaini18/hypothesis assignment.ipynb

## hypothesis assignment.ipynb
{
  "cells": [
    {
      "metadata": {
        "trusted": true
      },
      "id": "61a2dee8",
      "cell_type": "code",
      "source": "#using CUtlets data\n\n\n#A F&B manager wants to determine whether there is any significant difference in the diameter of the cutlet between two units. A randomly selected sample of cutlets was collected from both units and measured? Analyze the data and draw inferences at 5% significance level. Please state the assumptions and tests that you carried out to check validity of the assumptions.\n\n\n#import the basic library \nimport numpy as np \nimport pandas as pd",
      "execution_count": 2,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "id": "54ec80e8",
      "cell_type": "code",
      "source": "#load data set \ncutlets=pd.read_csv(\"Cutlets.csv\")",
      "execution_count": 3,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "id": "b3b90eaf",
      "cell_type": "code",
      "source": "#check for columns name to divide data into two diff dataset\ncutlets.columns",
      "execution_count": 4,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 4,
          "data": {
            "text/plain": "Index(['Unit A', 'Unit B'], dtype='object')"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "id": "458f7ada",
      "cell_type": "code",
      "source": "#column 1 store in unitA\nunitA=cutlets[[\"Unit A\"]]",
      "execution_count": 5,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "id": "a8d2fd34",
      "cell_type": "code",
      "source": "unitA.head()",
      "execution_count": 57,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 57,
          "data": {
            "text/plain": "   Unit A\n0  6.8090\n1  6.4376\n2  6.9157\n3  7.3012\n4  7.4488",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Unit A</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>6.8090</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>6.4376</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>6.9157</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>7.3012</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>7.4488</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "id": "aaa54ea9",
      "cell_type": "code",
      "source": "#column 2 store in unitB\nunitB=cutlets[['Unit B']]",
      "execution_count": 7,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "id": "c78615ba",
      "cell_type": "code",
      "source": "# Two-sample (independent) t-test for a difference in mean cutlet diameter.\n# Assumptions are checked first: normality of each sample (Shapiro-Wilk) and\n# equality of variances (Levene). np.ravel() flattens the single-column frames\n# so the tests receive 1-D samples and return scalars instead of 1-element arrays.\nfrom scipy import stats\n\nunit_a_values = np.ravel(unitA)\nunit_b_values = np.ravel(unitB)\n\nprint(\"Shapiro-Wilk, Unit A:\", stats.shapiro(unit_a_values))\nprint(\"Shapiro-Wilk, Unit B:\", stats.shapiro(unit_b_values))\nprint(\"Levene:\", stats.levene(unit_a_values, unit_b_values))\n\nstats.ttest_ind(unit_a_values, unit_b_values)\n",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "id": "d8a708ba",
      "cell_type": "code",
      "source": "# Read the p-value from the test result instead of retyping a rounded constant.\n# np.ravel(...)[0] yields a scalar whether the inputs are 1-D or single-column frames.\np_value = float(np.ravel(stats.ttest_ind(unitA, unitB).pvalue)[0])",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "id": "a5846fdb",
      "cell_type": "code",
      "source": "p_value",
      "execution_count": 12,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 12,
          "data": {
            "text/plain": "0.4722"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "id": "8a8f5857",
      "cell_type": "code",
      "source": "# H0: there is no significant difference in mean cutlet diameter between Unit A and Unit B\n# Ha: there is a significant difference in mean cutlet diameter between Unit A and Unit B\n# Significance level: 5% (alpha = 0.05)\n# p-value (0.4722) > alpha\n# Hence we fail to reject the null hypothesis: the data show no significant difference between the two units",
      "execution_count": 17,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "\n\n",
      "execution_count": 16,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Using the LabTAT data\n\n# A hospital wants to determine whether there is any difference in the average Turn Around Time (TAT) of reports of the laboratories on their preferred list. They collected a random sample and recorded TAT for reports of 4 laboratories. TAT is defined as the time from sample collection to report dispatch.\n\n# Analyze the data and determine whether there is any difference in average TAT among the different laboratories at 5% significance level.\n",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# H0: the mean TAT is the same for all four laboratories\n# Ha: the mean TAT of at least one laboratory differs from the others",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "lab= pd.read_csv(\"LabTAT.csv\")",
      "execution_count": 18,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "lab.head()",
      "execution_count": 19,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 19,
          "data": {
            "text/plain": "   Laboratory 1  Laboratory 2  Laboratory 3  Laboratory 4\n0        185.35        165.53        176.70        166.13\n1        170.49        185.91        198.45        160.79\n2        192.77        194.92        201.23        185.18\n3        177.33        183.00        199.61        176.42\n4        193.41        169.57        204.63        152.60",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Laboratory 1</th>\n      <th>Laboratory 2</th>\n      <th>Laboratory 3</th>\n      <th>Laboratory 4</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>185.35</td>\n      <td>165.53</td>\n      <td>176.70</td>\n      <td>166.13</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>170.49</td>\n      <td>185.91</td>\n      <td>198.45</td>\n      <td>160.79</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>192.77</td>\n      <td>194.92</td>\n      <td>201.23</td>\n      <td>185.18</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>177.33</td>\n      <td>183.00</td>\n      <td>199.61</td>\n      <td>176.42</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>193.41</td>\n      <td>169.57</td>\n      <td>204.63</td>\n      <td>152.60</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "lab1=lab[[\"Laboratory 1\"]]",
      "execution_count": 21,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "lab2=lab[[\"Laboratory 2\"]]",
      "execution_count": 22,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "lab3=lab[[\"Laboratory 3\"]]",
      "execution_count": 23,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "lab4=lab[[\"Laboratory 4\"]]",
      "execution_count": 24,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "lab4.head()",
      "execution_count": 25,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 25,
          "data": {
            "text/plain": "   Laboratory 4\n0        166.13\n1        160.79\n2        185.18\n3        176.42\n4        152.60",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Laboratory 4</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>166.13</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>160.79</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>185.18</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>176.42</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>152.60</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Omnibus test: one-way ANOVA of H0 \"all four laboratories have the same mean TAT\".\n# A single ANOVA keeps the overall error rate at alpha, whereas six separate\n# t-tests at alpha = 0.05 each would inflate the chance of a false positive.\nALPHA = 0.05\nlab_columns = list(lab.columns)\nanova_result = stats.f_oneway(*(lab[column] for column in lab_columns))\nprint(anova_result)\nprint(\"Reject H0\" if anova_result.pvalue < ALPHA else \"Fail to reject H0\", \"at alpha =\", ALPHA)",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Post-hoc: pairwise t-tests with a Bonferroni-corrected threshold show which\n# laboratories differ from each other.\nfrom itertools import combinations\n\nlab_pairs = list(combinations(lab_columns, 2))\nbonferroni_alpha = ALPHA / len(lab_pairs)\n\npairwise_rows = []\nfor first_lab, second_lab in lab_pairs:\n    pair_result = stats.ttest_ind(lab[first_lab], lab[second_lab])\n    pairwise_rows.append(\n        {\n            \"lab_a\": first_lab,\n            \"lab_b\": second_lab,\n            \"t_statistic\": pair_result.statistic,\n            \"p_value\": pair_result.pvalue,\n            \"significant\": pair_result.pvalue < bonferroni_alpha,\n        }\n    )\n\npd.DataFrame(pairwise_rows)",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Using the BuyerRatio data\n\n\n# Sales of products in four different regions are tabulated for males and females. Find if male-female buyer ratios are similar across regions.\n\n\n# H0: the male-female buyer ratio is the same across all regions\n# Ha: the male-female buyer ratio differs in at least one region",
      "execution_count": 34,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "buy=pd.read_csv(\"BuyerRatio.csv\")",
      "execution_count": 35,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "buy.head()",
      "execution_count": 36,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 36,
          "data": {
            "text/plain": "  Observed Values  East  West  North  South\n0           Males    50   142    131     70\n1         Females   435  1523   1356    750",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Observed Values</th>\n      <th>East</th>\n      <th>West</th>\n      <th>North</th>\n      <th>South</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Males</td>\n      <td>50</td>\n      <td>142</td>\n      <td>131</td>\n      <td>70</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Females</td>\n      <td>435</td>\n      <td>1523</td>\n      <td>1356</td>\n      <td>750</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "df=pd.DataFrame(buy)\n",
      "execution_count": 50,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "df.columns\n",
      "execution_count": 54,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 54,
          "data": {
            "text/plain": "Index(['Observed Values', 'East', 'West', 'North', 'South'], dtype='object')"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Male/female buyers per region are counts in a 2 x 4 contingency table, so the\n# chi-square test of independence is the appropriate test here (ANOVA compares\n# means of continuous samples and does not apply to a table of counts).\nimport scipy.stats as stats\n\nbuyer_counts = buy.set_index(\"Observed Values\")\nchi2_stat, buyer_p_value, buyer_dof, expected_counts = stats.chi2_contingency(buyer_counts.to_numpy())\n\nprint(\"chi-square statistic:\", chi2_stat)\nprint(\"degrees of freedom:\", buyer_dof)\nprint(\"p-value:\", buyer_p_value)\nprint(\"Reject H0\" if buyer_p_value < 0.05 else \"Fail to reject H0\", \"at the 5% significance level\")",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# For the counts shown in buy.head() the statistic is about 1.60 with 3 degrees of\n# freedom (p is roughly 0.66), which is greater than 0.05.\n# Hence we fail to reject the null hypothesis: the male-female buyer ratios are\n# similar across regions.\n",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "\n#use customer order form data \n\n# TeleCall uses 4 centers around the globe to process customer order forms. They audit a certain %  of the customer order forms. Any error in order form renders it defective and has to be reworked before processing.  The manager wants to check whether the defective %  varies by centre. Please analyze the data at 5% significance level and help the manager draw appropriate inferences\n\n\n\nimport seaborn as sns\nfrom sklearn.preprocessing import LabelEncoder\n\ncx=pd.read_csv(\"Costomer+OrderForm.csv\")",
      "execution_count": 84,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "cx.head()",
      "execution_count": 83,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 83,
          "data": {
            "text/plain": "  Phillippines   Indonesia       Malta       India\n0   Error Free  Error Free   Defective  Error Free\n1   Error Free  Error Free  Error Free   Defective\n2   Error Free   Defective   Defective  Error Free\n3   Error Free  Error Free  Error Free  Error Free\n4   Error Free  Error Free   Defective  Error Free",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Phillippines</th>\n      <th>Indonesia</th>\n      <th>Malta</th>\n      <th>India</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Error Free</td>\n      <td>Error Free</td>\n      <td>Defective</td>\n      <td>Error Free</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Error Free</td>\n      <td>Error Free</td>\n      <td>Error Free</td>\n      <td>Defective</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Error Free</td>\n      <td>Defective</td>\n      <td>Defective</td>\n      <td>Error Free</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Error Free</td>\n      <td>Error Free</td>\n      <td>Error Free</td>\n      <td>Error Free</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Error Free</td>\n      <td>Error Free</td>\n      <td>Defective</td>\n      <td>Error Free</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# The chi-square test works on counts, so tabulate how many forms are\n# \"Defective\" and \"Error Free\" in each centre. Each column is counted on its own,\n# so no centre's data is derived from another centre's column, and cx itself is\n# left unmodified.\norder_counts = cx.apply(pd.Series.value_counts).fillna(0).astype(int)\norder_counts",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# H0: the defective proportion is the same in every centre\n# Ha: at least one centre has a different defective proportion\n# alpha = 0.05; degrees of freedom = (rows - 1) * (columns - 1) of the count table\nchi_ref = stats.chi2_contingency(order_counts.to_numpy())\n\nprint(\"chi stats:\" , chi_ref[0])\nprint(\"P_value:\" , chi_ref[1])\nprint(\"degrees of freedom:\", chi_ref[2])\nprint(\"Reject H0\" if chi_ref[1] < 0.05 else \"Fail to reject H0\", \"at the 5% significance level\")",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "",
      "execution_count": null,
      "outputs": []
    }
  ],
  "metadata": {
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3",
      "language": "python"
    },
    "language_info": {
      "name": "python",
      "version": "3.8.8",
      "mimetype": "text/x-python",
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "pygments_lexer": "ipython3",
      "nbconvert_exporter": "python",
      "file_extension": ".py"
    },
    "varInspector": {
      "window_display": false,
      "cols": {
        "lenName": 16,
        "lenType": 16,
        "lenVar": 40
      },
      "kernels_config": {
        "python": {
          "library": "var_list.py",
          "delete_cmd_prefix": "del ",
          "delete_cmd_postfix": "",
          "varRefreshCmd": "print(var_dic_list())"
        },
        "r": {
          "library": "var_list.r",
          "delete_cmd_prefix": "rm(",
          "delete_cmd_postfix": ") ",
          "varRefreshCmd": "cat(var_dic_list()) "
        }
      },
      "types_to_exclude": [
        "module",
        "function",
        "builtin_function_or_method",
        "instance",
        "_Feature"
      ]
    },
    "gist": {
      "id": "dfd2638ac2f36a5c6df3eb9d3778c124",
      "data": {
        "description": "hypertesting .ipynb",
        "public": true
      }
    },
    "_draft": {
      "nbviewer_url": "https://gist.github.com/dfd2638ac2f36a5c6df3eb9d3778c124"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 5
}
	{
	"cells": [
	{
	"metadata": {
	"trusted": true
	},
	"id": "61a2dee8",
	"cell_type": "code",
	"source": "#using CUtlets data\n\n\n#A F&B manager wants to determine whether there is any significant difference in the diameter of the cutlet between two units. A randomly selected sample of cutlets was collected from both units and measured? Analyze the data and draw inferences at 5% significance level. Please state the assumptions and tests that you carried out to check validity of the assumptions.\n\n\n#import the basic library \nimport numpy as np \nimport pandas as pd",
	"execution_count": 2,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"id": "54ec80e8",
	"cell_type": "code",
	"source": "#load data set \ncutlets=pd.read_csv(\"Cutlets.csv\")",
	"execution_count": 3,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"id": "b3b90eaf",
	"cell_type": "code",
	"source": "#check for columns name to divide data into two diff dataset\ncutlets.columns",
	"execution_count": 4,
	"outputs": [
	{
	"output_type": "execute_result",
	"execution_count": 4,
	"data": {
	"text/plain": "Index(['Unit A', 'Unit B'], dtype='object')"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {
	"trusted": true
	},
	"id": "458f7ada",
	"cell_type": "code",
	"source": "#column 1 store in unitA\nunitA=cutlets[[\"Unit A\"]]",
	"execution_count": 5,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"id": "a8d2fd34",
	"cell_type": "code",
	"source": "unitA.head()",
	"execution_count": 57,
	"outputs": [
	{
	"output_type": "execute_result",
	"execution_count": 57,
	"data": {
	"text/plain": " Unit A\n0 6.8090\n1 6.4376\n2 6.9157\n3 7.3012\n4 7.4488",
	"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Unit A</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>6.8090</td>\n </tr>\n <tr>\n <th>1</th>\n <td>6.4376</td>\n </tr>\n <tr>\n <th>2</th>\n <td>6.9157</td>\n </tr>\n <tr>\n <th>3</th>\n <td>7.3012</td>\n </tr>\n <tr>\n <th>4</th>\n <td>7.4488</td>\n </tr>\n </tbody>\n</table>\n</div>"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {
	"trusted": true
	},
	"id": "aaa54ea9",
	"cell_type": "code",
	"source": "#column 2 store in unitB\nunitB=cutlets[['Unit B']]",
	"execution_count": 7,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"id": "c78615ba",
	"cell_type": "code",
	"source": "#we can apply two-sample t test to find the significant differnce \nfrom scipy import stats\nstats.ttest_ind(unitA,unitB)\n",
	"execution_count": 10,
	"outputs": [
	{
	"output_type": "execute_result",
	"execution_count": 10,
	"data": {
	"text/plain": "Ttest_indResult(statistic=array([0.72286887]), pvalue=array([0.47223947]))"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {
	"trusted": true
	},
	"id": "d8a708ba",
	"cell_type": "code",
	"source": "p_value = 0.4722",
	"execution_count": 11,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"id": "a5846fdb",
	"cell_type": "code",
	"source": "p_value",
	"execution_count": 12,
	"outputs": [
	{
	"output_type": "execute_result",
	"execution_count": 12,
	"data": {
	"text/plain": "0.4722"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {
	"trusted": true
	},
	"id": "8a8f5857",
	"cell_type": "code",
	"source": "# Ho=there is no significant difference between unit A and unit B\n# Ha=there is significant difference between unit A and unit B\n# at 5% signicance level \n# P_vlaue > significant value \n# Hence we accept null hypothesis ",
	"execution_count": 17,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "\n\n",
	"execution_count": 16,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "#using LAbTAT data \n\n# A hospital wants to determine whether there is any difference in the average Turn Around Time (TAT) of reports of the laboratories on their preferred list. They collected a random sample and recorded TAT for reports of 4 laboratories. TAT is defined as sample collected to report dispatch.\n \n# Analyze the data and determine whether there is any difference in average TAT among the different laboratories at 5% significance level.\n",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# H0= there is no difference between TAT in all lab \n# Ha= there is significant diff between TAT in all lab ",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "lab= pd.read_csv(\"LabTAT.csv\")",
	"execution_count": 18,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "lab.head()",
	"execution_count": 19,
	"outputs": [
	{
	"output_type": "execute_result",
	"execution_count": 19,
	"data": {
	"text/plain": " Laboratory 1 Laboratory 2 Laboratory 3 Laboratory 4\n0 185.35 165.53 176.70 166.13\n1 170.49 185.91 198.45 160.79\n2 192.77 194.92 201.23 185.18\n3 177.33 183.00 199.61 176.42\n4 193.41 169.57 204.63 152.60",
	"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Laboratory 1</th>\n <th>Laboratory 2</th>\n <th>Laboratory 3</th>\n <th>Laboratory 4</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>185.35</td>\n <td>165.53</td>\n <td>176.70</td>\n <td>166.13</td>\n </tr>\n <tr>\n <th>1</th>\n <td>170.49</td>\n <td>185.91</td>\n <td>198.45</td>\n <td>160.79</td>\n </tr>\n <tr>\n <th>2</th>\n <td>192.77</td>\n <td>194.92</td>\n <td>201.23</td>\n <td>185.18</td>\n </tr>\n <tr>\n <th>3</th>\n <td>177.33</td>\n <td>183.00</td>\n <td>199.61</td>\n <td>176.42</td>\n </tr>\n <tr>\n <th>4</th>\n <td>193.41</td>\n <td>169.57</td>\n <td>204.63</td>\n <td>152.60</td>\n </tr>\n </tbody>\n</table>\n</div>"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "lab1=lab[[\"Laboratory 1\"]]",
	"execution_count": 21,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "lab2=lab[[\"Laboratory 2\"]]",
	"execution_count": 22,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "lab3=lab[[\"Laboratory 3\"]]",
	"execution_count": 23,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "lab4=lab[[\"Laboratory 4\"]]",
	"execution_count": 24,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "lab4.head()",
	"execution_count": 25,
	"outputs": [
	{
	"output_type": "execute_result",
	"execution_count": 25,
	"data": {
	"text/plain": " Laboratory 4\n0 166.13\n1 160.79\n2 185.18\n3 176.42\n4 152.60",
	"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Laboratory 4</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>166.13</td>\n </tr>\n <tr>\n <th>1</th>\n <td>160.79</td>\n </tr>\n <tr>\n <th>2</th>\n <td>185.18</td>\n </tr>\n <tr>\n <th>3</th>\n <td>176.42</td>\n </tr>\n <tr>\n <th>4</th>\n <td>152.60</td>\n </tr>\n </tbody>\n</table>\n</div>"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "stats.ttest_ind(lab1,lab2)\n\n#p_value is greater than significance value we accept null hypothesis",
	"execution_count": 27,
	"outputs": [
	{
	"output_type": "execute_result",
	"execution_count": 27,
	"data": {
	"text/plain": "Ttest_indResult(statistic=array([-0.29752168]), pvalue=array([0.76632779]))"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# Two-sample t-test: Laboratory 1 vs Laboratory 3\n# p-value (1.49e-23) < 0.05, so we reject the null hypothesis of equal means\nstats.ttest_ind(a=lab1, b=lab3)",
	"execution_count": 28,
	"outputs": [
	{
	"output_type": "execute_result",
	"execution_count": 28,
	"data": {
	"text/plain": "Ttest_indResult(statistic=array([-11.16547802]), pvalue=array([1.48852644e-23]))"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# Two-sample t-test: Laboratory 1 vs Laboratory 4\n# p-value (4.52e-14) < 0.05, so we reject the null hypothesis of equal means\nstats.ttest_ind(a=lab1, b=lab4)",
	"execution_count": 29,
	"outputs": [
	{
	"output_type": "execute_result",
	"execution_count": 29,
	"data": {
	"text/plain": "Ttest_indResult(statistic=array([8.02886396]), pvalue=array([4.52013705e-14]))"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# Two-sample t-test: Laboratory 2 vs Laboratory 3\n# p-value (7.03e-21) < 0.05, so we reject the null hypothesis of equal means\nstats.ttest_ind(a=lab2, b=lab3)",
	"execution_count": 30,
	"outputs": [
	{
	"output_type": "execute_result",
	"execution_count": 30,
	"data": {
	"text/plain": "Ttest_indResult(statistic=array([-10.32129161]), pvalue=array([7.0291249e-21]))"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# Two-sample t-test: Laboratory 2 vs Laboratory 4\n# p-value (1.43e-13) < 0.05, so we reject the null hypothesis of equal means\nstats.ttest_ind(a=lab2, b=lab4)",
	"execution_count": 31,
	"outputs": [
	{
	"output_type": "execute_result",
	"execution_count": 31,
	"data": {
	"text/plain": "Ttest_indResult(statistic=array([7.84855037]), pvalue=array([1.42616937e-13]))"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# Two-sample t-test: Laboratory 3 vs Laboratory 4\n# p-value (2.11e-45) < 0.05, so we reject the null hypothesis of equal means\nstats.ttest_ind(a=lab3, b=lab4)",
	"execution_count": 32,
	"outputs": [
	{
	"output_type": "execute_result",
	"execution_count": 32,
	"data": {
	"text/plain": "Ttest_indResult(statistic=array([17.72976907]), pvalue=array([2.1080551e-45]))"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# Using the Buyer Ratio data\n\n\n# Sales of products in four different regions are tabulated for males and females. Find if male-female buyer ratios are similar across regions.\n\n\n# H0: there is no difference in the male-female buyer ratio across regions\n# Ha: there is a significant difference in the male-female buyer ratio across regions",
	"execution_count": 34,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# Load the male/female buyer counts per region\nbuy = pd.read_csv(filepath_or_buffer=\"BuyerRatio.csv\")",
	"execution_count": 35,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# Preview the table (it has only two rows: Males and Females)\nbuy.head(n=5)",
	"execution_count": 36,
	"outputs": [
	{
	"output_type": "execute_result",
	"execution_count": 36,
	"data": {
	"text/plain": " Observed Values East West North South\n0 Males 50 142 131 70\n1 Females 435 1523 1356 750",
	"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Observed Values</th>\n <th>East</th>\n <th>West</th>\n <th>North</th>\n <th>South</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>Males</td>\n <td>50</td>\n <td>142</td>\n <td>131</td>\n <td>70</td>\n </tr>\n <tr>\n <th>1</th>\n <td>Females</td>\n <td>435</td>\n <td>1523</td>\n <td>1356</td>\n <td>750</td>\n </tr>\n </tbody>\n</table>\n</div>"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# Wrap the loaded table in a DataFrame under the shorter name df\n# (buy is already a DataFrame, so this adds no new data)\ndf = pd.DataFrame(data=buy)\n",
	"execution_count": 50,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# List the column labels: the category column followed by the four regions\ndf.columns\n",
	"execution_count": 54,
	"outputs": [
	{
	"output_type": "execute_result",
	"execution_count": 54,
	"data": {
	"text/plain": "Index(['Observed Values', 'East', 'West', 'North', 'South'], dtype='object')"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# The table holds buyer counts by gender and region, i.e. a 2x4 contingency table,\n# so a chi-square test of independence is used here; one-way ANOVA (stats.f_oneway)\n# compares means of continuous samples and does not apply to count data.\n# H0: the male-female buyer ratio is the same in every region.\n\nimport scipy.stats as stats\n\n# Column 0 holds the row labels, so only the four region columns are passed.\n# Returns (chi2 statistic, p-value, degrees of freedom, expected frequencies).\nstats.chi2_contingency(df.iloc[:, 1:])",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# NOTE(review): this evaluates the standard normal CDF at 0.05 (about 0.52); it is not a\n# critical value or p-value for the buyer-ratio test and looks like leftover scratch work.\nstats.norm.cdf(x=0.05)",
	"execution_count": 47,
	"outputs": [
	{
	"output_type": "execute_result",
	"execution_count": 47,
	"data": {
	"text/plain": "0.5199388058383725"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# p_value = 0.666\n# The p-value is greater than the significance level, hence we fail to reject the null hypothesis\n",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# Customer order form data\n#\n# TeleCall uses 4 centres around the globe to process customer order forms. A share of\n# the forms is audited; any error makes a form defective and it must be reworked before\n# processing. Question: does the defective % vary by centre (5% significance level)?\n\nimport seaborn as sns\nfrom sklearn.preprocessing import LabelEncoder\n\n# The file name (including its spelling) has to match the CSV on disk\ncx = pd.read_csv(filepath_or_buffer=\"Costomer+OrderForm.csv\")",
	"execution_count": 84,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# Preview the first five audited order forms per centre\ncx.head(n=5)",
	"execution_count": 83,
	"outputs": [
	{
	"output_type": "execute_result",
	"execution_count": 83,
	"data": {
	"text/plain": " Phillippines Indonesia Malta India\n0 Error Free Error Free Defective Error Free\n1 Error Free Error Free Error Free Defective\n2 Error Free Defective Defective Error Free\n3 Error Free Error Free Error Free Error Free\n4 Error Free Error Free Defective Error Free",
	"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Phillippines</th>\n <th>Indonesia</th>\n <th>Malta</th>\n <th>India</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>Error Free</td>\n <td>Error Free</td>\n <td>Defective</td>\n <td>Error Free</td>\n </tr>\n <tr>\n <th>1</th>\n <td>Error Free</td>\n <td>Error Free</td>\n <td>Error Free</td>\n <td>Defective</td>\n </tr>\n <tr>\n <th>2</th>\n <td>Error Free</td>\n <td>Defective</td>\n <td>Defective</td>\n <td>Error Free</td>\n </tr>\n <tr>\n <th>3</th>\n <td>Error Free</td>\n <td>Error Free</td>\n <td>Error Free</td>\n <td>Error Free</td>\n </tr>\n <tr>\n <th>4</th>\n <td>Error Free</td>\n <td>Error Free</td>\n <td>Defective</td>\n <td>Error Free</td>\n </tr>\n </tbody>\n</table>\n</div>"
	},
	"metadata": {}
	}
	]
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# Encode each centre's outcome labels as integers, column by column.\n# Every column is fitted on its OWN values; fitting Malta/India on the already-encoded\n# Phillippines/Indonesia columns would overwrite their data with copies of those columns.\n\nlabel_encoder = LabelEncoder()\nfor centre in ['Phillippines', 'Indonesia', 'Malta', 'India']:\n    cx[centre] = label_encoder.fit_transform(cx[centre])",
	"execution_count": 85,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# Check row count, null counts and dtypes after encoding\ncx.info()",
	"execution_count": 99,
	"outputs": [
	{
	"output_type": "stream",
	"text": "<class 'pandas.core.frame.DataFrame'>\nRangeIndex: 300 entries, 0 to 299\nData columns (total 4 columns):\n # Column Non-Null Count Dtype\n--- ------ -------------- -----\n 0 Phillippines 300 non-null int32\n 1 Indonesia 300 non-null int32\n 2 Malta 300 non-null int64\n 3 India 300 non-null int64\ndtypes: int32(2), int64(2)\nmemory usage: 7.2 KB\n",
	"name": "stdout"
	}
	]
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
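	"source": "# n = 300 order forms per centre\n# NOTE(review): df = 299 is n - 1; a chi-square test on a 2x4 (outcome x centre) table has df = (2-1)*(4-1) = 3\n# alpha = 0.05",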
	"execution_count": 111,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# Build the 2x4 table of outcome counts per centre and run the chi-square test of\n# independence. chi_ref is not assigned by any cell after the data is loaded, so it is\n# computed here to keep the notebook runnable from a fresh kernel.\n# H0: the proportion of defective forms is the same in every centre.\noutcome_counts = cx.apply(pd.Series.value_counts).fillna(0)\nchi_ref = stats.chi2_contingency(outcome_counts)\n\n# Reject H0 only if the printed P_value is below alpha = 0.05\nprint(\"chi stats:\" , chi_ref[0])\nprint(\"P_value:\" , chi_ref[1])",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "",
	"execution_count": null,
	"outputs": []
	}
	],
	"metadata": {
	"kernelspec": {
	"name": "python3",
	"display_name": "Python 3",
	"language": "python"
	},
	"language_info": {
	"name": "python",
	"version": "3.8.8",
	"mimetype": "text/x-python",
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"pygments_lexer": "ipython3",
	"nbconvert_exporter": "python",
	"file_extension": ".py"
	},
	"varInspector": {
	"window_display": false,
	"cols": {
	"lenName": 16,
	"lenType": 16,
	"lenVar": 40
	},
	"kernels_config": {
	"python": {
	"library": "var_list.py",
	"delete_cmd_prefix": "del ",
	"delete_cmd_postfix": "",
	"varRefreshCmd": "print(var_dic_list())"
	},
	"r": {
	"library": "var_list.r",
	"delete_cmd_prefix": "rm(",
	"delete_cmd_postfix": ") ",
	"varRefreshCmd": "cat(var_dic_list()) "
	}
	},
	"types_to_exclude": [
	"module",
	"function",
	"builtin_function_or_method",
	"instance",
	"_Feature"
	]
	},
	"gist": {
	"id": "dfd2638ac2f36a5c6df3eb9d3778c124",
	"data": {
	"description": "hypertesting .ipynb",
	"public": true
	}
	},
	"_draft": {
	"nbviewer_url": "https://gist.github.com/dfd2638ac2f36a5c6df3eb9d3778c124"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 5
	}