@rajeshpv
Created June 22, 2022 20:57
Python 3.9.13 (tags/v3.9.13:6de2ca5, May 17 2022, 16:36:42) [MSC v.1929 64 bit (AMD64)]
Type "copyright", "credits" or "license" for more information.
IPython -- An enhanced Interactive Python.
In [1]: import numpy as np # import numpy into Python and use it under its alias 'np'
...: import pandas as pd # import pandas into Python and use it under its alias 'pd'
...: import statsmodels.api as sm # import statsmodels.api into Python and use it under its alias 'sm'
...: import sklearn # import sklearn into Python
...: import matplotlib.pyplot as plt # import matplotlib.pyplot into Python and use it under its alias 'plt'
...: import seaborn as sns # import the seaborn library and use it under its alias 'sns'
...: from scipy import stats,integrate
...: import pylab # import pylab into Python
...: import scipy.stats as stats # import scipy.stats into Python and use it under its alias 'stats'
...:
...: # Now we can read/import the data into Python. The data is included in a typical .csv data file.
In [2]: Telecom_Churn_Data = pd.read_csv("C:/Users/rao8r/Downloads/mod-7/to-sandhya/TelecomCustomerChurnData.csv", sep =",")
In [3]: Telecom_Churn_Data.head(20) # show the first 20 rows in the data
Out[3]:
Tenure PhoneService ... TotalCharges Churn
0 1 No ... 29.85 0
1 34 Yes ... 1889.50 0
2 2 Yes ... 108.15 1
3 45 No ... 1840.75 0
4 2 Yes ... 151.65 1
5 8 Yes ... 820.50 1
6 22 Yes ... 1949.40 0
7 10 No ... 301.90 0
8 28 Yes ... 3046.05 1
9 62 Yes ... 3487.95 0
10 13 Yes ... 587.45 0
11 16 Yes ... 326.80 0
12 58 Yes ... 5681.10 0
13 49 Yes ... 5036.30 1
14 25 Yes ... 2686.05 0
15 69 Yes ... 7895.15 0
16 52 Yes ... 1022.95 0
17 71 Yes ... 7382.25 0
18 10 Yes ... 528.35 1
19 21 Yes ... 1862.90 0
[20 rows x 7 columns]
In [4]: Telecom_Churn_Data.tail(20) # show the last 20 rows in the data
Out[4]:
Tenure PhoneService ... TotalCharges Churn
7011 72 Yes ... 7544.30 0
7012 63 Yes ... 6479.40 0
7013 44 Yes ... 3626.35 0
7014 18 Yes ... 1679.40 0
7015 9 Yes ... 403.35 1
7016 13 Yes ... 931.55 0
7017 68 Yes ... 4326.25 0
7018 6 No ... 263.05 0
7019 2 Yes ... 39.25 0
7020 55 Yes ... 3316.10 0
7021 1 Yes ... 75.75 1
7022 38 Yes ... 2625.25 0
7023 67 Yes ... 6886.25 1
7024 19 Yes ... 1495.10 0
7025 12 No ... 743.30 0
7026 72 Yes ... 1419.40 0
7027 24 Yes ... 1990.50 0
7028 72 Yes ... 7362.90 0
7029 11 No ... 346.45 0
7030 4 Yes ... 306.60 1
[20 rows x 7 columns]
In [5]: Telecom_Churn_Data.columns.tolist() # show the names of columns/variables in the data
Out[5]:
['Tenure',
'PhoneService',
'Contract',
'PaperlessBilling',
'PaymentMethod',
'TotalCharges',
'Churn']
In [6]: Telecom_Churn_Data.shape # output the dimension of the Telecom_Churn_Data object. This is similar to dim() in R
Out[6]: (7031, 7)
In [7]: Telecom_Churn_Data.dtypes # show the data types of the variables in the data
Out[7]:
Tenure int64
PhoneService object
Contract object
PaperlessBilling object
PaymentMethod object
TotalCharges float64
Churn int64
dtype: object
In [8]: Stat_summary_table = Telecom_Churn_Data.describe().T # store summary statistics for the numeric variables (transposed)
In [9]: check_missing_value = Telecom_Churn_Data.isnull().sum(axis=1)
In [10]: check_missing_value[check_missing_value!=0].count()
Out[10]: 0
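A per-column view of the missing values reaches the same conclusion and is often easier to scan; a minimal sketch on the same DataFrame:

Telecom_Churn_Data.isnull().sum()  # count of missing values in each column
# a non-zero count here would call for imputation or dropping before modeling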
In [11]: Telecom_Churn_Data.info() # you can check whether there are nulls in each variable.
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7031 entries, 0 to 7030
Data columns (total 7 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Tenure 7031 non-null int64
1 PhoneService 7031 non-null object
2 Contract 7031 non-null object
3 PaperlessBilling 7031 non-null object
4 PaymentMethod 7031 non-null object
5 TotalCharges 7031 non-null float64
6 Churn 7031 non-null int64
dtypes: float64(1), int64(2), object(4)
memory usage: 384.6+ KB
In [12]: List_Cate_Var = ["Tenure","PhoneService", "Contract", "PaperlessBilling", "PaymentMethod", "TotalCharges", "Churn"]
In [13]: for var_name in List_Cate_Var:
...: print("The Frequency Table of the", var_name, "Variable")
...: print(Telecom_Churn_Data[var_name].value_counts()) # generate the frequency table for each categorical variable in the loop
The Frequency Table of the Tenure Variable
1 613
72 362
2 238
3 200
4 176
...
38 59
28 57
39 56
44 51
36 50
Name: Tenure, Length: 72, dtype: int64
The Frequency Table of the PhoneService Variable
Yes 6351
No 680
Name: PhoneService, dtype: int64
The Frequency Table of the Contract Variable
Month-to-month 3875
Two year 1684
One year 1472
Name: Contract, dtype: int64
The Frequency Table of the PaperlessBilling Variable
Yes 4167
No 2864
Name: PaperlessBilling, dtype: int64
The Frequency Table of the PaymentMethod Variable
Electronic check 2365
Mailed check 1604
Bank transfer (automatic) 1541
Credit card (automatic) 1521
Name: PaymentMethod, dtype: int64
The Frequency Table of the TotalCharges Variable
20.20 11
19.75 9
19.90 8
20.05 8
19.65 8
..
6849.40 1
692.35 1
130.15 1
3211.90 1
306.60 1
Name: TotalCharges, Length: 6529, dtype: int64
The Frequency Table of the Churn Variable
0 5162
1 1869
Name: Churn, dtype: int64
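When the raw counts are hard to compare, value_counts(normalize=True) reports proportions instead; a minimal sketch reusing the List_Cate_Var loop above:

for var_name in List_Cate_Var:
    print("The Proportion Table of the", var_name, "Variable")
    print(Telecom_Churn_Data[var_name].value_counts(normalize=True).round(3))  # shares instead of counts
# for Churn this makes the class imbalance explicit (1869 of 7031, roughly 27%, churned)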
In [14]: plt.figure() # open a new figure window
Out[14]: <Figure size 432x288 with 0 Axes>
In [15]: plt.hist(Telecom_Churn_Data['Tenure'], bins=25) # generate a histogram with the number of bins = 25
Out[15]:
(array([1051., 419., 373., 332., 284., 264., 144., 238., 252.,
201., 206., 217., 115., 179., 200., 186., 198., 216.,
138., 209., 203., 218., 244., 293., 651.]),
array([ 1. , 3.84, 6.68, 9.52, 12.36, 15.2 , 18.04, 20.88, 23.72,
26.56, 29.4 , 32.24, 35.08, 37.92, 40.76, 43.6 , 46.44, 49.28,
52.12, 54.96, 57.8 , 60.64, 63.48, 66.32, 69.16, 72. ]),
<BarContainer object of 25 artists>)
Warning
Figures now render in the Plots pane by default. To make them also appear inline in the Console, uncheck "Mute Inline Plotting" under the Plots pane options menu.
In [16]: plt.hist(Telecom_Churn_Data['Tenure'], bins=50) # generate a histogram with the number of bins = 50
Out[16]:
(array([851., 200., 309., 110., 254., 119., 116., 216., 109., 175., 80.,
184., 73., 71., 153., 85., 173., 79., 72., 129., 72., 134.,
64., 153., 50., 65., 115., 64., 135., 65., 112., 74., 68.,
130., 68., 148., 70., 68., 144., 65., 127., 76., 146., 72.,
80., 164., 98., 195., 119., 532.]),
array([ 1. , 2.42, 3.84, 5.26, 6.68, 8.1 , 9.52, 10.94, 12.36,
13.78, 15.2 , 16.62, 18.04, 19.46, 20.88, 22.3 , 23.72, 25.14,
26.56, 27.98, 29.4 , 30.82, 32.24, 33.66, 35.08, 36.5 , 37.92,
39.34, 40.76, 42.18, 43.6 , 45.02, 46.44, 47.86, 49.28, 50.7 ,
52.12, 53.54, 54.96, 56.38, 57.8 , 59.22, 60.64, 62.06, 63.48,
64.9 , 66.32, 67.74, 69.16, 70.58, 72. ]),
<BarContainer object of 50 artists>)
In [17]: plt.hist(Telecom_Churn_Data['Tenure'], bins=10) # generate a histogram with the number of bins = 10
Out[17]:
(array([1724., 735., 561., 538., 473., 444., 452., 495., 501.,
1108.]),
array([ 1. , 8.1, 15.2, 22.3, 29.4, 36.5, 43.6, 50.7, 57.8, 64.9, 72. ]),
<BarContainer object of 10 artists>)
In [18]: plt.xlabel("Tenure") # add a x-label
Out[18]: Text(0.5, 0, 'Tenure')
In [19]: plt.ylabel("Frequency") # add a y-label
Out[19]: Text(0, 0.5, 'Frequency')
In [20]: plt.title("Histogram of Tenure") # add a title for the chart
Out[20]: Text(0.5, 1.0, 'Histogram of Tenure')
In [21]: plt.figure() # open a new figure window
...: plt.xlabel("Tenure") # add a x-label
...: plt.ylabel("Frequency") # add a y-label
...: plt.title("Histogram of Tenure") # add a title for the chart
...: plt.hist(Telecom_Churn_Data['Tenure'], bins=25) # generate a histogram with the number of bins = 25
...: # here bins = 25 means that 25 classes/groups are specified
Out[21]:
(array([1051., 419., 373., 332., 284., 264., 144., 238., 252.,
201., 206., 217., 115., 179., 200., 186., 198., 216.,
138., 209., 203., 218., 244., 293., 651.]),
array([ 1. , 3.84, 6.68, 9.52, 12.36, 15.2 , 18.04, 20.88, 23.72,
26.56, 29.4 , 32.24, 35.08, 37.92, 40.76, 43.6 , 46.44, 49.28,
52.12, 54.96, 57.8 , 60.64, 63.48, 66.32, 69.16, 72. ]),
<BarContainer object of 25 artists>)
In [22]: plt.figure() # open a new figure window
...: sns.kdeplot(Telecom_Churn_Data['Tenure']) # 'kde' here means a kernel density
Out[22]: <AxesSubplot:xlabel='Tenure', ylabel='Density'>
In [23]: plt.figure() # open a new figure window
...: sns.distplot(Telecom_Churn_Data['Tenure']) # sns.distplot automatically chooses the bins and overlays a kernel density estimate
C:\Users\rao8r\AppData\Local\Programs\Python\Python39\lib\site-packages\seaborn\distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
warnings.warn(msg, FutureWarning)
Out[23]: <AxesSubplot:xlabel='Tenure', ylabel='Density'>
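As the FutureWarning says, distplot is deprecated. A minimal sketch of the recommended replacement, sns.histplot with a KDE overlay, which produces an equivalent plot on current seaborn versions:

plt.figure()  # open a new figure window
sns.histplot(Telecom_Churn_Data['Tenure'], kde=True)  # histogram plus kernel density overlay; replaces distplot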
In [24]: plt.figure() # open a new figure window
...: sns.boxplot(Telecom_Churn_Data['Tenure'])
C:\Users\rao8r\AppData\Local\Programs\Python\Python39\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
warnings.warn(
Out[24]: <AxesSubplot:xlabel='Tenure'>
In [25]:
...: plt.figure() # open a new figure window
...: plt.xlabel("TotalCharges") # add a x-label
...: plt.ylabel("Frequency") # add a y-label
...: plt.title("Histogram of TotalCharges") # add a title for the chart
...: plt.hist(Telecom_Churn_Data['TotalCharges'], bins=25) # generate a histogram with the number of bins = 25
...: # here bins = 25 means that 25 classes/groups are specified
Out[25]:
(array([1678., 728., 587., 536., 408., 318., 243., 215., 208.,
195., 173., 196., 165., 174., 158., 146., 164., 148.,
135., 111., 101., 87., 77., 55., 25.]),
array([ 18.8 , 365.44, 712.08, 1058.72, 1405.36, 1752. , 2098.64,
2445.28, 2791.92, 3138.56, 3485.2 , 3831.84, 4178.48, 4525.12,
4871.76, 5218.4 , 5565.04, 5911.68, 6258.32, 6604.96, 6951.6 ,
7298.24, 7644.88, 7991.52, 8338.16, 8684.8 ]),
<BarContainer object of 25 artists>)
In [26]:
...: plt.figure() # open a new figure window
...: plt.xlabel("TotalCharges") # add a x-label
...: plt.ylabel("Frequency") # add a y-label
...: plt.title("Histogram of TotalCharges") # add a title for the chart
...: plt.hist(Telecom_Churn_Data['TotalCharges'], bins=15) # generate a histogram with the number of bins = 15
...: # here bins = 15 means that 15 classes/groups are specified
Out[26]:
(array([2197., 970., 770., 477., 364., 338., 309., 281., 276.,
267., 236., 201., 157., 130., 58.]),
array([ 18.8 , 596.53333333, 1174.26666667, 1752. ,
2329.73333333, 2907.46666667, 3485.2 , 4062.93333333,
4640.66666667, 5218.4 , 5796.13333333, 6373.86666667,
6951.6 , 7529.33333333, 8107.06666667, 8684.8 ]),
<BarContainer object of 15 artists>)
In [27]: plt.figure() # open a new figure window
...: sns.kdeplot(Telecom_Churn_Data['TotalCharges']) # 'kde' here means a kernel density
Out[27]: <AxesSubplot:xlabel='TotalCharges', ylabel='Density'>
In [28]:
...: plt.figure() # open a new figure window
...: sns.distplot(Telecom_Churn_Data['TotalCharges']) # sns.distplot automatically chooses the bins and overlays a kernel density estimate
C:\Users\rao8r\AppData\Local\Programs\Python\Python39\lib\site-packages\seaborn\distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
warnings.warn(msg, FutureWarning)
Out[28]: <AxesSubplot:xlabel='TotalCharges', ylabel='Density'>
In [29]:
...:
...: plt.figure() # open a new figure window
...: sns.boxplot(Telecom_Churn_Data['TotalCharges'])
C:\Users\rao8r\AppData\Local\Programs\Python\Python39\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
warnings.warn(
Out[29]: <AxesSubplot:xlabel='TotalCharges'>
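The FutureWarning here is only about positional arguments; passing the column as a keyword argument silences it without changing the plot. A minimal sketch:

plt.figure()  # open a new figure window
sns.boxplot(x=Telecom_Churn_Data['TotalCharges'])  # pass the column as the keyword arg x, as seaborn 0.12+ requires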
In [30]: from scipy.stats import chi2_contingency # import the chi2_contingency module from the scipy.stats
In [31]: chi2_contingency(pd.crosstab(Telecom_Churn_Data['Churn'], Telecom_Churn_Data['Tenure']))
Out[31]:
(1059.9449494025498,
4.084542332671357e-176,
71,
array([[450.05063291, 174.73417722, 146.83544304, 129.21518987,
97.64556962, 80.75949367, 96.17721519, 90.30379747,
87.36708861, 85.16455696, 72.6835443 , 85.89873418,
80.02531646, 55.79746835, 72.6835443 , 58.73417722,
63.87341772, 71.21518987, 53.59493671, 52.12658228,
46.25316456, 66.07594937, 62.40506329, 69.01265823,
58. , 58. , 52.86075949, 41.84810127,
52.86075949, 52.86075949, 47.72151899, 50.65822785,
46.98734177, 47.72151899, 64.60759494, 36.70886076,
47.72151899, 43.3164557 , 41.11392405, 46.98734177,
51.39240506, 47.72151899, 47.72151899, 37.44303797,
44.78481013, 54.32911392, 49.92405063, 46.98734177,
48.4556962 , 49.92405063, 49.92405063, 58.73417722,
51.39240506, 49.92405063, 46.98734177, 58.73417722,
47.72151899, 49.18987342, 44.05063291, 55.79746835,
55.79746835, 51.39240506, 52.86075949, 58.73417722,
55.79746835, 64.60759494, 71.94936709, 73.41772152,
69.74683544, 87.36708861, 124.81012658, 265.7721519 ],
[162.94936709, 63.26582278, 53.16455696, 46.78481013,
35.35443038, 29.24050633, 34.82278481, 32.69620253,
31.63291139, 30.83544304, 26.3164557 , 31.10126582,
28.97468354, 20.20253165, 26.3164557 , 21.26582278,
23.12658228, 25.78481013, 19.40506329, 18.87341772,
16.74683544, 23.92405063, 22.59493671, 24.98734177,
21. , 21. , 19.13924051, 15.15189873,
19.13924051, 19.13924051, 17.27848101, 18.34177215,
17.01265823, 17.27848101, 23.39240506, 13.29113924,
17.27848101, 15.6835443 , 14.88607595, 17.01265823,
18.60759494, 17.27848101, 17.27848101, 13.55696203,
16.21518987, 19.67088608, 18.07594937, 17.01265823,
17.5443038 , 18.07594937, 18.07594937, 21.26582278,
18.60759494, 18.07594937, 17.01265823, 21.26582278,
17.27848101, 17.81012658, 15.94936709, 20.20253165,
20.20253165, 18.60759494, 19.13924051, 21.26582278,
20.20253165, 23.39240506, 26.05063291, 26.58227848,
25.25316456, 31.63291139, 45.18987342, 96.2278481 ]]))
In [32]: chi2_contingency(pd.crosstab(Telecom_Churn_Data['Churn'], Telecom_Churn_Data['PhoneService']))
Out[32]:
(0.8780612721866795,
0.3487332316441293,
1,
array([[ 499.24050633, 4662.75949367],
[ 180.75949367, 1688.24050633]]))
In [33]: chi2_contingency(pd.crosstab(Telecom_Churn_Data['Churn'], Telecom_Churn_Data['Contract']))
Out[33]:
(1179.080956754422,
9.243221963845809e-257,
2,
array([[2844.93670886, 1080.70886076, 1236.35443038],
[1030.06329114, 391.29113924, 447.64556962]]))
In [34]: chi2_contingency(pd.crosstab(Telecom_Churn_Data['Churn'], Telecom_Churn_Data['PaperlessBilling']))
Out[34]:
(257.06741761837947,
7.477609371069561e-58,
1,
array([[2102.6835443, 3059.3164557],
[ 761.3164557, 1107.6835443]]))
In [35]: chi2_contingency(pd.crosstab(Telecom_Churn_Data['Churn'], Telecom_Churn_Data['PaymentMethod']))
Out[35]:
(645.1528611114002,
1.6378691391382898e-139,
3,
array([[1131.36708861, 1116.6835443 , 1736.32911392, 1177.62025316],
[ 409.63291139, 404.3164557 , 628.67088608, 426.37974684]]))
In [36]: chi2_contingency(pd.crosstab(Telecom_Churn_Data['Churn'], Telecom_Churn_Data['TotalCharges']))
Out[36]:
(6502.6232970687415,
0.5857175785198268,
6528,
array([[0.73417722, 1.46835443, 0.73417722, ..., 0.73417722, 0.73417722,
0.73417722],
[0.26582278, 0.53164557, 0.26582278, ..., 0.26582278, 0.26582278,
0.26582278]]))
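To compare the chi-square tests without scrolling through the expected-frequency arrays, a small loop can print just the statistic, p-value, and degrees of freedom for each categorical predictor; a minimal sketch reusing the crosstabs above (chi2, p_value, dof, expected are just local names for the returned tuple):

for var_name in ['PhoneService', 'Contract', 'PaperlessBilling', 'PaymentMethod']:
    chi2, p_value, dof, expected = chi2_contingency(pd.crosstab(Telecom_Churn_Data['Churn'], Telecom_Churn_Data[var_name]))
    print(var_name, "-> chi2 =", round(chi2, 2), ", p-value =", p_value, ", dof =", dof)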

In [37]: Telecom_Churn_Data['PhoneService'] = Telecom_Churn_Data['PhoneService'].astype('category')

In [38]: Telecom_Churn_Data['Contract'] = Telecom_Churn_Data['Contract'].astype('category')

In [39]: Telecom_Churn_Data['PaperlessBilling'] = Telecom_Churn_Data['PaperlessBilling'].astype('category')

In [40]: Telecom_Churn_Data['PaymentMethod'] = Telecom_Churn_Data['PaymentMethod'].astype('category')

In [41]: indep_column_list = ['Tenure', 'Contract', 'PaperlessBilling', 'PaymentMethod', 'TotalCharges']

In [42]: indep_column_list # take a look at the new list. 
Out[42]: ['Tenure', 'Contract', 'PaperlessBilling', 'PaymentMethod', 'TotalCharges']

In [43]: # Now we can create a dataframe with dummies being created for all the categorical independent variables,

In [44]: # while the numeric variables, such as Tenure and TotalCharges, remain numeric in the dataframe:

In [45]: indep_dummy_data = pd.get_dummies(Telecom_Churn_Data[indep_column_list], drop_first = True)

In [46]: # note that drop_first = True means that the first level in each categorical variable is skipped when creating the dummies.

In [47]: indep_dummy_data.head()  # take a look at the independent variables' dataframe after the dummies are created. The first five rows.
Out[47]: 
   Tenure  ...  PaymentMethod_Mailed check
0       1  ...                           0
1      34  ...                           1
2       2  ...                           1
3      45  ...                           0
4       2  ...                           0

[5 rows x 8 columns]

In [48]: len(indep_dummy_data.columns)  # output the total number of the columns in the dataframe
Out[48]: 8

In [49]: # Now let's fit the logistic regression model

In [50]: # We can use the statsmodels.api's sm.Logit to fit a logit model

In [51]: # before fitting the logit model, make sure to include intercept in the model by adding a new column to the 

In [52]: # independent variables' dataframe:

In [53]: indep_dummy_data['Intercept'] = 1.0 # add a column of 1's to the independent variables' dataframe so that the logit 

In [54]: # model will include an intercept in the fitting process.

In [55]: indep_dummy_data.head(5) # take a look at the first five rows in the dataframe 
Out[55]: 
   Tenure  TotalCharges  ...  PaymentMethod_Mailed check  Intercept
0       1         29.85  ...                           0        1.0
1      34       1889.50  ...                           1        1.0
2       2        108.15  ...                           1        1.0
3      45       1840.75  ...                           0        1.0
4       2        151.65  ...                           0        1.0

[5 rows x 9 columns]
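Adding the column of 1's by hand works fine; statsmodels also provides sm.add_constant, which does the same job and names the column 'const'. A minimal sketch of that alternative (indep_with_const is just an illustrative name):

indep_with_const = sm.add_constant(pd.get_dummies(Telecom_Churn_Data[indep_column_list], drop_first=True))  # prepends a 'const' column of 1.0
indep_with_const.head()  # equivalent to the manual 'Intercept' column above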

In [56]: # Note: to use sm to estimate a logit model, the dependent variable (Churn) MUST be coded as 0s and 1s. Other levels cannot be used.

In [57]: sm_logit_model = sm.Logit(Telecom_Churn_Data['Churn'], indep_dummy_data)

In [58]: # note that the format of the fitting specification is sm.Logit(y,X) where X is the independent variables' matrix, 

In [59]: # and y is the dependent variable.

In [60]: sm_fitting_results = sm_logit_model.fit() # fit the logit model
Optimization terminated successfully.
         Current function value: 0.434784
         Iterations 8

In [61]: sm_fitting_results.summary() # you can also output the whole results, including the Pseudo R-square and the log-likelihood value
Out[61]: 
<class 'statsmodels.iolib.summary.Summary'>
"""
                           Logit Regression Results                           
==============================================================================
Dep. Variable:                  Churn   No. Observations:                 7031
Model:                          Logit   Df Residuals:                     7022
Method:                           MLE   Df Model:                            8
Date:                Wed, 22 Jun 2022   Pseudo R-squ.:                  0.2492
Time:                        17:33:51   Log-Likelihood:                -3057.0
converged:                       True   LL-Null:                       -4071.4
Covariance Type:            nonrobust   LLR p-value:                     0.000
=========================================================================================================
                                            coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------------------------------
Tenure                                   -0.0839      0.005    -15.485      0.000      -0.095      -0.073
TotalCharges                              0.0007    5.3e-05     12.568      0.000       0.001       0.001
Contract_One year                        -0.9797      0.103     -9.541      0.000      -1.181      -0.778
Contract_Two year                        -1.9865      0.172    -11.572      0.000      -2.323      -1.650
PaperlessBilling_Yes                      0.5875      0.071      8.300      0.000       0.449       0.726
PaymentMethod_Credit card (automatic)    -0.1070      0.111     -0.966      0.334      -0.324       0.110
PaymentMethod_Electronic check            0.5060      0.091      5.563      0.000       0.328       0.684
PaymentMethod_Mailed check               -0.3871      0.109     -3.565      0.000      -0.600      -0.174
Intercept                                -0.2896      0.102     -2.835      0.005      -0.490      -0.089
=========================================================================================================
"""

In [62]: # We need to call the following module:

In [63]: from sklearn.model_selection import train_test_split

In [64]: # Similar to R, let's set a random seed so that each of us will select the same training and test sub-data 

In [65]: # for reproducing the same forecasting result.

In [66]: np.random.seed(1)

In [67]: from sklearn.model_selection import train_test_split

In [68]: # Similar to R, let's set a random seed so that each of us will select the same training and test sub-data 

In [69]: # for reproducing the same forecasting result.

In [70]: np.random.seed(1)

In [71]: train_X, test_X, train_y, test_y = train_test_split(indep_dummy_data,Telecom_Churn_Data['Churn'], 
  File "C:\Users\rao8r\AppData\Local\Temp\ipykernel_9760\513116906.py", line 1
    train_X, test_X, train_y, test_y = train_test_split(indep_dummy_data,Telecom_Churn_Data['Churn'],
                                                                                                      ^
SyntaxError: unexpected EOF while parsing


In [72]: np.random.seed(1)
    ...: train_X, test_X, train_y, test_y = train_test_split(indep_dummy_data,Telecom_Churn_Data['Churn'], 
    ...: train_size=0.8, random_state = 0)

In [73]: train_X, test_X, train_y, test_y = train_test_split(indep_dummy_data,Telecom_Churn_Data['Churn'],train_size=0.8, random_state = 0)

In [74]: # the code line above creates the training data and the test data in one step

In [75]: # here train_X is the independent variables' training data; 

In [76]: # test_X is the independent variables' test data;

In [77]: # train_y is the dependent variable's training data;

In [78]: # test_y is the dependent variable's test data
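Since churners are a minority class (1869 of 7031 observations), it can be worth keeping the 0/1 proportions equal across the two splits; train_test_split supports this via its stratify argument. A minimal sketch of that variant:

train_X, test_X, train_y, test_y = train_test_split(indep_dummy_data, Telecom_Churn_Data['Churn'],
                                                    train_size=0.8, random_state=0,
                                                    stratify=Telecom_Churn_Data['Churn'])  # preserve the churn ratio in both splits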

In [79]: sm_logit_model_for_pred = sm.Logit(train_y, train_X)  # specify the logit model

In [80]: fitting_results_for_pred = sm_logit_model_for_pred.fit() # fit the logit model using the training data
Optimization terminated successfully.
         Current function value: 0.432035
         Iterations 8

In [81]: fitting_results_for_pred.summary() # you can also output the estimation results, including the Pseudo R-square and the log-likelihood value
Out[81]: 
<class 'statsmodels.iolib.summary.Summary'>
"""
                           Logit Regression Results                           
==============================================================================
Dep. Variable:                  Churn   No. Observations:                 5624
Model:                          Logit   Df Residuals:                     5615
Method:                           MLE   Df Model:                            8
Date:                Wed, 22 Jun 2022   Pseudo R-squ.:                  0.2553
Time:                        17:37:10   Log-Likelihood:                -2429.8
converged:                       True   LL-Null:                       -3262.7
Covariance Type:            nonrobust   LLR p-value:                     0.000
=========================================================================================================
                                            coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------------------------------
Tenure                                   -0.0836      0.006    -13.821      0.000      -0.095      -0.072
TotalCharges                              0.0007   5.92e-05     11.173      0.000       0.001       0.001
Contract_One year                        -0.9723      0.116     -8.415      0.000      -1.199      -0.746
Contract_Two year                        -2.0233      0.194    -10.406      0.000      -2.404      -1.642
PaperlessBilling_Yes                      0.5904      0.080      7.406      0.000       0.434       0.747
PaymentMethod_Credit card (automatic)    -0.1704      0.125     -1.360      0.174      -0.416       0.075
PaymentMethod_Electronic check            0.5314      0.102      5.187      0.000       0.331       0.732
PaymentMethod_Mailed check               -0.4225      0.123     -3.446      0.001      -0.663      -0.182
Intercept                                -0.2795      0.116     -2.416      0.016      -0.506      -0.053
=========================================================================================================
"""

In [82]: pred_y = round(fitting_results_for_pred.predict(test_X)) # make forecasts for the dependent variable using the trained logit model

In [83]: # here round() converts the predicted probability to either 1 or 0.

In [84]: # Construct a confusion matrix

In [85]: confusion_matrix = pd.crosstab(test_y, pred_y, rownames=['Actual y'], colnames=['Predicted y'])  

In [86]: # or we can directly output the accuracy rate

In [87]: from sklearn.metrics import accuracy_score

In [88]: accuracy_rate = accuracy_score(test_y, pred_y)

In [89]: print("The accuracy rate of the forecasting is: ",accuracy_rate)
The accuracy rate of the forecasting is:  0.7825159914712153
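Beyond overall accuracy, the confusion matrix built in In [85] and sklearn's classification_report show how the errors split between the churn and non-churn classes; a minimal sketch using the objects already in the session:

from sklearn.metrics import classification_report
print(confusion_matrix)                        # the Actual-by-Predicted crosstab from In [85]
print(classification_report(test_y, pred_y))   # per-class precision, recall, and F1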

In [90]: 
