{ "cells": [ { "cell_type": "code", "execution_count": 11, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style>\n", " .dataframe thead tr:only-child th {\n", " text-align: right;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: left;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>AGE</th>\n", " <th>BILIRUBIN</th>\n", " <th>PROTIME</th>\n", " <th>ALBUMIN</th>\n", " <th>ALK_PHOSPHATE</th>\n", " <th>SGOT</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>count</th>\n", " <td>155.000000</td>\n", " <td>149.000000</td>\n", " <td>88.000000</td>\n", " <td>139.000000</td>\n", " <td>126.000000</td>\n", " <td>151.00000</td>\n", " </tr>\n", " <tr>\n", " <th>mean</th>\n", " <td>41.200000</td>\n", " <td>1.427517</td>\n", " <td>61.852273</td>\n", " <td>3.817266</td>\n", " <td>105.325397</td>\n", " <td>85.89404</td>\n", " </tr>\n", " <tr>\n", " <th>std</th>\n", " <td>12.565878</td>\n", " <td>1.212149</td>\n", " <td>22.875244</td>\n", " <td>0.651523</td>\n", " <td>51.508109</td>\n", " <td>89.65089</td>\n", " </tr>\n", " <tr>\n", " <th>min</th>\n", " <td>7.000000</td>\n", " <td>0.300000</td>\n", " <td>0.000000</td>\n", " <td>2.100000</td>\n", " <td>26.000000</td>\n", " <td>14.00000</td>\n", " </tr>\n", " <tr>\n", " <th>25%</th>\n", " <td>32.000000</td>\n", " <td>0.700000</td>\n", " <td>46.000000</td>\n", " <td>3.400000</td>\n", " <td>74.250000</td>\n", " <td>31.50000</td>\n", " </tr>\n", " <tr>\n", " <th>50%</th>\n", " <td>39.000000</td>\n", " <td>1.000000</td>\n", " <td>61.000000</td>\n", " <td>4.000000</td>\n", " <td>85.000000</td>\n", " <td>58.00000</td>\n", " </tr>\n", " <tr>\n", " <th>75%</th>\n", " <td>50.000000</td>\n", " <td>1.500000</td>\n", " <td>76.250000</td>\n", " <td>4.200000</td>\n", " <td>132.250000</td>\n", " 
<td>100.50000</td>\n", " </tr>\n", " <tr>\n", " <th>max</th>\n", " <td>78.000000</td>\n", " <td>8.000000</td>\n", " <td>100.000000</td>\n", " <td>6.400000</td>\n", " <td>295.000000</td>\n", " <td>648.00000</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " AGE BILIRUBIN PROTIME ALBUMIN ALK_PHOSPHATE \\\n", "count 155.000000 149.000000 88.000000 139.000000 126.000000 \n", "mean 41.200000 1.427517 61.852273 3.817266 105.325397 \n", "std 12.565878 1.212149 22.875244 0.651523 51.508109 \n", "min 7.000000 0.300000 0.000000 2.100000 26.000000 \n", "25% 32.000000 0.700000 46.000000 3.400000 74.250000 \n", "50% 39.000000 1.000000 61.000000 4.000000 85.000000 \n", "75% 50.000000 1.500000 76.250000 4.200000 132.250000 \n", "max 78.000000 8.000000 100.000000 6.400000 295.000000 \n", "\n", " SGOT \n", "count 151.00000 \n", "mean 85.89404 \n", "std 89.65089 \n", "min 14.00000 \n", "25% 31.50000 \n", "50% 58.00000 \n", "75% 100.50000 \n", "max 648.00000 " ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Columns treated as numeric in this analysis.\n", "numerical_variables = [\n", "    'AGE',\n", "    'BILIRUBIN',\n", "    'PROTIME',\n", "    'ALBUMIN',\n", "    'ALK_PHOSPHATE',\n", "    'SGOT',\n", "]\n", "\n", "# describe() reports count, mean, std, min, quartiles and max per column;\n", "# the count row only tallies non-missing entries, so it can differ between columns.\n", "hepatitis_data.loc[:, numerical_variables].describe()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style>\n", " .dataframe thead tr:only-child th {\n", " text-align: right;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: left;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>SEX</th>\n", " <th>STEROID</th>\n", " <th>ANTIVIRALS</th>\n", " <th>FATIGUE</th>\n", " <th>MALAISE</th>\n", " <th>ANOREXIA</th>\n", " <th>LIVER_BIG</th>\n", " <th>LIVER_FIRM</th>\n", " <th>SPLEEN_PALPABLE</th>\n", " <th>SPIDERS</th>\n", " <th>ASCITES</th>\n", " <th>VARICES</th>\n", " 
<th>HISTOLOGY</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0.0</th>\n", " <td>139</td>\n", " <td>76</td>\n", " <td>131</td>\n", " <td>54</td>\n", " <td>93</td>\n", " <td>122</td>\n", " <td>25</td>\n", " <td>84</td>\n", " <td>120</td>\n", " <td>99</td>\n", " <td>130</td>\n", " <td>132</td>\n", " <td>85</td>\n", " </tr>\n", " <tr>\n", " <th>1.0</th>\n", " <td>16</td>\n", " <td>78</td>\n", " <td>24</td>\n", " <td>100</td>\n", " <td>61</td>\n", " <td>32</td>\n", " <td>120</td>\n", " <td>60</td>\n", " <td>30</td>\n", " <td>51</td>\n", " <td>20</td>\n", " <td>18</td>\n", " <td>70</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " SEX STEROID ANTIVIRALS FATIGUE MALAISE ANOREXIA LIVER_BIG \\\n", "0.0 139 76 131 54 93 122 25 \n", "1.0 16 78 24 100 61 32 120 \n", "\n", " LIVER_FIRM SPLEEN_PALPABLE SPIDERS ASCITES VARICES HISTOLOGY \n", "0.0 84 120 99 130 132 85 \n", "1.0 60 30 51 20 18 70 " ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Columns treated as categorical in this analysis.\n", "categorical_variables = [\n", "    'SEX',\n", "    'STEROID',\n", "    'ANTIVIRALS',\n", "    'FATIGUE',\n", "    'MALAISE',\n", "    'ANOREXIA',\n", "    'LIVER_BIG',\n", "    'LIVER_FIRM',\n", "    'SPLEEN_PALPABLE',\n", "    'SPIDERS',\n", "    'ASCITES',\n", "    'VARICES',\n", "    'HISTOLOGY',\n", "]\n", "\n", "# Tally every distinct value per column; value_counts skips missing entries by default.\n", "# The result has one row per observed value and one column per variable.\n", "hepatitis_data.loc[:, categorical_variables].apply(pd.Series.value_counts)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.3" } }, "nbformat": 4, "nbformat_minor": 2 }