Skip to content

Instantly share code, notes, and snippets.

@aflansburg
Last active July 14, 2021 14:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aflansburg/79740baa9b4a653797ec6df7f74e8ff9 to your computer and use it in GitHub Desktop.
Save aflansburg/79740baa9b4a653797ec6df7f74e8ff9 to your computer and use it in GitHub Desktop.
Typing & Docstrings: Python
# Function to iterate over the specified columns and print their value counts.
# Type hints are added to make the function easier to understand if it is reused elsewhere.
from typing import List
def value_count_rep(columns: List[str], df: pd.DataFrame) -> None:
    """Print a value-count report for each requested column of ``df``.

    Parameters:
        columns: Labels of the columns to report on, in print order.
        df: DataFrame that is indexed with each label in ``columns``.

    Returns:
        None. For every column a report is written to stdout consisting of
        the column label and dtype, a ``Value|Count`` header, and the
        string form of ``df[column].value_counts()``.
    """
    for column in columns:
        series = df[column]
        output = (
            f'{column} - {series.dtype}\n'
            '--------------------------------------\n'
            'Value|Count\n'
            '--------------------------------------\n'
            # The value counts are followed directly by a newline; an
            # earlier version emitted a stray ']' here.
            f'{series.value_counts()}\n'
            '======================================\n'
        )
        print(output)
# With the docstring in place, Shift+Tab in Jupyter Notebook shows the
# function's description.
numerical_features = [
    'Age',
    'Experience',
    'Income',
    'ZIPCode',
    'Family',
    'CCAvg',
    'Mortgage',
]
value_count_rep(columns=numerical_features, df=data)
Age - int64
--------------------------------------
Value|Count
--------------------------------------
35 151
43 149
52 145
58 143
54 143
50 138
41 136
30 136
56 135
34 134
39 133
57 132
59 132
51 129
60 127
45 127
46 127
42 126
31 125
40 125
55 125
29 123
62 123
61 122
44 121
33 120
32 120
48 118
49 115
38 115
47 113
53 112
63 108
36 107
37 106
28 103
27 91
65 80
64 78
26 78
25 53
24 28
66 24
23 12
67 12
Name: Age, dtype: int64]
======================================
Experience - int64
--------------------------------------
Value|Count
--------------------------------------
32 154
20 148
9 147
5 146
23 144
35 143
25 142
28 138
18 137
19 135
26 134
24 131
3 129
14 127
16 127
30 126
34 125
27 125
17 125
22 124
29 124
7 121
15 119
8 119
6 119
10 118
33 117
13 117
11 116
37 116
36 114
4 113
21 113
31 104
12 102
38 88
2 85
39 85
1 74
0 66
40 57
41 43
-1 33
-2 15
42 8
-3 4
43 3
Name: Experience, dtype: int64]
======================================
Income - int64
--------------------------------------
Value|Count
--------------------------------------
44 85
38 84
81 83
41 82
39 81
..
202 2
189 2
203 2
218 1
224 1
Name: Income, Length: 162, dtype: int64]
======================================
ZIPCode - int64
--------------------------------------
Value|Count
--------------------------------------
94720 169
94305 127
95616 116
90095 71
93106 57
...
94087 1
96145 1
90068 1
92694 1
94404 1
Name: ZIPCode, Length: 467, dtype: int64]
======================================
Family - int64
--------------------------------------
Value|Count
--------------------------------------
1 1472
2 1296
4 1222
3 1010
Name: Family, dtype: int64]
======================================
CCAvg - float64
--------------------------------------
Value|Count
--------------------------------------
0.30 241
1.00 231
0.20 204
2.00 188
0.80 187
...
5.33 1
8.20 1
3.67 1
9.30 1
8.90 1
Name: CCAvg, Length: 108, dtype: int64]
======================================
Mortgage - int64
--------------------------------------
Value|Count
--------------------------------------
0 3462
98 17
89 16
91 16
83 16
...
206 1
210 1
258 1
278 1
635 1
Name: Mortgage, Length: 347, dtype: int64]
======================================
# Function to iterate over the specified columns and print their value counts.
# Type hints are added to make the function easier to understand if it is reused elsewhere.
from typing import List
import numpy as np
# Alias for the list of column labels passed to value_count_rep. The labels
# are strings such as 'Age', so the element type is str rather than np.int64.
Vector = List[str]


def value_count_rep(columns: Vector, df: pd.DataFrame) -> None:
    """Print a value-count report for each requested column of ``df``.

    Parameters:
        columns: Labels of the columns to report on, in print order.
        df: DataFrame that is indexed with each label in ``columns``.

    Returns:
        None. For every column a report is written to stdout consisting of
        the column label and dtype, a ``Value|Count`` header, and the
        string form of ``df[column].value_counts()``.
    """
    for column in columns:
        counts = df[column].value_counts()
        output = (
            f'{column} - {df[column].dtype}\n'
            '--------------------------------------\n'
            'Value|Count\n'
            '--------------------------------------\n'
            # The value counts are followed directly by a newline; an
            # earlier version emitted a stray ']' here.
            f'{counts}\n'
            '======================================\n'
        )
        print(output)
# With the docstring in place, Shift+Tab in Jupyter Notebook shows the
# function's description.
numerical_features = [
    'Age',
    'Experience',
    'Income',
    'ZIPCode',
    'Family',
    'CCAvg',
    'Mortgage',
]
value_count_rep(columns=numerical_features, df=data)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment