This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# When analyzing survey data, it is common for missing values to be represented by various placeholders
# such as "DK", "NO RESPONSE", "Missing/DK", and "REFUSED". During exploratory data analysis (EDA),
# it is standard practice to examine instances of missing values using the command df.isna().sum().
# However, this command does not account for the aforementioned placeholders.
# Consequently, it is beneficial to employ a utility function that computes the count of these specific
# representations of missing values.
def count_special_values(df: pd.DataFrame)->pd.DataFrame: | |
""" | |
Counts the occurrences of specific special values (DK, REFUSED, NO RESPONSE, Missing/DK) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def convert_int_to_word(num: int) -> str: | |
"""Convert integer from zero to trillion to its words representation""" | |
d = { 0 : 'zero', 1 : 'one', 2 : 'two', 3 : 'three', 4 : 'four', 5 : 'five', | |
6 : 'six', 7 : 'seven', 8 : 'eight', 9 : 'nine', 10 : 'ten', | |
11 : 'eleven', 12 : 'twelve', 13 : 'thirteen', 14 : 'fourteen', | |
15 : 'fifteen', 16 : 'sixteen', 17 : 'seventeen', 18 : 'eighteen', | |
19 : 'nineteen', 20 : 'twenty', | |
30 : 'thirty', 40 : 'forty', 50 : 'fifty', 60 : 'sixty', | |
70 : 'seventy', 80 : 'eighty', 90 : 'ninety' } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np
import pandas as pd

# Seeded generator so the synthetic dataset is reproducible across runs.
rng = np.random.default_rng(1)

# Draw a 127 x 5 array of samples from the standard normal distribution
# (mean 0, variance 1).
data = rng.standard_normal((127, 5))

# Element-wise mask with the same shape as `data`: 0 with probability 0.7
# (keep the value) and NaN with probability 0.3 (mark it as missing).
missing = rng.choice([0, np.nan], p=[0.7, 0.3], size=data.shape)

# Adding 0 leaves a sample unchanged, while adding NaN turns it into NaN,
# so roughly 30% of the entries in `data` end up missing.
data += missing