Last active
January 1, 2021 03:52
-
-
Save vatsaldin/beee51292e711d08190cdeca49baef28 to your computer and use it in GitHub Desktop.
Load and pre-process data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the hypothyroid dataset from CSV using pandas
dataset = pd.read_csv('data/hypothyroid.csv')
dataset.head()

# Rename the first column to "target" and encode its two labels as 0/1
dataset = dataset.rename(columns={dataset.columns[0]: "target"})
dataset["target"] = dataset["target"].map({"negative": 0, "hypothyroid": 1})

# Encode the categorical flag values as binary values
dataset = dataset.replace({'f': 0, 't': 1, 'y': 1, 'n': 0, 'M': 0, 'F': 1})

# Replace the '?' placeholder with NaN.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.
dataset = dataset.replace('?', np.nan)

# Count the number of null values per column
dataset.isnull().sum()

# Drop the TBG column as it contains an extremely high number of null values
dataset = dataset.drop(columns='TBG')

# Select the columns whose dtype is still 'object'
columns = dataset.columns[dataset.dtypes.eq('object')]
# Convert them to numeric; values that cannot be parsed become NaN
dataset[columns] = dataset[columns].apply(pd.to_numeric, errors='coerce')

# Fill the remaining nulls with one DataFrame-level fillna call.
# Calling dataset[col].fillna(..., inplace=True) on a selected column is
# chained assignment: it warns on recent pandas and does not modify
# `dataset` at all once Copy-on-Write is enabled.
dataset = dataset.fillna({
    # Replacing null values by mean
    'Age': dataset['Age'].mean(),
    'T4U': dataset['T4U'].mean(),
    # NOTE(review): TSH was listed under the "median" comment but has always
    # been filled with the mean; the mean is kept to preserve results.
    # Confirm which statistic was intended.
    'TSH': dataset['TSH'].mean(),
    # Replacing null values by median
    'T3': dataset['T3'].median(),
    'TT4': dataset['TT4'].median(),
    'FTI': dataset['FTI'].median(),
    # The gender data looks to be imbalanced, with fewer 0 values than 1
    # values, so null values are replaced with 0
    'Gender': 0,
})
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment