Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
rfecv_2_cleaning
data.drop(['Ticket', 'PassengerId'], axis=1, inplace=True)
gender_mapper = {'male': 0, 'female': 1}
data['Sex'].replace(gender_mapper, inplace=True)
data['Title'] = data['Name'].apply(lambda x: x.split(',')[1].strip().split(' ')[0])
data['Title'] = [0 if x in ['Mr.', 'Miss.', 'Mrs.'] else 1 for x in data['Title']]
data = data.rename(columns={'Title': 'Title_Unusual'})
data.drop('Name', axis=1, inplace=True)
data['Cabin_Known'] = [0 if str(x) == 'nan' else 1 for x in data['Cabin']]
data.drop('Cabin', axis=1, inplace=True)
emb_dummies = pd.get_dummies(data['Embarked'], drop_first=True, prefix='Embarked')
data = pd.concat([data, emb_dummies], axis=1)
data.drop('Embarked', axis=1, inplace=True)
data['Age'] = data['Age'].fillna(int(data['Age'].mean()))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment