Skip to content

Instantly share code, notes, and snippets.

@amankharwal
Created December 18, 2020 14:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save amankharwal/5ec650dac6580d7812a5f1afc54126d2 to your computer and use it in GitHub Desktop.
Save amankharwal/5ec650dac6580d7812a5f1afc54126d2 to your computer and use it in GitHub Desktop.
def simplify_ages(df):
df.Age = df.Age.fillna(-0.5)
bins = (-1, 0, 5, 12, 18, 25, 35, 60, 120)
group_names = ['Unknown', 'Baby', 'Child', 'Teenager', 'Student', 'Young Adult', 'Adult', 'Senior']
categories = pd.cut(df.Age, bins, labels=group_names)
df.Age = categories
return df
def simplify_cabins(df):
df.Cabin = df.Cabin.fillna('N')
df.Cabin = df.Cabin.apply(lambda x: x[0])
return df
def simplify_fares(df):
df.Fare = df.Fare.fillna(-0.5)
bins = (-1, 0, 8, 15, 31, 1000)
group_names = ['Unknown', '1_quartile', '2_quartile', '3_quartile', '4_quartile']
categories = pd.cut(df.Fare, bins, labels=group_names)
df.Fare = categories
return df
def format_name(df):
df['Lname'] = df.Name.apply(lambda x: x.split(' ')[0])
df['NamePrefix'] = df.Name.apply(lambda x: x.split(' ')[1])
return df
def drop_features(df):
return df.drop(['Ticket', 'Name', 'Embarked'], axis=1)
def transform_features(df):
df = simplify_ages(df)
df = simplify_cabins(df)
df = simplify_fares(df)
df = format_name(df)
df = drop_features(df)
return df
data_train = transform_features(data_train)
data_test = transform_features(data_test)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment