# Percentage of rows in train_df whose cabin value is null.
(
    train_df.filter(train_df["cabin"].isNull()).count()
    / train_df.count()
    * 100
)
# About 77% of the cabin values are missing, so the column is left out of
# the data used for the model.
wo_cabin_train_df = train_df.drop("cabin")
