# Emekaborisama/data_preprocess.py

## data_preprocess.py
def convert_int(x):
    """Convert a value to ``int``, returning ``None`` when it cannot be converted.

    Args:
        x: Any value accepted by the built-in ``int`` (for example a float
            or a numeric string).

    Returns:
        ``int(x)`` on success (floats are truncated toward zero), or ``None``
        when ``int`` rejects the value, e.g. ``None``, a non-numeric string,
        NaN or infinity.
    """
    try:
        return int(x)
    except (TypeError, ValueError, OverflowError):
        # Best-effort conversion: an unconvertible value becomes None rather
        # than raising. Only conversion failures are swallowed, so
        # KeyboardInterrupt / SystemExit still propagate to the caller.
        return None
# Select the float64-typed columns of the raw training frame.
float_cols = train_df.select_dtypes("float64")

# "reviews_per_month" holds mixed types; pass every value through convert_int.
new_train_df["reviews_per_month"] = new_train_df["reviews_per_month"].apply(
    convert_int
)

# Parse the "last_review" strings into pandas datetimes, derive the month,
# day-of-month and (2022 - year) features, then remove the raw column.
new_train_df["last_review"] = pd.to_datetime(new_train_df["last_review"])
new_train_df["last_review_month"] = new_train_df["last_review"].dt.month
new_train_df["last_review_days"] = new_train_df["last_review"].dt.day
new_train_df["last_review_year"] = 2022 - new_train_df["last_review"].dt.year
del new_train_df["last_review"]

# Label-encode the categorical "room_type" column.
from sklearn import preprocessing

cat_columns = ["room_type"]
le = preprocessing.LabelEncoder()
new_train_df["room_type"] = le.fit_transform(new_train_df["room_type"])
	def convert_int(x):
	    """Convert a value to ``int``, returning ``None`` when it cannot be converted.

	    Args:
	        x: Any value accepted by the built-in ``int`` (for example a float
	            or a numeric string).

	    Returns:
	        ``int(x)`` on success (floats are truncated toward zero), or ``None``
	        when ``int`` rejects the value, e.g. ``None``, a non-numeric string,
	        NaN or infinity.
	    """
	    try:
	        return int(x)
	    except (TypeError, ValueError, OverflowError):
	        # Best-effort conversion: an unconvertible value becomes None rather
	        # than raising. Only conversion failures are swallowed, so
	        # KeyboardInterrupt / SystemExit still propagate to the caller.
	        return None
	# Select the float64-typed columns of the raw training frame.
	float_cols = train_df.select_dtypes("float64")

	# "reviews_per_month" holds mixed types; pass every value through convert_int.
	new_train_df["reviews_per_month"] = new_train_df["reviews_per_month"].apply(
	    convert_int
	)

	# Parse the "last_review" strings into pandas datetimes, derive the month,
	# day-of-month and (2022 - year) features, then remove the raw column.
	new_train_df["last_review"] = pd.to_datetime(new_train_df["last_review"])
	new_train_df["last_review_month"] = new_train_df["last_review"].dt.month
	new_train_df["last_review_days"] = new_train_df["last_review"].dt.day
	new_train_df["last_review_year"] = 2022 - new_train_df["last_review"].dt.year
	del new_train_df["last_review"]

	# Label-encode the categorical "room_type" column.
	from sklearn import preprocessing

	cat_columns = ["room_type"]
	le = preprocessing.LabelEncoder()
	new_train_df["room_type"] = le.fit_transform(new_train_df["room_type"])