# Import the required libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt

# Load the housing data (the column names below suggest the Ames housing dataset;
# the file path is an assumption, adjust it to your local copy)
housing = pd.read_csv('AmesHousing.csv')
# Separate numerical and categorical features (PID is an identifier, not a predictor)
housing_numerical = housing.drop(['PID'], axis=1).select_dtypes(include=['int64', 'float64'])
housing_categorical = housing.select_dtypes(include='object')
# Inspect missing values, then fill them with zeros
print(housing_numerical.isnull().sum(axis=0))
housing_numerical = housing_numerical.fillna(0.0)
# Split the target from the other numerical features
numerical_features = housing_numerical.drop(['SalePrice'], axis=1)
price = housing['SalePrice']
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(numerical_features, price, test_size=0.25, random_state=12)

# Fit a random forest on the training data
# (random_state added here for reproducibility; not in the original gist)
rf = RandomForestRegressor(n_estimators=100, random_state=12)
rf.fit(X_train, y_train)
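# The held-out test split is never scored in the original gist; a minimal sketch
# of evaluating the fitted model on it (rf.score returns the R^2 of the
# predictions), added as an illustration:
print(rf.score(X_test, y_test))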
# Inspect the learned feature importance attribute
print(rf.feature_importances_)
# Visualize the feature importances, sorted from least to most important
sorted_idx = rf.feature_importances_.argsort()
plt.barh(numerical_features.columns[sorted_idx], rf.feature_importances_[sorted_idx])
plt.xlabel("Random Forest Feature Importance")
plt.tight_layout()
plt.show()
# Label-encode the categorical features; missing values are filled first,
# since LabelEncoder cannot sort a mix of strings and NaN
housing_categorical = housing_categorical.fillna('Missing')
housing_categorical_encoded = housing_categorical.apply(LabelEncoder().fit_transform)
# Repeat the same steps as for the numerical features, as sketched below
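# A minimal sketch of that repetition, assuming the goal is a second importance
# plot for the encoded categorical features; the *_cat variable names are
# illustrative, not from the original gist.
X_train_cat, X_test_cat, y_train_cat, y_test_cat = train_test_split(
    housing_categorical_encoded, price, test_size=0.25, random_state=12)

rf_cat = RandomForestRegressor(n_estimators=100, random_state=12)
rf_cat.fit(X_train_cat, y_train_cat)

sorted_idx_cat = rf_cat.feature_importances_.argsort()
plt.barh(housing_categorical_encoded.columns[sorted_idx_cat],
         rf_cat.feature_importances_[sorted_idx_cat])
plt.xlabel("Random Forest Feature Importance (categorical)")
plt.tight_layout()
plt.show()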