# Import the required libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt

# Load the housing data (the column names below suggest the Ames housing dataset;
# the file path is an assumption, adjust it to your local copy)
housing = pd.read_csv('AmesHousing.csv')
# Separate numerical and categorical features (PID is an identifier, not a predictor)
housing_numerical = housing.drop(['PID'], axis=1).select_dtypes(include=['int64', 'float64'])
housing_categorical = housing.select_dtypes(include='object')
# Inspect missing values, then fill them with zeros
print(housing_numerical.isnull().sum(axis=0))
housing_numerical = housing_numerical.fillna(0.0)
# Split the target from the other numerical features
numerical_features = housing_numerical.drop(['SalePrice'], axis=1)
price = housing['SalePrice']
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(numerical_features, price, test_size=0.25, random_state=12)

# Fit a random forest on the training data
# (random_state added here for reproducibility; not in the original gist)
rf = RandomForestRegressor(n_estimators=100, random_state=12)
rf.fit(X_train, y_train)
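# The held-out test split is never scored in the original gist; a minimal sketch
# of evaluating the fitted model on it (rf.score returns the R^2 of the
# predictions), added as an illustration:
print(rf.score(X_test, y_test))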
# Inspect the learned feature importance attribute
print(rf.feature_importances_)
# Visualize the feature importances, sorted from least to most important
sorted_idx = rf.feature_importances_.argsort()
plt.barh(numerical_features.columns[sorted_idx], rf.feature_importances_[sorted_idx])
plt.xlabel("Random Forest Feature Importance")
plt.tight_layout()
plt.show()
# Label-encode the categorical features; missing values are filled first,
# since LabelEncoder cannot sort a mix of strings and NaN
housing_categorical = housing_categorical.fillna('Missing')
housing_categorical_encoded = housing_categorical.apply(LabelEncoder().fit_transform)
# Repeat the same steps as for the numerical features, as sketched below
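# A minimal sketch of that repetition, assuming the goal is a second importance
# plot for the encoded categorical features; the *_cat variable names are
# illustrative, not from the original gist.
X_train_cat, X_test_cat, y_train_cat, y_test_cat = train_test_split(
    housing_categorical_encoded, price, test_size=0.25, random_state=12)

rf_cat = RandomForestRegressor(n_estimators=100, random_state=12)
rf_cat.fit(X_train_cat, y_train_cat)

sorted_idx_cat = rf_cat.feature_importances_.argsort()
plt.barh(housing_categorical_encoded.columns[sorted_idx_cat],
         rf_cat.feature_importances_[sorted_idx_cat])
plt.xlabel("Random Forest Feature Importance (categorical)")
plt.tight_layout()
plt.show()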