amansk2050/random_forest_regression

## random_forest_regression
#importing libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn import metrics
import matplotlib.pyplot as plt
# Load the USA housing data from the Kaggle input directory.
dataset = pd.read_csv('/kaggle/input/usa-housing/USA_Housing.csv')
# Pick the predictor columns and the regression target (Price).
features = [
    'Avg. Area Income',
    'Avg. Area House Age',
    'Avg. Area Number of Rooms',
    'Avg. Area Number of Bedrooms',
    'Area Population',
]
y = dataset['Price']
X = dataset[features]
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)
# Feature scaling: the scaler is fitted on the training rows only and then
# applied unchanged to the test rows.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)
# Train a seeded 500-tree random forest, then predict on the held-out rows.
regressor = RandomForestRegressor(n_estimators=500, random_state=0)
regressor.fit(X_train, y_train)
y_pred = regressor.predict(X_test)
# Evaluate the predictions; the RMSE reuses the MSE instead of recomputing it.
mae = metrics.mean_absolute_error(y_test, y_pred)
mse = metrics.mean_squared_error(y_test, y_pred)
print('Mean Absolute Error:', mae)
print('Mean Squared Error:', mse)
print('Root Mean Squared Error:', np.sqrt(mse))
# Scatter actual vs. predicted values; the dashed diagonal spans the range of
# the actual values and marks where prediction equals ground truth.
fig, ax = plt.subplots()
ax.scatter(y_test, y_pred, edgecolors=(0, 0, 0))
diagonal = [y_test.min(), y_test.max()]
ax.plot(diagonal, diagonal, 'k--', lw=4)
ax.set(xlabel='Actual', ylabel='Predicted', title="Ground Truth vs Predicted")
plt.show()
	#importing libraries
	import pandas as pd
	import numpy as np
	from sklearn.model_selection import train_test_split
	from sklearn.preprocessing import StandardScaler
	from sklearn.ensemble import RandomForestRegressor
	from sklearn import metrics
	import matplotlib.pyplot as plt
	# Load the USA housing data from the Kaggle input directory.
	dataset = pd.read_csv('/kaggle/input/usa-housing/USA_Housing.csv')
	# Pick the predictor columns and the regression target (Price).
	features = [
	    'Avg. Area Income',
	    'Avg. Area House Age',
	    'Avg. Area Number of Rooms',
	    'Avg. Area Number of Bedrooms',
	    'Area Population',
	]
	y = dataset['Price']
	X = dataset[features]
	# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
	X_train, X_test, y_train, y_test = train_test_split(
	    X,
	    y,
	    test_size=0.2,
	    random_state=0,
	)
	# Feature scaling: the scaler is fitted on the training rows only and then
	# applied unchanged to the test rows.
	sc = StandardScaler()
	sc.fit(X_train)
	X_train = sc.transform(X_train)
	X_test = sc.transform(X_test)
	# Train a seeded 500-tree random forest, then predict on the held-out rows.
	regressor = RandomForestRegressor(n_estimators=500, random_state=0)
	regressor.fit(X_train, y_train)
	y_pred = regressor.predict(X_test)
	# Evaluate the predictions; the RMSE reuses the MSE instead of recomputing it.
	mae = metrics.mean_absolute_error(y_test, y_pred)
	mse = metrics.mean_squared_error(y_test, y_pred)
	print('Mean Absolute Error:', mae)
	print('Mean Squared Error:', mse)
	print('Root Mean Squared Error:', np.sqrt(mse))
	# Scatter actual vs. predicted values; the dashed diagonal spans the range of
	# the actual values and marks where prediction equals ground truth.
	fig, ax = plt.subplots()
	ax.scatter(y_test, y_pred, edgecolors=(0, 0, 0))
	diagonal = [y_test.min(), y_test.max()]
	ax.plot(diagonal, diagonal, 'k--', lw=4)
	ax.set(xlabel='Actual', ylabel='Predicted', title="Ground Truth vs Predicted")
	plt.show()