Skip to content

Instantly share code, notes, and snippets.

@amansk2050
Created July 6, 2020 16:17
Show Gist options
  • Save amansk2050/d279ca459b7116bb82374dc7e83f3e2b to your computer and use it in GitHub Desktop.
Save amansk2050/d279ca459b7116bb82374dc7e83f3e2b to your computer and use it in GitHub Desktop.
Random Forest regression example using scikit-learn in Python
#importing libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn import metrics
import matplotlib.pyplot as plt
# Load the housing dataset (the path points at a Kaggle input directory).
dataset = pd.read_csv('/kaggle/input/usa-housing/USA_Housing.csv')

# Prepare the data for training: 'Price' is the regression target and the
# columns listed in `features` are the predictors.
y = dataset['Price']
features = [
    'Avg. Area Income',
    'Avg. Area House Age',
    'Avg. Area Number of Rooms',
    'Avg. Area Number of Bedrooms',
    'Area Population',
]
X = dataset[features]

# Hold out 20% of the rows for testing; random_state pins the split so
# repeated runs use the same partition.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# Feature scaling: the scaler is fitted on the training rows only and then
# applied unchanged to the test rows, so test statistics never influence it.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Train a 500-tree random forest (seeded) and predict the held-out prices.
regressor = RandomForestRegressor(n_estimators=500, random_state=0)
regressor.fit(X_train, y_train)
y_pred = regressor.predict(X_test)

# Evaluate the predictions. The MSE is computed once and reused for the
# RMSE instead of calling mean_squared_error a second time.
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))
mse = metrics.mean_squared_error(y_test, y_pred)
print('Mean Squared Error:', mse)
print('Root Mean Squared Error:', np.sqrt(mse))

# Visualize predicted vs. actual values. The dashed diagonal spans the
# range of the actual prices and marks where predicted == actual.
actual_range = [y_test.min(), y_test.max()]
fig, ax = plt.subplots()
ax.scatter(y_test, y_pred, edgecolors=(0, 0, 0))
ax.plot(actual_range, actual_range, 'k--', lw=4)
ax.set_xlabel('Actual')
ax.set_ylabel('Predicted')
ax.set_title("Ground Truth vs Predicted")
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment