Created
June 2, 2018 04:26
-
-
Save socratesk/36d787bfa0a5e3f1934fd471ed78e67b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Rank the features of the '50-Startups.csv' dataset by their importance for
# predicting 'Profit', using a random-forest regressor.

# import libraries
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor

# load data file
train = pd.read_csv('50-Startups.csv')

# perform one-hot encoding for the categorical 'State' column;
# the generated columns are named 'state_<value>'
trainDummies = pd.get_dummies(train['State'], prefix='state')

# combine original and one-hot encoded data frames together (column-wise)
train = pd.concat([train, trainDummies], axis=1)

# extract the dependent (target) variable
y = train.Profit

# extract the independent (predictor) features from the combined data frame:
# drop the target itself, the raw 'State' column (replaced by its dummies),
# and one of the dummy columns.
# NOTE(review): 'state_California' is presumably dropped as the reference
# level so the remaining dummies are not redundant - confirm this is intended.
X = train.drop(["Profit", "State", "state_California"], axis=1)

# create regressor object; random_state pins the seed so repeated runs give
# the same forest, and n_jobs=-1 lets scikit-learn use all available cores
rf = RandomForestRegressor(n_estimators=10000, random_state=0, n_jobs=-1)

# fit dependent and independent features in regressor object (creates Machine Learning model)
rf.fit(X, y)

# pair every feature name with its importance score and sort the table so the
# most important feature comes first
featureImp = pd.DataFrame(
    {'Feature': X.columns, 'Importance': rf.feature_importances_}
).sort_values('Importance', ascending=False)

# bare expression: renders the table in a notebook/REPL cell; when run as a
# plain script it produces no output
featureImp
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment