Last active
September 2, 2021 10:06
-
-
Save MartyC-137/eaf4b00f01dcf3157540fcd1335aa6db to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#load data --------------------
# Read the analyte table and keep MASTERID plus every column whose
# name contains 'ICP'.
df = pd.read_csv('analytes.csv')
icp_columns = [col for col in df if 'ICP' in col]
df_ml = df[['MASTERID'] + icp_columns]

#train-test split -------------------------------------------------------------
# Features are all remaining ICP columns: the copper target is removed, and
# MASTERID is dropped as well (presumably a sample identifier, not a predictor).
X = df_ml.drop(columns = ['MASTERID', 'Cu_ICP_PPM'])
y = df_ml['Cu_ICP_PPM']
# NOTE(review): no random_state is passed, so the split -- and every score
# computed from it -- differs from run to run. Consider seeding it.
X_train, X_test, y_train, y_test = train_test_split(X, y)
#run models -------------------------------------
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import Lasso
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import BaggingRegressor
from sklearn.linear_model import Ridge
import xgboost as xgb
from sklearn.metrics import r2_score


def _fit_and_score(label, model):
    """Fit ``model`` on the training split and print its test-set R^2.

    Args:
        label: Text printed in front of the score.
        model: Unfitted estimator whose ``fit`` returns the fitted estimator
            and which exposes ``predict``.

    Returns:
        Tuple ``(fitted_model, y_pred)`` where ``y_pred`` are the predictions
        for ``X_test``.
    """
    fitted = model.fit(X_train, y_train)
    y_pred = fitted.predict(X_test)
    print('{}: {:.3f}'.format(label, r2_score(y_test, y_pred)))
    return fitted, y_pred


print('Copper ML Scores: \n')

#random forest
rf, y_pred_rf = _fit_and_score(
    'Random Forest',
    RandomForestRegressor(max_depth = 10, n_estimators = 200))

#extra trees
et, y_pred_et = _fit_and_score(
    'Extra Trees',
    ExtraTreesRegressor(max_depth = 10, n_estimators = 200))

#gradient boosting
gbr, y_pred_gbr = _fit_and_score(
    'Gradient Boosting',
    GradientBoostingRegressor(max_depth = 10, n_estimators = 200))

#basic linear regression
lr, y_pred_lr = _fit_and_score('Linear Regression', LinearRegression())

#ada boost
ada, y_pred_ada = _fit_and_score('Ada Boost Score', AdaBoostRegressor())

#bagging regressor
br, y_pred_br = _fit_and_score('Bagging Regressor', BaggingRegressor())

#ridge
ridge, y_pred_ridge = _fit_and_score('Ridge', Ridge())

#lasso
lasso, y_pred_lasso = _fit_and_score('Lasso', Lasso())

#xg boost
# NOTE: this rebinds `xgb` from the xgboost module to the fitted regressor.
# The name is kept because later code calls xgb.predict(...); as a result,
# re-running this statement without re-importing xgboost will fail.
xgb, y_pred_xgb = _fit_and_score(
    'XG Boost',
    xgb.XGBRegressor(max_depth = 10, n_estimators = 200))
# Output:
# Copper ML Scores:
# Random Forest: 0.696
# Extra Trees: 0.708
# Gradient Boosting: 0.686
# Linear Regression: 0.564
# Ada Boost Score: 0.269
# Bagging Regressor: 0.625
# Ridge: 0.567
# Lasso: 0.562
# XG Boost: 0.695
#blended model -----
def blended_model(x):
    """Return the equally weighted average of four fitted models' predictions.

    Each of the module-level estimators ``rf``, ``et``, ``gbr`` and ``xgb``
    contributes a weight of 0.25.

    Args:
        x: Feature data accepted by each estimator's ``predict`` method.

    Returns:
        The weighted sum of the four ``predict(x)`` results.
    """
    # Parentheses already allow the expression to span lines, so no
    # backslash continuations are needed. Term order is unchanged.
    return (0.25 * rf.predict(x)
            + 0.25 * et.predict(x)
            + 0.25 * gbr.predict(x)
            + 0.25 * xgb.predict(x))
# Score the blended predictions on the held-out split with the same R^2
# metric used for the individual models.
y_pred_blended = blended_model(X_test)
blended_r2 = r2_score(y_test, y_pred_blended)
print('Blended Model Score: {:.3f}'.format(blended_r2))
# Output:
# Blended Model Score: 0.726
#save the results to MySQL and csv -----------
# Pair each held-out sample's measured copper value with the blended
# prediction for it.
cu_df = pd.DataFrame({'Actual_Cu_PPM': y_test})
cu_df['Pred_Cu_PPM'] = y_pred_blended

#add the MASTERID, latitude and longitude back in
# The merge is on the row index of both frames; this assumes y_test still
# carries df's original index labels -- TODO confirm.
location_cols = ['MASTERID', 'LAT', 'LONG', 'STRAT']
ppm_cols = ['Actual_Cu_PPM', 'Pred_Cu_PPM']
cu_df = cu_df.merge(df[location_cols],
                    left_index = True,
                    right_index = True)

# Put the metadata columns first, then the actual/predicted pair.
cu_df = cu_df[location_cols + ppm_cols]
cu_df[ppm_cols] = cu_df[ppm_cols].astype('float').round(2)

# `engine` is expected to be defined elsewhere; an existing table of the
# same name is replaced.
cu_df.to_sql('quest_copper_ml_blended',
             con = engine,
             if_exists = 'replace',
             index = False)

# NOTE(review): unlike the to_sql call above, this writes the row index as
# an unnamed first column. Pass index = False if that is not intended.
cu_df.to_csv('quest_cu_ml_blended.csv')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment