Skip to content

Instantly share code, notes, and snippets.

@MartyC-137
Last active September 2, 2021 10:06
Show Gist options
  • Save MartyC-137/eaf4b00f01dcf3157540fcd1335aa6db to your computer and use it in GitHub Desktop.
# Load data --------------------
df = pd.read_csv('analytes.csv')

# Keep the sample identifier plus every ICP analyte column for modelling.
icp_cols = [c for c in df.columns if 'ICP' in c]
df_ml = df[['MASTERID'] + icp_cols]

# Train-test split -------------------------------------------------------------
# Target is copper; features are the remaining ICP analytes (ID column dropped).
y = df_ml['Cu_ICP_PPM']
X = df_ml.drop(columns=['MASTERID', 'Cu_ICP_PPM'])
X_train, X_test, y_train, y_test = train_test_split(X, y)
#run models -------------------------------------
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import Lasso
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import BaggingRegressor
from sklearn.linear_model import Ridge
import xgboost as xgb
from sklearn.metrics import r2_score
print('Copper ML Scores: \n')

# NOTE(review): the pasted original had a stray extra ')' after
# `n_estimators = 200` in the RandomForest, ExtraTrees, GradientBoosting and
# XGBoost constructors — a SyntaxError. Fixed below; hyperparameters unchanged.

# Random forest
rf = RandomForestRegressor(max_depth = 10,
                           n_estimators = 200).fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)
print('Random Forest: {:.3f}'.format(r2_score(y_test, y_pred_rf)))

# Extra trees
et = ExtraTreesRegressor(max_depth = 10,
                         n_estimators = 200).fit(X_train, y_train)
y_pred_et = et.predict(X_test)
print('Extra Trees: {:.3f}'.format(r2_score(y_test, y_pred_et)))

# Gradient boosting
gbr = GradientBoostingRegressor(max_depth = 10,
                                n_estimators = 200).fit(X_train, y_train)
y_pred_gbr = gbr.predict(X_test)
print('Gradient Boosting: {:.3f}'.format(r2_score(y_test, y_pred_gbr)))

# Basic linear regression (no tuning) as a baseline.
lr = LinearRegression().fit(X_train, y_train)
y_pred_lr = lr.predict(X_test)
print('Linear Regression: {:.3f}'.format(r2_score(y_test, y_pred_lr)))

# Ada boost
ada = AdaBoostRegressor().fit(X_train, y_train)
y_pred_ada = ada.predict(X_test)
print('Ada Boost Score: {:.3f}'.format(r2_score(y_test, y_pred_ada)))

# Bagging regressor
br = BaggingRegressor().fit(X_train, y_train)
y_pred_br = br.predict(X_test)
print('Bagging Regressor: {:.3f}'.format(r2_score(y_test, y_pred_br)))

# Ridge
ridge = Ridge().fit(X_train, y_train)
y_pred_ridge = ridge.predict(X_test)
print('Ridge: {:.3f}'.format(r2_score(y_test, y_pred_ridge)))

# Lasso
lasso = Lasso().fit(X_train, y_train)
y_pred_lasso = lasso.predict(X_test)
print('Lasso: {:.3f}'.format(r2_score(y_test, y_pred_lasso)))

# XG boost
# WARNING: rebinding `xgb` shadows the imported xgboost module, so
# xgb.XGBRegressor is unreachable after this line. Kept as-is because the
# blended model further down refers to the fitted estimator by this name.
xgb = xgb.XGBRegressor(max_depth = 10,
                       n_estimators = 200).fit(X_train, y_train)
y_pred_xgb = xgb.predict(X_test)
print('XG Boost: {:.3f}'.format(r2_score(y_test, y_pred_xgb)))
Copper ML Scores:
Random Forest: 0.696
Extra Trees: 0.708
Gradient Boosting: 0.686
Linear Regression: 0.564
Ada Boost Score: 0.269
Bagging Regressor: 0.625
Ridge: 0.567
Lasso: 0.562
XG Boost: 0.695
# Blended model -----
def blended_model(x):
    """Equal-weight (0.25 each) average of the four tree-based models.

    x is a feature matrix with the same columns as X_train; returns an
    array of blended copper predictions. Relies on the fitted rf, et,
    gbr and xgb estimators defined above.
    """
    # NOTE(review): the pasted original lost the function-body indentation
    # (a SyntaxError); restored here, arithmetic unchanged.
    return ((0.25 * rf.predict(x)) +
            0.25 * et.predict(x) +
            0.25 * gbr.predict(x) +
            0.25 * xgb.predict(x))

y_pred_blended = blended_model(X_test)
print('Blended Model Score: {:.3f}'.format(r2_score(y_test, y_pred_blended)))
Blended Model Score: 0.726
# Save the results to MySQL and csv -----------
# y_test keeps the original row index from df, so the index merge below
# recovers each held-out sample's metadata.
cu_df = pd.DataFrame({'Actual_Cu_PPM': y_test})
# y_pred_blended is a plain array, so this assigns positionally.
cu_df['Pred_Cu_PPM'] = y_pred_blended

# Add the MASTERID, latitude, longitude and stratigraphy back in.
cu_df = cu_df.merge(df[['MASTERID', 'LAT',
                        'LONG', 'STRAT']],
                    left_index = True,
                    right_index = True)

# Put identifier/location columns first, then actual vs. predicted copper.
cu_df = cu_df[['MASTERID', 'LAT',
               'LONG', 'STRAT',
               'Actual_Cu_PPM', 'Pred_Cu_PPM']]

# Round the copper values to two decimals for reporting.
value_cols = ['Actual_Cu_PPM', 'Pred_Cu_PPM']
cu_df[value_cols] = cu_df[value_cols].astype('float').round(2)

# `engine` is assumed to be a SQLAlchemy engine created earlier — verify.
cu_df.to_sql('quest_copper_ml_blended',
             con = engine,
             if_exists = 'replace',
             index = False)
# index = False added for consistency with to_sql above; the original
# wrote the pandas index to the CSV as an unnamed first column.
cu_df.to_csv('quest_cu_ml_blended.csv', index = False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment