Created
August 26, 2018 02:24
-
-
Save benyaminsalimi/c7b61f92c66f0023576774555141245a to your computer and use it in GitHub Desktop.
This example shows how regression can be used to create prediction intervals. I just changed it to work with a pandas data frame (coal data).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
===================================================== | |
Prediction Intervals for Gradient Boosting Regression | |
===================================================== | |
This example shows how quantile regression can be used | |
to create prediction intervals. | |
the regression model is created for coal data. | |
Author : Benyamin Salimi <benyamin.salimi@gmail.com> | |
credit : http://scikit-learn.org/stable/auto_examples/ensemble/plot_gradient_boosting_quantile.html | |
the code is part of https://github.com/benyaminsalimi/coal-simulation-software | |
""" | |
import numpy as np | |
import matplotlib.pyplot as plt | |
from sklearn import ensemble | |
from pandas import read_excel | |
from sklearn.utils import shuffle | |
print(__doc__) | |
# #############################################################################
# Load data
# Read the Excel workbook into a DataFrame.
data = read_excel('benyamin.xlsx', sheet_name='Sheet1')
# Extract the target column as a 2-D NumPy array.
# HGI is presumably the Hardgrove Grindability Index of the coal samples -- confirm.
# NOTE: DataFrame.as_matrix() was removed in pandas 1.0; to_numpy() is the
# supported replacement and returns the same ndarray.
target = data[['HGI']].to_numpy()
# Remove the target column so `data` holds only the feature columns.
data = data.drop(columns=['HGI'])
# Shuffle features and target together so rows stay aligned; fixed seed for
# reproducible splits.
X, y = shuffle(data, target, random_state=100)
y = y.ravel()  # flatten (n, 1) target to (n,)
X = X.astype(np.float32)
# Train/test split: the first 90% of (shuffled) rows train, the rest test.
offset = int(X.shape[0] * 0.9)
X_train, y_train = X[:offset], y[:offset]
X_test, y_test = X[offset:], y[offset:]
# #############################################################################
# Fit regression model
# Quantile level: fitting at alpha and 1 - alpha yields a 90% prediction
# interval. Defined once and reused so the two copies cannot drift apart
# (the original hard-coded 0.95 both here and in `params`).
alpha = 0.95
params = {'n_estimators': 500, 'max_depth': 4, 'min_samples_split': 2,
          'learning_rate': 0.01, 'loss': 'quantile', 'alpha': alpha}
clf = ensemble.GradientBoostingRegressor(**params)
xx = X_test  # predict on the held-out test rows

# Upper bound: quantile loss at the 0.95 quantile.
clf.fit(X_train, y_train)
y_upper = clf.predict(xx)

# Lower bound: same model re-fit at the 0.05 quantile.
clf.set_params(alpha=1.0 - alpha)
clf.fit(X_train, y_train)
y_lower = clf.predict(xx)

# Point prediction: squared-error loss.
# NOTE: loss='ls' was renamed 'squared_error' in scikit-learn 1.0 and removed
# in 1.2; the old spelling raises on current releases.
clf.set_params(loss='squared_error')
clf.fit(X_train, y_train)
y_pred = clf.predict(xx)
# Plot the measured target values, the point prediction, and the 90%
# prediction interval obtained from the two quantile models.
fig = plt.figure()
plt.plot(y_test, 'b.', markersize=10, label=u'Target')
plt.plot(y_pred, 'r-', label=u'Prediction')
# Draw both interval bounds as thin black lines.
for bound in (y_upper, y_lower):
    plt.plot(bound, 'k-')
# Shade the band between the bounds over the test-sample index.
sample_index = list(range(0, len(y_test)))
plt.fill_between(sample_index, y_upper, y_lower, alpha=0.4,
                 label='prediction interval')
plt.ylabel('$ Value $')
plt.legend(loc='upper right')
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment