Created
August 26, 2018 02:24
-
-
Save benyaminsalimi/c7b61f92c66f0023576774555141245a to your computer and use it in GitHub Desktop.
This example shows how regression can be used to create prediction intervals. I just changed it to work with a pandas data frame (coal data).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
===================================================== | |
Prediction Intervals for Gradient Boosting Regression | |
===================================================== | |
This example shows how quantile regression can be used | |
to create prediction intervals. | |
the regression model is created for coal data. | |
Author : Benyamin Salimi <benyamin.salimi@gmail.com> | |
credit : http://scikit-learn.org/stable/auto_examples/ensemble/plot_gradient_boosting_quantile.html | |
the code is part of https://github.com/benyaminsalimi/coal-simulation-software | |
""" | |
import numpy as np | |
import matplotlib.pyplot as plt | |
from sklearn import ensemble | |
from pandas import read_excel | |
from sklearn.utils import shuffle | |
print(__doc__) | |
# #############################################################################
# Load data
# Read the Excel workbook into a DataFrame.
data = read_excel('benyamin.xlsx', sheet_name='Sheet1')
# Extract the target column as a 2-D NumPy array.
# HGI is presumably the Hardgrove Grindability Index of the coal samples -- confirm.
# NOTE: DataFrame.as_matrix() was removed in pandas 1.0; to_numpy() is the
# supported replacement and returns the same ndarray.
target = data[['HGI']].to_numpy()
# Remove the target column so `data` holds only the feature columns.
data = data.drop(columns=['HGI'])
# Shuffle features and target together so rows stay aligned; fixed seed for
# reproducible splits.
X, y = shuffle(data, target, random_state=100)
y = y.ravel()  # flatten (n, 1) target to (n,)
X = X.astype(np.float32)
# Train/test split: the first 90% of (shuffled) rows train, the rest test.
offset = int(X.shape[0] * 0.9)
X_train, y_train = X[:offset], y[:offset]
X_test, y_test = X[offset:], y[offset:]
# #############################################################################
# Fit regression model
# Quantile level: fitting at alpha and 1 - alpha yields a 90% prediction
# interval. Defined once and reused so the two copies cannot drift apart
# (the original hard-coded 0.95 both here and in `params`).
alpha = 0.95
params = {'n_estimators': 500, 'max_depth': 4, 'min_samples_split': 2,
          'learning_rate': 0.01, 'loss': 'quantile', 'alpha': alpha}
clf = ensemble.GradientBoostingRegressor(**params)
xx = X_test  # predict on the held-out test rows

# Upper bound: quantile loss at the 0.95 quantile.
clf.fit(X_train, y_train)
y_upper = clf.predict(xx)

# Lower bound: same model re-fit at the 0.05 quantile.
clf.set_params(alpha=1.0 - alpha)
clf.fit(X_train, y_train)
y_lower = clf.predict(xx)

# Point prediction: squared-error loss.
# NOTE: loss='ls' was renamed 'squared_error' in scikit-learn 1.0 and removed
# in 1.2; the old spelling raises on current releases.
clf.set_params(loss='squared_error')
clf.fit(X_train, y_train)
y_pred = clf.predict(xx)
# Plot the measured target values, the point prediction, and the 90%
# prediction interval obtained from the two quantile models.
fig = plt.figure()
plt.plot(y_test, 'b.', markersize=10, label=u'Target')
plt.plot(y_pred, 'r-', label=u'Prediction')
# Draw both interval bounds as thin black lines.
for bound in (y_upper, y_lower):
    plt.plot(bound, 'k-')
# Shade the band between the bounds over the test-sample index.
sample_index = list(range(0, len(y_test)))
plt.fill_between(sample_index, y_upper, y_lower, alpha=0.4,
                 label='prediction interval')
plt.ylabel('$ Value $')
plt.legend(loc='upper right')
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment