# greencoder/predict.py

## predict.py
from __future__ import print_function

import numpy as np
import pandas as pd
import talib as ta
import pandas_datareader as web

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor

def make_prediction(quotes_df, estimator):
    """Predict the closing price that follows the last row of ``quotes_df``.

    Technical indicators derived from the ``Close`` column are added to a
    copy of the quotes, the estimator is scored with ten-fold
    cross-validation, refitted on every usable row, and then asked to
    predict the close following the most recent row.

    Args:
        quotes_df: DataFrame with at least ``Open``, ``High``, ``Low``,
            ``Close`` and ``Volume`` columns. Rows are assumed to be in
            chronological order, one per trading day. The frame is copied
            and never modified.
        estimator: Object with ``fit`` and ``predict`` that
            ``cross_val_score`` can score (for example a scikit-learn
            regressor or pipeline). It is fitted in place.

    Returns:
        Whatever ``estimator.predict`` returns for the single most recent
        row of ``quotes_df``.

    Side effects:
        Prints the mean cross-validation score, labelled ``Accuracy:``.
    """
    # Work on a copy so the caller's frame is never modified.
    df = quotes_df.copy()

    # Moving averages of the close over 5, 20 and 50 rows. MA_5 is not in
    # the feature list below; it stays available for experimentation.
    df['MA_5'] = ta.MA(df['Close'].values, timeperiod=5, matype=0)
    df['MA_20'] = ta.MA(df['Close'].values, timeperiod=20, matype=0)
    df['MA_50'] = ta.MA(df['Close'].values, timeperiod=50, matype=0)

    # Bollinger Bands (upper, middle, lower) over a 20-row window.
    df['BOL_Upp'], df['BOL_Mid'], df['BOL_Low'] = ta.BBANDS(
        df['Close'].values, timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)

    # 14-period Relative Strength Index.
    df['RSI'] = ta.RSI(df['Close'].values, 14)

    # Fast (5) and slow (20) simple moving averages.
    df['SMA_Fast'] = ta.SMA(df['Close'].values, 5)
    df['SMA_Slow'] = ta.SMA(df['Close'].values, 20)

    # Row-over-row percent change of the close.
    df['ClosingPctChange'] = df['Close'].pct_change()

    # Set the most recent row aside before the target column exists: it has
    # no known next close, so it is removed by the dropna() below, and it is
    # the row we want a prediction for.
    df_today = df.iloc[-1:, :].copy()

    # Target: the close of the following row.
    df['NextClose'] = df['Close'].shift(-1)

    # Drop every row containing a NaN (this includes the last row, whose
    # NextClose is missing).
    df.dropna(inplace=True)

    # Features used for fitting; edit this list to experiment.
    features_to_fit = ['Open', 'High', 'Low', 'Close', 'Volume', 'MA_20', 'MA_50',
        'RSI', 'SMA_Fast', 'SMA_Slow', 'BOL_Upp', 'BOL_Mid', 'BOL_Low', 'ClosingPctChange']

    X = df[features_to_fit]
    y = df['NextClose']

    # Cross-validate on all usable rows. k-fold already holds out one fold
    # per round, so a separate train/test split is unnecessary; scoring only
    # a 40% "test" slice would ignore most of the data the final model is
    # fitted on.
    # The printed value is the estimator's default score (R^2 for
    # scikit-learn regressors), despite the 'Accuracy' label.
    cv = cross_val_score(estimator, X, y, cv=10)
    print('Accuracy:', cv.mean())

    # Refit on the full dataset for the actual prediction.
    estimator.fit(X, y)

    # Predict the close that follows the most recent row.
    X_new = df_today[features_to_fit]
    next_price_prediction = estimator.predict(X_new)

    return next_price_prediction


# Choose which company to predict
symbol = 'AAPL'

# Download OHLCV quotes for the symbol using DataReader. No start/end dates
# are passed, so the date range is whatever DataReader defaults to.
# NOTE(review): the 'google' source is reported as discontinued in later
# pandas-datareader releases -- confirm it works with the installed version.
quotes_df = web.data.DataReader(symbol, 'google')

# Predict the next closing price using linear regression
print('Unscaled Linear Regression:')
linreg = LinearRegression()
print('Predicted Closing Price: %.2f\n' % make_prediction(quotes_df, linreg))

# Predict the next closing price using linear regression with scaled features
print('Scaled Linear Regression:')
pipe = make_pipeline(StandardScaler(), LinearRegression())
print('Predicted Closing Price: %.2f\n' % make_prediction(quotes_df, pipe))

# Predict the next closing price using ridge regression
print('Unscaled Ridge Regression:')
ridge = Ridge()
print('Predicted Closing Price: %.2f\n' % make_prediction(quotes_df, ridge))

# Predict the next closing price using ridge regression with scaled features.
# The label previously read 'Scaled Linear Regression:' (copy-paste slip).
print('Scaled Ridge Regression:')
ridge_pipe = make_pipeline(StandardScaler(), Ridge())
print('Predicted Closing Price: %.2f\n' % make_prediction(quotes_df, ridge_pipe))

# Predict the next closing price using decision tree regression
print('Unscaled Decision Tree Regressor:')
tree = DecisionTreeRegressor()
print('Predicted Closing Price: %.2f\n' % make_prediction(quotes_df, tree))

# Predict the next closing price using Gaussian Naive Bayes
# NOTE(review): GaussianNB is a classifier, not a regressor, so it presumably
# treats each distinct closing price as a class label -- confirm this is
# intended.
print('Unscaled Gaussian Naive Bayes:')
nb = GaussianNB()
print('Predicted Closing Price: %.2f\n' % make_prediction(quotes_df, nb))
	from __future__ import print_function

	import numpy as np
	import pandas as pd
	import talib as ta
	import pandas_datareader as web

	from sklearn.linear_model import LinearRegression
	from sklearn.linear_model import Ridge
	from sklearn.model_selection import cross_val_score
	from sklearn.model_selection import train_test_split
	from sklearn.naive_bayes import GaussianNB
	from sklearn.pipeline import make_pipeline
	from sklearn.preprocessing import StandardScaler
	from sklearn.tree import DecisionTreeRegressor

	def make_prediction(quotes_df, estimator):
	    """Predict the closing price that follows the last row of ``quotes_df``.

	    Technical indicators derived from the ``Close`` column are added to a
	    copy of the quotes, the estimator is scored with ten-fold
	    cross-validation, refitted on every usable row, and then asked to
	    predict the close following the most recent row.

	    Args:
	        quotes_df: DataFrame with at least ``Open``, ``High``, ``Low``,
	            ``Close`` and ``Volume`` columns. Rows are assumed to be in
	            chronological order, one per trading day. The frame is copied
	            and never modified.
	        estimator: Object with ``fit`` and ``predict`` that
	            ``cross_val_score`` can score (for example a scikit-learn
	            regressor or pipeline). It is fitted in place.

	    Returns:
	        Whatever ``estimator.predict`` returns for the single most recent
	        row of ``quotes_df``.

	    Side effects:
	        Prints the mean cross-validation score, labelled ``Accuracy:``.
	    """
	    # Work on a copy so the caller's frame is never modified.
	    df = quotes_df.copy()

	    # Moving averages of the close over 5, 20 and 50 rows. MA_5 is not in
	    # the feature list below; it stays available for experimentation.
	    df['MA_5'] = ta.MA(df['Close'].values, timeperiod=5, matype=0)
	    df['MA_20'] = ta.MA(df['Close'].values, timeperiod=20, matype=0)
	    df['MA_50'] = ta.MA(df['Close'].values, timeperiod=50, matype=0)

	    # Bollinger Bands (upper, middle, lower) over a 20-row window.
	    df['BOL_Upp'], df['BOL_Mid'], df['BOL_Low'] = ta.BBANDS(
	        df['Close'].values, timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)

	    # 14-period Relative Strength Index.
	    df['RSI'] = ta.RSI(df['Close'].values, 14)

	    # Fast (5) and slow (20) simple moving averages.
	    df['SMA_Fast'] = ta.SMA(df['Close'].values, 5)
	    df['SMA_Slow'] = ta.SMA(df['Close'].values, 20)

	    # Row-over-row percent change of the close.
	    df['ClosingPctChange'] = df['Close'].pct_change()

	    # Set the most recent row aside before the target column exists: it has
	    # no known next close, so it is removed by the dropna() below, and it is
	    # the row we want a prediction for.
	    df_today = df.iloc[-1:, :].copy()

	    # Target: the close of the following row.
	    df['NextClose'] = df['Close'].shift(-1)

	    # Drop every row containing a NaN (this includes the last row, whose
	    # NextClose is missing).
	    df.dropna(inplace=True)

	    # Features used for fitting; edit this list to experiment.
	    features_to_fit = ['Open', 'High', 'Low', 'Close', 'Volume', 'MA_20', 'MA_50',
	        'RSI', 'SMA_Fast', 'SMA_Slow', 'BOL_Upp', 'BOL_Mid', 'BOL_Low', 'ClosingPctChange']

	    X = df[features_to_fit]
	    y = df['NextClose']

	    # Cross-validate on all usable rows. k-fold already holds out one fold
	    # per round, so a separate train/test split is unnecessary; scoring only
	    # a 40% "test" slice would ignore most of the data the final model is
	    # fitted on.
	    # The printed value is the estimator's default score (R^2 for
	    # scikit-learn regressors), despite the 'Accuracy' label.
	    cv = cross_val_score(estimator, X, y, cv=10)
	    print('Accuracy:', cv.mean())

	    # Refit on the full dataset for the actual prediction.
	    estimator.fit(X, y)

	    # Predict the close that follows the most recent row.
	    X_new = df_today[features_to_fit]
	    next_price_prediction = estimator.predict(X_new)

	    return next_price_prediction


	# Choose which company to predict
	symbol = 'AAPL'

	# Download OHLCV quotes for the symbol using DataReader. No start/end dates
	# are passed, so the date range is whatever DataReader defaults to.
	# NOTE(review): the 'google' source is reported as discontinued in later
	# pandas-datareader releases -- confirm it works with the installed version.
	quotes_df = web.data.DataReader(symbol, 'google')

	# Predict the next closing price using linear regression
	print('Unscaled Linear Regression:')
	linreg = LinearRegression()
	print('Predicted Closing Price: %.2f\n' % make_prediction(quotes_df, linreg))

	# Predict the next closing price using linear regression with scaled features
	print('Scaled Linear Regression:')
	pipe = make_pipeline(StandardScaler(), LinearRegression())
	print('Predicted Closing Price: %.2f\n' % make_prediction(quotes_df, pipe))

	# Predict the next closing price using ridge regression
	print('Unscaled Ridge Regression:')
	ridge = Ridge()
	print('Predicted Closing Price: %.2f\n' % make_prediction(quotes_df, ridge))

	# Predict the next closing price using ridge regression with scaled features.
	# The label previously read 'Scaled Linear Regression:' (copy-paste slip).
	print('Scaled Ridge Regression:')
	ridge_pipe = make_pipeline(StandardScaler(), Ridge())
	print('Predicted Closing Price: %.2f\n' % make_prediction(quotes_df, ridge_pipe))

	# Predict the next closing price using decision tree regression
	print('Unscaled Decision Tree Regressor:')
	tree = DecisionTreeRegressor()
	print('Predicted Closing Price: %.2f\n' % make_prediction(quotes_df, tree))

	# Predict the next closing price using Gaussian Naive Bayes
	# NOTE(review): GaussianNB is a classifier, not a regressor, so it presumably
	# treats each distinct closing price as a class label -- confirm this is
	# intended.
	print('Unscaled Gaussian Naive Bayes:')
	nb = GaussianNB()
	print('Predicted Closing Price: %.2f\n' % make_prediction(quotes_df, nb))