sshariff01/BagLearner.py

## BagLearner.py
import numpy as np
import RTLearner as rtl
from scipy import stats
import pdb

class BagLearner(object):
    """Bootstrap-aggregating ("bagging") wrapper around a base learner class.

    ``bags`` independent instances of ``learner`` are created up front. Each
    one is trained on its own bootstrap resample of the training data, and
    ``query`` returns, for every query point, the most common prediction
    across the bags.
    """

    def __init__(self, learner=rtl.RTLearner, kwargs=None, bags=10, boost=False, verbose=False):
        """
        @summary: Create the ensemble members.
        @param learner: class of the base learner; instantiated ``bags`` times
        @param kwargs: keyword arguments passed to every ``learner(...)`` call
        @param bags: number of ensemble members
        @param boost: accepted for interface compatibility; not used
        @param verbose: stored on the instance; not otherwise used here
        """
        self.learner = learner
        self.bags = bags
        self.verbose = verbose

        # A ``{}`` default would be one dict shared by every BagLearner ever
        # constructed; use None as the sentinel and copy what the caller gave.
        learner_kwargs = {} if kwargs is None else dict(kwargs)
        self.learners = [learner(**learner_kwargs) for _ in range(bags)]

    def author(self):
        """Return the GT user ID of the author."""
        return 'sshariff3'

    def addEvidence(self, dataX, dataY):
        """
        @summary: Add training data to learner
        @param dataX: X values of data to add
        @param dataY: the Y training values
        """
        # np.asarray lets callers pass a pandas Series for dataY; indexing a
        # Series with [:, None] is not supported by newer pandas releases.
        targets = np.asarray(dataY)
        data_all = np.hstack((dataX, targets[:, None]))
        n_rows = data_all.shape[0]

        for learner in self.learners:
            # Bootstrap: draw n_rows row indices with replacement.
            picks = np.random.choice(n_rows, n_rows, replace=True)
            data_subset = data_all[picks]
            learner.addEvidence(data_subset[:, : -1], data_subset[:, -1])

    def query(self, points):
        """
        @summary: Estimate a set of test points given the model we built.
        @param points: should be a numpy array with each row corresponding to a specific query.
        @returns the estimated values according to the saved model.
        """
        votes = np.asarray([learner.query(points) for learner in self.learners])
        modal = np.asarray(stats.mode(votes, axis=0)[0])

        # Older SciPy keeps the reduced axis (result shape (1, n)); newer
        # releases drop it (shape (n,)). Strip the extra axis only if present
        # so the caller always gets one value per query point.
        if modal.ndim == votes.ndim:
            modal = modal[0]
        return modal

if __name__ == "__main__":
    # A parenthesised single-argument print behaves identically on
    # Python 2 and Python 3.
    print("the secret clue is 'zzyzx'")

## experiment1.py
"""
Student Name: Shoabe Shariff
GT User ID: sshariff3
GT ID: 903272097
"""

import datetime as dt
import pandas as pd
import util as ut
import random
import RTLearner as rtl
import BagLearner as bl
from indicators import *
from marketsimcode import *
from ManualStrategy import testPolicy as testManualStrategyPolicy
import StrategyLearner as sl
import pdb


def save_plot(df):
    """Plot ``df`` as the Experiment 1 comparison chart and save it.

    ``df`` is plotted with a green and a red line (one per column, in column
    order), the figure is written with ``plt.savefig("experiment1")`` and then
    closed, and a confirmation message is printed.
    """
    axes = df.plot(title="Manual Strategy vs Learning Strategy", fontsize=12, color=['green', 'red'])
    axes.set_xlabel("Date")
    axes.set_ylabel("Normalized Portfolio Value")

    plt.savefig("experiment1")
    plt.close()

    # Single-argument print(...) works on both Python 2 and Python 3.
    print("Plot saved as experiment1.png")
    print("")

def run():
    """Experiment 1: Manual Strategy vs. Strategy Learner on in-sample JPM.

    Trains a StrategyLearner on the in-sample period, evaluates both the
    learned and the manual policy on that same period (commission 0,
    impact 0.005), prints metrics for each, and saves a plot of the two
    normalized portfolio-value curves.
    """
    symbol = "JPM"
    sv = 100000
    impact = 0.005

    ### In-Sample Dates (used for both training and evaluation)
    sd = dt.datetime(2008, 1, 1)
    ed = dt.datetime(2009, 12, 31)

    ####
    # Learning Strategy
    ####
    learner = sl.StrategyLearner(verbose=False, impact=impact)  # constructor
    learner.addEvidence(symbol=symbol, sd=sd, ed=ed, sv=sv)

    df_trades = learner.testPolicy(symbol=symbol, sd=sd, ed=ed, sv=sv)
    learning_portvals = compute_portvals(df_trades, symbol, start_val=sv, commission=0, impact=impact)

    sl.report_metrics(learning_portvals, "Learning Strategy")

    learning_portvals = learning_portvals.to_frame('Learning Strategy')
    # .iloc replaces the removed .ix indexer; row 0 is the first trading day.
    normalized_learning_strategy = learning_portvals / learning_portvals.iloc[0, :]

    ####
    # Manual Strategy
    ####
    manual_strategy_trades = testManualStrategyPolicy(symbol=symbol, sd=sd, ed=ed, sv=sv)
    manual_portvals = compute_portvals(manual_strategy_trades, symbol, start_val=sv, commission=0, impact=impact)

    sl.report_metrics(manual_portvals, "Manual Strategy")

    manual_portvals = manual_portvals.to_frame('Manual Strategy')
    normalized_manual_strategy = manual_portvals / manual_portvals.iloc[0, :]

    ####
    # Plot
    ####
    df_temp = normalized_manual_strategy.join(normalized_learning_strategy)

    save_plot(df_temp)


## experiment2.py
"""
Student Name: Shoabe Shariff
GT User ID: sshariff3
GT ID: 903272097
"""

import datetime as dt
import pandas as pd
import util as ut
import random
import RTLearner as rtl
import BagLearner as bl
from indicators import *
from marketsimcode import *
from ManualStrategy import testPolicy as testManualStrategyPolicy
import StrategyLearner as sl
import pdb


def save_plot(df):
    """Plot ``df`` as the Experiment 2 impact-comparison chart and save it.

    ``df`` is plotted with green, red and blue lines (one per column, in
    column order), the figure is written with ``plt.savefig("experiment2")``
    and then closed, and a confirmation message is printed.
    """
    axes = df.plot(title="Strategy Learner with Varying Impact", fontsize=12, color=['green', 'red', 'blue'])
    axes.set_xlabel("Date")
    axes.set_ylabel("Normalized Portfolio Value")

    plt.savefig("experiment2")
    plt.close()

    # Single-argument print(...) works on both Python 2 and Python 3.
    print("Plot saved as experiment2.png")
    print("")

def run():
    """Experiment 2: effect of market impact on the Strategy Learner.

    For each impact value (0, 0.05, 0.1) a fresh StrategyLearner is trained
    and evaluated on in-sample JPM data with that impact (commission 0),
    metrics are printed, and the normalized portfolio-value curves of all
    trials are saved in one plot.
    """
    symbol = "JPM"
    sv = 100000

    # (impact, label for the printed metrics, column name in the plot)
    trials = [
        (0, "Learning Strategy - Impact 0", 'Impact = 0'),
        (0.05, "Learning Strategy - Impact 0.05", 'Impact = 0.05'),
        (0.1, "Learning Strategy - Impact 0.1", 'Impact = 0.1'),
    ]

    ### In-Sample Dates (used for both training and evaluation)
    sd = dt.datetime(2008, 1, 1)
    ed = dt.datetime(2009, 12, 31)

    df_temp = None
    for impact, metrics_label, column_name in trials:
        learner = sl.StrategyLearner(verbose=False, impact=impact)  # constructor
        learner.addEvidence(symbol=symbol, sd=sd, ed=ed, sv=sv)

        df_trades = learner.testPolicy(symbol=symbol, sd=sd, ed=ed, sv=sv)
        learning_portvals = compute_portvals(df_trades, symbol, start_val=sv, commission=0, impact=impact)

        sl.report_metrics(learning_portvals, metrics_label)

        learning_portvals = learning_portvals.to_frame(column_name)
        # .iloc replaces the removed .ix indexer; row 0 is the first trading day.
        normalized = learning_portvals / learning_portvals.iloc[0, :]

        # Accumulate the curves side by side, in trial order.
        df_temp = normalized if df_temp is None else df_temp.join(normalized)

    ####
    # Plot
    ####
    save_plot(df_temp)


## indicators.py
"""
Student Name: Shoabe Shariff
GT User ID: sshariff3
GT ID: 903272097
"""

import pandas as pd
import numpy as np
import datetime as dt
import os
from util import get_data, plot_data
import matplotlib.pyplot as plt
import pdb

def author():
    """Return the GT user ID of this module's author."""
    user_id = 'sshariff3'
    return user_id

def illustrate_indicators(symbols = ['JPM'], start_date = dt.datetime(2008, 1, 1), end_date = dt.datetime(2009, 12, 31)):
    """Compute each technical indicator and save one plot per indicator.

    Prices for ``symbols`` between ``start_date`` and ``end_date`` are loaded
    (rows containing NaN are dropped), and the momentum, SMA, Bollinger-band
    and MACD frames are each plotted to their own file.

    Only the first entry of ``symbols`` is charted: the per-symbol indicator
    functions take a single symbol name. NOTE(review): ``calculate_macd`` is
    called without a symbol, as in the original code.
    """
    prices = get_data(symbols, pd.date_range(start_date, end_date), addSPY=False).dropna(axis=0)
    # calculate_momentum / calculate_sma / calculate_bollinger_bands all
    # require the symbol column name as their second argument.
    symbol = symbols[0]

    save_plot(calculate_momentum(prices, symbol), title='Technical Indicator 1: Momentum', xlabel='Date', ylabel='Momentum', filename='technical_indicator_1_momentum')
    save_plot(calculate_sma(prices, symbol), title='Technical Indicator 2: Simple Moving Average (SMA)', xlabel='Date', ylabel='Normalized Price', filename='technical_indicator_2_sma')
    save_plot(calculate_bollinger_bands(prices, symbol), title='Technical Indicator 3: Bollinger Bands', xlabel='Date', ylabel='Price', filename='technical_indicator_3_bollinger_bands')
    save_plot(calculate_macd(prices), title='Technical Indicator 4: Moving Average Convergence Divergence (MACD)', xlabel='Date', ylabel='Price', filename='technical_indicator_4_macd')

def save_plot(df, title, xlabel, ylabel, filename):
    """Plot ``df`` with the given title and axis labels and save it.

    The figure is written via ``plt.savefig(filename)`` and closed afterwards
    so successive calls do not draw on the same figure.
    """
    axes = df.plot(fontsize=12, title=title)
    axes.set_ylabel(ylabel)
    axes.set_xlabel(xlabel)
    plt.savefig(filename)
    plt.close()

"""
Technical Indicator 1 - Momentum
momentum[t] = price[t] / price[t-n] - 1
  * n = 10
"""
def calculate_momentum(prices, symbol):
    """Return ``prices`` with an added 'Momentum' column.

    momentum[t] = price[t] / price[t - 10] - 1. The first 10 rows have no
    price 10 rows earlier and are NaN.

    @param prices: DataFrame containing a price column named ``symbol``
    @param symbol: name of the price column; its momentum becomes 'Momentum'
    @returns a copy of ``prices`` joined with the 'Momentum' column
    """
    window = 10
    # shift(window) lines price[t - window] up with price[t] directly. The
    # previous slice assignment only produced the right rows because pandas
    # re-aligned the assigned frame on its index.
    momentum = prices / prices.shift(window) - 1
    momentum = momentum.rename(columns={symbol: 'Momentum'})
    return prices.copy().join(momentum)

"""
Technical Indicator 3 - Bollinger Bands
  1. Compute rolling mean
  2. Compute rolling std deviation
  3. Compute upper and lower bands

  * window size = 20
"""
def calculate_bollinger_bands(prices, symbol):
    """Return ``prices`` joined with Bollinger-band columns for ``symbol``.

    Uses a 20-row rolling window. Added columns, in order: 'Rolling Mean',
    'Upper Band', 'Lower Band' (mean +/- 2 std) and 'Bollinger Band', which is
    (price - rolling mean) / (2 * rolling std).
    """
    window = 20
    price_series = prices[symbol]

    mean_series = get_rolling_mean(price_series, window_size=window)
    std_series = get_rolling_std(price_series, window_size=window)
    upper_series, lower_series = get_bollinger_bands(mean_series, std_series)

    # Position of the price inside the band: +/-1 means on the upper/lower band.
    band_value = (price_series - mean_series) / (2 * std_series)

    result = prices.join(mean_series.to_frame('Rolling Mean'))
    result = result.join(upper_series.to_frame('Upper Band'))
    result = result.join(lower_series.to_frame('Lower Band'))
    result = result.join(band_value.to_frame('Bollinger Band'))
    return result

def get_rolling_mean(values, window_size):
    """Return the rolling mean of ``values`` over ``window_size`` rows.

    The first ``window_size - 1`` entries are NaN because the window is not
    yet full.
    """
    # The top-level pd.rolling_mean helper was removed from pandas; the
    # .rolling() accessor is the supported equivalent.
    return values.rolling(window=window_size).mean()

def get_rolling_std(values, window_size):
    """Return the rolling standard deviation of ``values`` over ``window_size`` rows.

    The first ``window_size - 1`` entries are NaN because the window is not
    yet full.
    """
    # The top-level pd.rolling_std helper was removed from pandas; the
    # .rolling() accessor is the supported equivalent.
    return values.rolling(window=window_size).std()

def get_bollinger_bands(rm, rstd):
    """Return ``(upper, lower)`` bands: the mean plus/minus two deviations."""
    half_width = rstd*2
    return rm + half_width, rm - half_width

"""
Technical Indicator 2 - SMA
  1. Plot SMA
  2. Plot Price
  3. Plot Price/SMA

  * moving window size = 10
"""
def calculate_sma(prices, symbol):
    """Return normalized prices joined with their SMA and Price/SMA ratio.

    Prices are divided by their first row, a 10-row simple moving average of
    the ``symbol`` column is added as 'SMA', and 'Price/SMA' is
    normalized price / SMA - 1.
    """
    # .iloc replaces the removed .ix indexer: normalise by the first row.
    normalized_prices = prices / prices.iloc[0, :]
    rolling_mean = get_rolling_mean(normalized_prices[symbol], window_size=10)
    rolling_mean = rolling_mean.to_frame('SMA')

    df_temp = normalized_prices.join(rolling_mean)
    df_temp['Price/SMA'] = df_temp[symbol] / df_temp['SMA'] - 1
    return df_temp

"""
Technical Indicator 4 - MACD
  MACD = (12 period EMA) - (26 period EMA)

  Sources:
    * MACD - https://www.investopedia.com/terms/m/macd.asp
    * DataFrame EWM - http://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.ewm.html

"""
def calculate_macd(prices, symbol=None):
    """Return the 12/26-period EMAs, the MACD line and its signal line.

    @param prices: DataFrame of prices
    @param symbol: price column to use; defaults to the first column of
        ``prices`` (previously the column name 'JPM' was hard-coded, so any
        other symbol failed on the join)
    @returns DataFrame with columns '<symbol> 12-period Exp Moving Avg',
        '<symbol> 26-period Exp Moving Avg', 'MACD' (12-period EMA minus
        26-period EMA) and 'Signal Line' (9-period EMA of MACD)
    """
    if symbol is None:
        symbol = prices.columns[0]

    fast_label = '{} 12-period Exp Moving Avg'.format(symbol)
    slow_label = '{} 26-period Exp Moving Avg'.format(symbol)
    price_series = prices[symbol]

    df_temp = price_series.ewm(span=12).mean().to_frame(fast_label)
    df_temp[slow_label] = price_series.ewm(span=26).mean()
    df_temp['MACD'] = df_temp[fast_label] - df_temp[slow_label]
    df_temp['Signal Line'] = df_temp['MACD'].ewm(span=9).mean()
    return df_temp


# Script entry point: when this module is run directly, generate and save the
# indicator plots using illustrate_indicators' default arguments.
if __name__ == "__main__":
    illustrate_indicators()

## ManualStrategy.py
"""
Student Name: Shoabe Shariff
GT User ID: sshariff3
GT ID: 903272097
"""

import pandas as pd
import numpy as np
import datetime as dt
import os
from util import get_data, plot_data
from indicators import calculate_momentum, calculate_bollinger_bands, calculate_sma, calculate_macd
from marketsimcode import compute_portvals
import matplotlib.pyplot as plt
import pdb

def testPolicy(symbol = "AAPL", sd=dt.datetime(2010, 1, 1), ed=dt.datetime(2011, 12, 31), sv = 100000):
    """Run the manual rule-based strategy and return its trades.

    Loads prices for ``symbol`` between ``sd`` and ``ed`` (NaN rows dropped),
    computes the momentum, Bollinger-band and Price/SMA indicators, and feeds
    each row to ``generate_orders`` together with one shared, mutable
    portfolio-state dict. ``sv`` is accepted but not used here.

    @returns DataFrame with a single column named ``symbol`` holding the
        order size produced for each date
    """
    date_range = pd.date_range(sd, ed)
    adjusted_close_prices = get_data([symbol], date_range, addSPY=False).dropna()
    adjusted_close_prices.index.name = 'Date'

    momentum = calculate_momentum(adjusted_close_prices, symbol)['Momentum']
    bollinger_bands = calculate_bollinger_bands(adjusted_close_prices, symbol).drop(symbol, axis=1)
    sma = calculate_sma(adjusted_close_prices, symbol)['Price/SMA']
    indicators = adjusted_close_prices.join(momentum).join(bollinger_bands).join(sma)

    # State carried from one row to the next by generate_orders.
    portfolio = {'Shares': 0, 'firstRun': True}

    def order_for_row(row):
        return generate_orders(row, portfolio)

    trades = indicators.apply(order_for_row, axis=1, raw=False)
    return pd.DataFrame(trades, columns=[symbol])

def generate_orders(indicators, portfolio):
    """Decide the order for one row of indicators and update ``portfolio``.

    The very first call only clears ``portfolio['firstRun']`` and returns 0.
    Afterwards the first matching rule (checked top to bottom) is the only one
    considered; if its holdings check fails, no order is placed. The chosen
    order is also stored in ``indicators['Orders']``.

    @param indicators: mapping with 'Price/SMA', 'Bollinger Band', 'Momentum'
    @param portfolio: dict with 'Shares' (current holdings) and 'firstRun'
    @returns the order size: positive to buy, negative to sell, 0 for none
    """
    if portfolio['firstRun'] == True:
        portfolio['firstRun'] = False
        indicators['Orders'] = 0
        return indicators['Orders']

    held = portfolio['Shares']
    delta = 0

    if indicators['Price/SMA'] < 0 and indicators['Bollinger Band'] < 1 and indicators['Momentum'] < 0:
        if held < 1000:  # BUY 1000 shares
            delta = 1000
    elif indicators['Price/SMA'] > 0 and indicators['Bollinger Band'] > -1 and indicators['Momentum'] > 0:
        if held > -1000:  # SELL 1000 shares
            delta = -1000
    elif indicators['Price/SMA'] > 0.5 and indicators['Bollinger Band'] > 1 and indicators['Momentum'] < 0:
        if held > 0:  # SELL 2000 shares
            delta = -2000
        elif held == 0:  # SELL 1000 shares
            delta = -1000
    elif indicators['Momentum'] < 0:
        if held >= 0:  # SELL 1000 shares
            delta = -1000
    elif indicators['Momentum'] > 0:
        if held <= 0:  # BUY 1000 shares
            delta = 1000

    # Holdings are only rewritten when a trade actually happens.
    if delta != 0:
        portfolio['Shares'] = held + delta

    indicators['Orders'] = delta
    return indicators['Orders']

def compute_daily_returns(df):
    """Return row-over-row returns of ``df``, excluding the first row.

    returns[t] = df[t] / df[t - 1] - 1. The first row has no predecessor and
    is dropped from the result.
    """
    # shift(1) pairs each row with the previous one. Building the result from
    # an expression (rather than assigning floats into a copy of ``df``) also
    # avoids problems when ``df`` holds integers. .iloc replaces the removed
    # .ix indexer.
    daily_returns = df / df.shift(1) - 1
    return daily_returns.iloc[1:]

def report_metrics(port_vals, strategy):
    """Print cumulative return, std and mean of daily returns for a portfolio.

    @param port_vals: Series of portfolio values in chronological order
    @param strategy: label inserted into each printed line
    """
    daily_rets = compute_daily_returns(port_vals)
    # Positional access: integer labels like [-1] on a date-indexed Series
    # rely on a fallback that newer pandas no longer provides.
    cr = (port_vals.iloc[-1] / port_vals.iloc[0]) - 1
    adr = daily_rets.mean()
    sddr = daily_rets.std()

    # Single-argument print(...) works on both Python 2 and Python 3.
    print("Cumulative Return of {}: {}".format(strategy, cr))
    print("Standard Deviation of {}: {}".format(strategy, sddr))
    print("Average Daily Return of {}: {}".format(strategy, adr))
    print("")

def save_plot(df, sell_lines, buy_lines, plot_title):
    """Plot ``df`` with vertical trade markers and save it as ``plot_title``.

    Every entry of ``sell_lines`` gets a black dashed vertical line and every
    entry of ``buy_lines`` a blue one. The chart title is fixed;
    ``plot_title`` is only used as the output file name.
    """
    axes = df.plot(title="Manual Strategy vs Benchmark", fontsize=12, color=['green', 'red'])
    axes.set_xlabel("Date")
    axes.set_ylabel("Normalized Portfolio Value")

    # (dates, colour, legend label) - sells are drawn before buys.
    marker_groups = (
        (sell_lines, 'black', 'SHORT Entry Points'),
        (buy_lines, 'blue', 'LONG Entry Points'),
    )
    for trade_dates, line_color, line_label in marker_groups:
        for trade_date in trade_dates:
            plt.axvline(x=trade_date, color=line_color, linestyle='--', label=line_label)

    plt.savefig(plot_title)
    plt.close()

def test_sample_data(sd = dt.datetime(2008, 1, 1), ed = dt.datetime(2009, 12, 31), plot_title=""):
    """Compare the manual strategy with a buy-and-hold benchmark on JPM.

    Runs ``testPolicy`` over [sd, ed], simulates the resulting trades and a
    benchmark that buys 1000 shares on the first priced day (both with
    commission 9.95 and impact 0.005), prints metrics for each, and saves a
    plot of the two normalized portfolio-value curves with the strategy's
    buy/sell dates marked.

    @param plot_title: metrics label for the manual strategy and output file
        name of the plot
    """
    symbol = "JPM"
    start_val = 100000

    ####
    # Manual Strategy
    ####
    # testPolicy already returns a DataFrame whose single column is named
    # after the symbol, which is the column compute_portvals reads; the
    # previous .to_frame('Order') call does not exist on a DataFrame.
    df_trades = testPolicy(symbol=symbol, sd=sd, ed=ed, sv=start_val)
    manual_portvals = compute_portvals(df_trades, symbol, start_val=start_val, commission=9.95, impact=0.005)

    report_metrics(manual_portvals, plot_title)

    manual_portvals = manual_portvals.to_frame('Manual Strategy')
    normalized_manual_strategy = manual_portvals / manual_portvals.iloc[0, :]

    ####
    # Benchmark
    ####
    adjusted_close_prices = get_data([symbol], pd.date_range(sd, ed), addSPY=False).dropna()
    adjusted_close_prices.index.name = 'Date'
    first_transaction_date = adjusted_close_prices.index[adjusted_close_prices[symbol] != 0][0]

    benchmark_trades = df_trades.copy()
    benchmark_trades[symbol] = 0
    # One .loc call with (row, column): chained indexing may write to a
    # temporary copy and leave the benchmark without its single purchase.
    benchmark_trades.loc[first_transaction_date, symbol] = 1000
    benchmark_portvals = compute_portvals(benchmark_trades, symbol, start_val=start_val, commission=9.95, impact=0.005)

    report_metrics(benchmark_portvals, 'Benchmark')

    benchmark_portvals = benchmark_portvals.to_frame('Benchmark')
    normalized_benchmark = benchmark_portvals / benchmark_portvals.iloc[0, :]

    ####
    # Plot
    ####
    df_temp = normalized_benchmark.join(normalized_manual_strategy)

    # Filter the index with a 1-D mask; masking the frame with a 2-D boolean
    # array keeps every row, so every date would have been marked.
    sell_lines = df_trades.index[df_trades[symbol] < 0]
    buy_lines = df_trades.index[df_trades[symbol] > 0]

    save_plot(df_temp, sell_lines, buy_lines, plot_title)

# Script entry point: run the manual-strategy comparison once on the 2008-2009
# period and once on the 2010-2011 period, each with its own plot title.
if __name__ == "__main__":
    test_sample_data(sd = dt.datetime(2008, 1, 1), ed = dt.datetime(2009, 12, 31), plot_title = "Manual Strategy - In Sample") # Task 3
    test_sample_data(sd = dt.datetime(2010, 1, 1), ed = dt.datetime(2011, 12, 31), plot_title = "Manual Strategy - Out Sample") # Task 4


## marketsimcode.py
"""MC2-P1: Market simulator.

Copyright 2018, Georgia Institute of Technology (Georgia Tech)
Atlanta, Georgia 30332
All Rights Reserved

Template code for CS 4646/7646

Georgia Tech asserts copyright ownership of this template and all derivative
works, including solutions to the projects assigned in this course. Students
and other users of this template code are advised not to share it with others
or to make it available on publicly viewable websites including repositories
such as github and gitlab.  This copyright statement should not be removed
or edited.

We do grant permission to share solutions privately with non-students such
as potential employers. However, sharing with other current or future
students of CS 7646 is prohibited and subject to being investigated as a
GT honor code violation.

-----do not edit anything above this line---

Student Name: Shoabe Shariff
GT User ID: sshariff3
GT ID: 903272097
"""

import pandas as pd
import numpy as np
import datetime as dt
import os
from util import get_data, plot_data
import pdb

def author():
    """Return the GT user ID of this module's author."""
    user_id = 'sshariff3'
    return user_id

def compute_portvals(df_trades, symbol, start_val = 1000000, commission=9.95, impact=0.005):
    """Simulate the trades in ``df_trades`` and return daily portfolio values.

    @param df_trades: DataFrame indexed by date whose ``symbol`` column holds
        signed share counts (positive buys, negative sells)
    @param symbol: ticker being traded
    @param start_val: starting cash
    @param commission: fixed fee charged on every executed order
    @param impact: fractional price penalty applied to every executed order
    @returns Series of portfolio value (cash plus holdings) per date, with
        dates that have no value (NaN) dropped
    """
    orders = df_trades.sort_index()
    start_date = orders.index[0]
    end_date = orders.index[-1]
    adjusted_close_prices = get_data([symbol], pd.date_range(start_date, end_date), addSPY=False)
    adjusted_close_prices.index.name = 'Date'
    adjusted_close_prices['PORTFOLIO'] = 0

    # Running state, mutated by calculate_portfolio_value as orders execute.
    portfolio = {symbol: 0, 'CASH': start_val}

    # Walk the dates explicitly. The per-day function mutates ``portfolio``,
    # and DataFrame.apply is documented (in the pandas versions this code
    # targets) to possibly call the function twice on the first row, which
    # would execute a first-day order twice.
    values = []
    for _, day_prices in adjusted_close_prices.iterrows():
        values.append(calculate_portfolio_value(day_prices, orders, symbol, portfolio, commission, impact))

    return pd.Series(values, index=adjusted_close_prices.index).dropna()

def calculate_portfolio_value(prices, orders_all, symbol, portfolio, commission, impact):
    """Execute one day's orders and return the resulting portfolio value.

    @param prices: that day's price row; ``prices.name`` is the date. Its
        'PORTFOLIO' entry is overwritten with the computed value.
    @param orders_all: all orders, indexed by date, with a ``symbol`` column
    @param portfolio: dict of holdings per symbol plus 'CASH'; updated in
        place when orders execute
    @returns cash plus the market value of every holding at that day's prices
    """
    if prices.name in orders_all.index:
        orders_for_the_day = orders_all.loc[orders_all.index == prices.name, [symbol]]

        # Explicit loop: update_portfolio has side effects, so it must run
        # exactly once per order (DataFrame.apply does not guarantee that in
        # the pandas versions this code targets).
        for _, order in orders_for_the_day.iterrows():
            update_portfolio(order, prices, symbol, portfolio, commission, impact)

    prices['PORTFOLIO'] = portfolio['CASH'] # Initialize to cash holdings amount
    for sym, num_shares_in_portfolio in portfolio.items():
        if sym == 'CASH':
            continue
        prices['PORTFOLIO'] = prices['PORTFOLIO'] + num_shares_in_portfolio * prices[sym]

    return prices['PORTFOLIO']

def update_portfolio(order, prices, purchase_symbol, portfolio, commission, impact):
    """Apply a single order to ``portfolio`` in place.

    A positive share count is a buy filled at price * (1 + impact); a negative
    one is a sell filled at price * (1 - impact). The commission is charged on
    every executed order. Anything else (e.g. a zero share count) leaves the
    portfolio untouched and costs nothing.
    """
    signed_shares = order[purchase_symbol]
    quote = prices[purchase_symbol]

    if signed_shares > 0:
        # Market impact moves the price up against the buyer.
        fill_price = quote * (1 + impact)
        portfolio[purchase_symbol] = portfolio[purchase_symbol] + signed_shares
        portfolio['CASH'] = portfolio['CASH'] - signed_shares * fill_price
    elif signed_shares < 0:
        # Market impact moves the price down against the seller.
        shares_sold = -1 * signed_shares
        fill_price = quote * (1 - impact)
        portfolio[purchase_symbol] = portfolio[purchase_symbol] - shares_sold
        portfolio['CASH'] = portfolio['CASH'] + shares_sold * fill_price
    else:
        # No trade, therefore no commission either.
        return

    # Commission applies to every executed order, BUY or SELL.
    portfolio['CASH'] = portfolio['CASH'] - commission

if __name__ == "__main__":
    # NOTE(review): compute_portvals requires df_trades and symbol, so this
    # argument-less call raises TypeError when the module is run as a script.
    # Supply real arguments or remove the call.
    compute_portvals()
    # test_code()

## RTLearner.py
import numpy as np
import random
import pdb


class RTLearner(object):
    """Random-tree regression learner.

    The trained tree is stored in ``self.tree`` as a 2-D NumPy string array
    with one row per node:

        internal node: [feature index, split value, left offset, right offset]
        leaf node:     ["Leaf", predicted value, "nan", "nan"]

    Offsets are relative to the node's own row. Rows appear in pre-order:
    node, then its whole left subtree, then its right subtree.
    """

    def __init__(self, verbose = False, leaf_size = 1):
        """
        @param verbose: stored on the instance; not otherwise used here
        @param leaf_size: row counts at or below this become a single leaf
        """
        self.verbose = verbose
        self.leaf_size = leaf_size

    def author(self):
        """Return the GT user ID of the author."""
        return 'sshariff3'

    def addEvidence(self, dataX, dataY):
        """
        @summary: Add training data to learner
        @param dataX: X values of data to add
        @param dataY: the Y training values
        """
        # atleast_2d keeps query_tree's [row, column] indexing valid even when
        # the whole tree is a single leaf.
        self.tree = np.atleast_2d(self.build_tree(dataX, dataY))

    def _leaf(self, value):
        """Return a one-row tree holding a leaf that predicts ``value``."""
        return np.asarray([["Leaf", repr(float(value)), "nan", "nan"]])

    def build_tree(self, dataX, dataY):
        """Recursively build the tree for (dataX, dataY).

        @returns 2-D string array in the node layout described on the class
        """
        if dataX.shape[0] <= self.leaf_size:
            return self._leaf(dataY.mean())

        if np.unique(dataY).shape[0] == 1:
            return self._leaf(dataY[0])

        best_feature_index = self.best_feature_index(dataX, dataY)
        feature_column = dataX[:, best_feature_index]
        split_val = np.median(feature_column)

        # The median sits on the edge of the feature's range: give up on
        # splitting and predict the mean of the targets. (This previously
        # averaged the last *feature* column instead of dataY.)
        if split_val == np.max(feature_column) or split_val == np.min(feature_column):
            return self._leaf(dataY.mean())

        data = np.hstack((dataX, dataY[:, None]))
        left_tree = self.build_left_tree(data, best_feature_index, split_val)
        right_tree = self.build_right_tree(data, best_feature_index, split_val)

        # Left child is the next row; right child follows the left subtree.
        # The row is built as strings so stacking never relies on NumPy
        # promoting numbers to strings.
        root = np.asarray([[
            str(int(best_feature_index)),
            repr(float(split_val)),
            "1",
            str(left_tree.shape[0] + 1),
        ]])

        return np.vstack((root, left_tree, right_tree))

    def build_left_tree(self, data, best_feature_index, split_val):
        """Build the subtree for rows whose feature value is <= split_val."""
        data_left = data[data[:, best_feature_index] <= split_val]
        return np.atleast_2d(self.build_tree(data_left[:, : -1], data_left[:, -1]))

    def build_right_tree(self, data, best_feature_index, split_val):
        """Build the subtree for rows whose feature value is > split_val."""
        data_right = data[data[:, best_feature_index] > split_val]
        return np.atleast_2d(self.build_tree(data_right[:, : -1], data_right[:, -1]))

    def best_feature_index(self, dataX, dataY):
        """Pick the split feature uniformly at random."""
        return random.randint(0, dataX.shape[1] - 1)

    def query(self, points):
        """
        @summary: Estimate a set of test points given the model we built.
        @param points: should be a numpy array with each row corresponding to a specific query.
        @returns the estimated values according to the saved model.
        """
        return np.asarray([
            float(self.query_tree(points[row_num, :], 0))
            for row_num in range(points.shape[0])
        ])

    def query_tree(self, test, index):
        """Walk the tree from row ``index`` and return the leaf's stored value.

        @param test: one query point (1-D sequence of feature values)
        @param index: row of ``self.tree`` to start from
        @returns the leaf value as stored in the tree (a string)
        """
        node = index
        while self.tree[node, 0] != 'Leaf':
            feature = int(float(self.tree[node, 0]))
            split_val = float(self.tree[node, 1])
            # Column 2 holds the left offset, column 3 the right offset.
            branch = 2 if test[feature] <= split_val else 3
            node += int(float(self.tree[node, branch]))
        return self.tree[node, 1]

if __name__ == "__main__":
    # A parenthesised single-argument print behaves identically on
    # Python 2 and Python 3.
    print("the secret clue is 'zzyzx'")

## StrategyLearner.py
"""
Template for implementing StrategyLearner  (c) 2016 Tucker Balch

Copyright 2018, Georgia Institute of Technology (Georgia Tech)
Atlanta, Georgia 30332
All Rights Reserved

Template code for CS 4646/7646

Georgia Tech asserts copyright ownership of this template and all derivative
works, including solutions to the projects assigned in this course. Students
and other users of this template code are advised not to share it with others
or to make it available on publicly viewable websites including repositories
such as github and gitlab.  This copyright statement should not be removed
or edited.

We do grant permission to share solutions privately with non-students such
as potential employers. However, sharing with other current or future
students of CS 7646 is prohibited and subject to being investigated as a
GT honor code violation.

-----do not edit anything above this line---

Student Name: Shoabe Shariff
GT User ID: sshariff3
GT ID: 903272097
"""

import datetime as dt
import pandas as pd
import util as ut
import random
import RTLearner as rtl
import BagLearner as bl
from indicators import *
from marketsimcode import *
from ManualStrategy import testPolicy as testManualStrategyPolicy
import experiment1
import experiment2
import pdb


class StrategyLearner(object):
    """Learns a long/short/cash trading policy from technical indicators.

    Training rows are the indicator values for each day; the label is +1, -1
    or 0 depending on whether the N-day-ahead return is above ``impact``,
    below ``-impact`` or in between. A BagLearner of RTLearners is fit on
    those rows and queried to decide the position for each day.
    """

    # constructor
    def __init__(self, verbose = False, impact=0.0):
        """
        @param verbose: stored on the instance; not otherwise used here
        @param impact: return threshold that a move must exceed to be
            labelled as a buy or a sell
        """
        self.verbose = verbose
        self.impact = impact
        self.learner = bl.BagLearner(learner = rtl.RTLearner, kwargs={"verbose": False, "leaf_size": 5}, bags = 20)

    def author(self):
        """Return the GT user ID of the author."""
        return 'sshariff3'

    def _build_indicators(self, prices, symbol):
        """Join prices with the momentum, Bollinger-band and Price/SMA columns."""
        momentum = calculate_momentum(prices, symbol)['Momentum']
        bollinger_bands = calculate_bollinger_bands(prices, symbol).drop(symbol, axis=1)
        sma = calculate_sma(prices, symbol)['Price/SMA']
        return prices.join(momentum).join(bollinger_bands).join(sma)

    # this method should create a RTLearner, and train it for trading
    def addEvidence(self, symbol="IBM", sd=dt.datetime(2008, 1, 1), ed=dt.datetime(2009, 1, 1), sv = 10000):
        """Train the learner on ``symbol`` prices between ``sd`` and ``ed``.

        ``sv`` is accepted but not used here.
        """
        syms = [symbol]
        dates = pd.date_range(sd, ed)
        prices_all = ut.get_data(syms, dates)  # automatically adds SPY
        # prices_SPY = prices_all['SPY']  # only SPY, for comparison later

        prices = prices_all[syms]  # only portfolio symbols
        prices.index.name = 'Date'

        N = 5  # look-ahead horizon (rows) used for labelling

        # The last N rows have no N-row-ahead price to label, so drop them.
        # fillna returns a new frame instead of mutating a slice in place.
        indicators = self._build_indicators(prices, symbol).iloc[:-N].fillna(0)
        dataX = indicators.values

        # Hand the learner a plain ndarray: it indexes dataY with [:, None],
        # which newer pandas does not support on a Series.
        dataY = self.calculate_y_values(prices, symbol, N).iloc[:-N].values

        self.learner.addEvidence(dataX, dataY)

    def calculate_y_values(self, prices, symbol, N):
        """Return a Series with the +1/-1/0 label for every row of ``prices``."""
        adjusted_close_prices = prices.copy()
        adjusted_close_prices = adjusted_close_prices.rename(columns={symbol: 'Today'})
        adjusted_close_prices['N-Day'] = adjusted_close_prices['Today'].shift(-1 * N)

        return adjusted_close_prices.apply(self.calculate_y, axis=1, raw=False)

    def calculate_y(self, price_data):
        """Label one row from its 'Today' and 'N-Day' prices.

        @returns 1.0 when the N-day return exceeds ``impact``, -1.0 when it is
            below ``-impact``, otherwise 0 (including when the return is NaN)
        """
        ret = price_data['N-Day'] / price_data['Today'] - 1.0
        YBUY = 0
        YSELL = 0

        if ret > (YBUY + self.impact):
            return 1.0
        if ret < (YSELL - self.impact):
            return -1.0
        return 0

    # this method should use the existing policy and test it against new data
    def testPolicy(self, symbol="IBM", sd=dt.datetime(2009, 1, 1), ed=dt.datetime(2010, 1, 1), sv = 10000):
        """Query the trained learner and turn its predictions into trades.

        ``sv`` is accepted but not used here.

        @returns DataFrame with a single column named ``symbol`` holding the
            order size produced for each date
        """
        syms = [symbol]
        dates = pd.date_range(sd, ed)
        prices_all = ut.get_data(syms, dates)  # automatically adds SPY
        # prices_SPY = prices_all['SPY']  # only SPY, for comparison later

        prices = prices_all[syms]  # only portfolio symbols

        indicators = self._build_indicators(prices, symbol).fillna(0)
        dataX = indicators.values

        dataY = self.learner.query(dataX)

        # State carried from one row to the next by generate_orders.
        info = {'Shares': 0, 'firstRun': True}

        dataAll = indicators.copy()
        dataAll['dataY'] = dataY

        trades = dataAll.apply(lambda x: self.generate_orders(x, info), axis=1, raw=False)
        trades = pd.DataFrame(trades, columns=[symbol])

        return trades

    def generate_orders(self, data, info):
        """Return the order that moves holdings to the predicted position.

        The very first call only clears ``info['firstRun']`` and returns 0.
        Afterwards a positive prediction targets +1000 shares and a negative
        one -1000 shares; the order is the difference from current holdings.
        The order is also stored in ``data['Orders']``.
        """
        dataY = data['dataY']

        if info['firstRun']:
            info['firstRun'] = False
            data['Orders'] = 0
            return data['Orders']

        order = 0  # CASH
        if dataY > 0 and info['Shares'] < 1000:  # LONG
            order = 1000 - info['Shares']
            info['Shares'] = 1000
        elif dataY < 0 and info['Shares'] > -1000:  # SHORT
            order = -1000 - info['Shares']
            info['Shares'] = -1000

        data['Orders'] = order
        return data['Orders']

def compute_daily_returns(df):
    """Return row-over-row returns of ``df``, excluding the first row.

    returns[t] = df[t] / df[t - 1] - 1. The first row has no predecessor and
    is dropped from the result.
    """
    # shift(1) pairs each row with the previous one. Building the result from
    # an expression (rather than assigning floats into a copy of ``df``) also
    # avoids problems when ``df`` holds integers. .iloc replaces the removed
    # .ix indexer.
    daily_returns = df / df.shift(1) - 1
    return daily_returns.iloc[1:]

def report_metrics(port_vals, strategy):
    """Print cumulative return, std and mean of daily returns for a portfolio.

    @param port_vals: Series of portfolio values in chronological order
    @param strategy: label inserted into each printed line
    """
    daily_rets = compute_daily_returns(port_vals)
    # Positional access: integer labels like [-1] on a date-indexed Series
    # rely on a fallback that newer pandas no longer provides.
    cr = (port_vals.iloc[-1] / port_vals.iloc[0]) - 1
    adr = daily_rets.mean()
    sddr = daily_rets.std()

    # Single-argument print(...) works on both Python 2 and Python 3.
    print("Cumulative Return of {}: {}".format(strategy, cr))
    print("Standard Deviation of {}: {}".format(strategy, sddr))
    print("Average Daily Return of {}: {}".format(strategy, adr))
    print("")

def run_experiment1():
    """Run Experiment 1 by delegating to ``experiment1.run()``."""
    experiment1.run()

def run_experiment2():
    """Run experiment 2 by delegating to experiment2.run()."""
    experiment2.run()


if __name__ == "__main__":
    # Script entry point: announce, then run each experiment under its banner.
    print("One does not simply think up a strategy")

    banner_rule = "################"
    experiments = (
        ("# Experiment 1 #", run_experiment1),
        ("# Experiment 2 #", run_experiment2),
    )
    for banner_title, launch in experiments:
        print("")
        print(banner_rule)
        print(banner_title)
        print(banner_rule)
        launch()
	import numpy as np
	import RTLearner as rtl
	from scipy import stats
	import pdb

	class BagLearner(object):
		"""Ensemble that trains several learners on bootstrap samples and votes by mode."""

		def __init__(self, learner=rtl.RTLearner, kwargs=None, bags=10, boost=False, verbose=False):
			"""
			@summary: Build `bags` independent learner instances.
			@param learner: class instantiated once per bag
			@param kwargs: keyword arguments forwarded to each learner constructor
			@param bags: number of learners in the ensemble
			@param boost: accepted for interface compatibility; not used
			@param verbose: stored on the instance; not otherwise used here
			"""
			self.learner = learner
			self.bags = bags
			self.verbose = verbose

			# None instead of a shared {} default avoids a mutable default argument.
			learner_kwargs = {} if kwargs is None else kwargs
			self.learners = [learner(**learner_kwargs) for _ in range(0, bags)]

		def author(self):
			return 'sshariff3'

		def addEvidence(self, dataX, dataY):
			"""
			@summary: Add training data to learner
			@param dataX: X values of data to add
			@param dataY: the Y training values
			"""
			# np.asarray lets dataY be a pandas Series; Series[:, None] is not
			# supported by current pandas.
			data_all = np.hstack((np.asarray(dataX), np.asarray(dataY)[:, None]))
			row_count = data_all.shape[0]

			for learner in self.learners:
				# Each bag gets a same-size sample drawn with replacement.
				data_subset = data_all[np.random.choice(row_count, row_count, replace=True)]
				learner.addEvidence(data_subset[:, : -1], data_subset[:, -1])

		def query(self, points):
			"""
			@summary: Estimate a set of test points given the model we built.
			@param points: should be a numpy array with each row corresponding to a specific query.
			@returns the estimated values according to the saved model.
			"""
			estimated_values = np.asarray([learner.query(points) for learner in self.learners])

			# Older SciPy keeps the reduced axis (shape (1, n)); newer SciPy drops it
			# (shape (n,)). Flattening gives one value per query point either way.
			return np.asarray(stats.mode(estimated_values, axis=0)[0]).reshape(-1)

	if __name__ == "__main__":
		# Single-argument print(...) behaves the same under Python 2 and Python 3.
		print("the secret clue is 'zzyzx'")
	"""
	Student Name: Shoabe Shariff
	GT User ID: sshariff3
	GT ID: 903272097
	"""

	import datetime as dt
	import pandas as pd
	import util as ut
	import random
	import RTLearner as rtl
	import BagLearner as bl
	from indicators import *
	from marketsimcode import *
	from ManualStrategy import testPolicy as testManualStrategyPolicy
	import StrategyLearner as sl
	import pdb


	def save_plot(df):
		"""Plot df as a two-line chart and save it under the name "experiment1".

		@param df: DataFrame whose columns are drawn in green and red, in that order
		"""
		ax = df.plot(title="Manual Strategy vs Learning Strategy", fontsize=12, color=['green', 'red'])
		ax.set_xlabel("Date")
		ax.set_ylabel("Normalized Portfolio Value")

		# NOTE(review): plt is not imported by name here; presumably it arrives
		# through one of the star imports - confirm.
		plt.savefig("experiment1")
		plt.close()

		print("Plot saved as experiment1.png")
		print("")

	def run():
		"""Compare the learning strategy with the manual strategy on JPM, 2008-2009.

		Trains a StrategyLearner, simulates both strategies with the same impact and
		zero commission, prints their metrics, and saves a plot of the two
		portfolio-value curves normalized to their first value.
		"""
		symbol = "JPM"
		sv = 100000
		impact = 0.005

		### In-Sample Dates
		sd = dt.datetime(2008, 1, 1)
		ed = dt.datetime(2009, 12, 31)

		####
		# Learning Strategy
		####
		learner = sl.StrategyLearner(verbose=False, impact=impact) # constructor
		# Train on the same in-sample window that is evaluated below.
		learner.addEvidence(symbol=symbol, sd=sd, ed=ed, sv=sv)

		df_trades = learner.testPolicy(symbol=symbol, sd=sd, ed=ed, sv=sv)
		learning_portvals = compute_portvals(df_trades, symbol, start_val=sv, commission=0, impact=impact)

		sl.report_metrics(learning_portvals, "Learning Strategy")

		learning_portvals = learning_portvals.to_frame('Learning Strategy')
		# .iloc[0, :] is the positional replacement for the removed .ix indexer.
		normalized_learning_strategy = learning_portvals / learning_portvals.iloc[0, :]

		####
		# Manual Strategy
		####
		manual_strategy_trades = testManualStrategyPolicy(symbol=symbol, sd=sd, ed=ed, sv=sv)
		manual_portvals = compute_portvals(manual_strategy_trades, symbol, start_val=sv, commission=0, impact=impact)

		sl.report_metrics(manual_portvals, "Manual Strategy")

		manual_portvals = manual_portvals.to_frame('Manual Strategy')
		normalized_manual_strategy = manual_portvals / manual_portvals.iloc[0, :]

		####
		# Plot
		####
		df_temp = normalized_manual_strategy.join(normalized_learning_strategy)

		save_plot(df_temp)
	"""MC2-P1: Market simulator.

	Copyright 2018, Georgia Institute of Technology (Georgia Tech)
	Atlanta, Georgia 30332
	All Rights Reserved

	Template code for CS 4646/7646

	Georgia Tech asserts copyright ownership of this template and all derivative
	works, including solutions to the projects assigned in this course. Students
	and other users of this template code are advised not to share it with others
	or to make it available on publicly viewable websites including repositories
	such as github and gitlab. This copyright statement should not be removed
	or edited.

	We do grant permission to share solutions privately with non-students such
	as potential employers. However, sharing with other current or future
	students of CS 7646 is prohibited and subject to being investigated as a
	GT honor code violation.

	-----do not edit anything above this line---

	Student Name: Shoabe Shariff
	GT User ID: sshariff3
	GT ID: 903272097
	"""

	import pandas as pd
	import numpy as np
	import datetime as dt
	import os
	from util import get_data, plot_data
	import pdb

	def author():
		"""Return the author's GT user ID."""
		return 'sshariff3'

	def compute_portvals(df_trades, symbol, start_val = 1000000, commission=9.95, impact=0.005):
		"""Simulate the trades in df_trades and return the portfolio value per day.

		@param df_trades: date-indexed DataFrame with a `symbol` column of signed share
		                  counts (positive buys, negative sells)
		@param symbol: ticker being traded
		@param start_val: starting cash
		@param commission: fixed fee charged on every non-zero trade
		@param impact: fractional price move against the trader on every trade
		@returns Series of portfolio values from the first to the last trade date,
		         with NaN rows dropped
		"""
		orders = df_trades.sort_index()
		start_date = orders.index[0]
		end_date = orders.index[-1]
		adjusted_close_prices = get_data([symbol], pd.date_range(start_date, end_date), addSPY=False)
		adjusted_close_prices.index.name = 'Date'
		adjusted_close_prices['PORTFOLIO'] = 0

		portfolio = dict.fromkeys([symbol], 0)
		portfolio['CASH'] = start_val

		# The per-day step mutates `portfolio`, so walk the rows explicitly, exactly
		# once and in order, rather than relying on DataFrame.apply for side effects.
		daily_values = [
			calculate_portfolio_value(day_prices, orders, symbol, portfolio, commission, impact)
			for _, day_prices in adjusted_close_prices.iterrows()
		]
		return pd.Series(daily_values, index=adjusted_close_prices.index).dropna()

	def calculate_portfolio_value(prices, orders_all, symbol, portfolio, commission, impact):
		"""Apply the day's orders to `portfolio` and return its value at the day's prices.

		@param prices: one row of the price table; its `name` is the date, and it is
		               updated in place under 'PORTFOLIO'
		@param orders_all: date-indexed DataFrame of orders with a `symbol` column
		@param symbol: ticker being traded
		@param portfolio: mutable dict of share counts per symbol plus 'CASH'
		@param commission: fixed fee passed to update_portfolio
		@param impact: market-impact fraction passed to update_portfolio
		@returns cash plus the value of all held shares at this row's prices
		"""
		if prices.name in orders_all.index:
			orders_for_the_day = orders_all.loc[orders_all.index == prices.name, [symbol]]

			# update_portfolio works by side effect, so call it once per order row
			# instead of going through DataFrame.apply.
			for _, order in orders_for_the_day.iterrows():
				update_portfolio(order, prices, symbol, portfolio, commission, impact)

		total = portfolio['CASH'] # Initialize to cash holdings amount
		for sym in portfolio.keys():
			if sym == 'CASH':
				continue
			total = total + portfolio[sym] * prices[sym]

		prices['PORTFOLIO'] = total
		return prices['PORTFOLIO']

	def update_portfolio(order, prices, purchase_symbol, portfolio, commission, impact):
		"""Apply a single order to `portfolio` in place.

		@param order: mapping whose `purchase_symbol` entry is the signed share count
		@param prices: mapping whose `purchase_symbol` entry is the day's price
		@param purchase_symbol: ticker being traded
		@param portfolio: mutable dict of share counts plus 'CASH'
		@param commission: fixed fee charged on every non-zero trade
		@param impact: fraction by which the price moves against the trade
		"""
		trade_num_shares = order[purchase_symbol]
		stock_price = prices[purchase_symbol]

		# Update Portfolio Shares and Cash Holdings
		if trade_num_shares > 0:
			# Apply market impact - Price goes up by impact prior to purchase
			fill_price = stock_price * (1 + impact)
			portfolio[purchase_symbol] = portfolio[purchase_symbol] + trade_num_shares
			portfolio['CASH'] = portfolio['CASH'] - trade_num_shares * fill_price
		elif trade_num_shares < 0:
			shares_sold = -1 * trade_num_shares
			# Apply market impact - Price goes down by impact prior to sell
			fill_price = stock_price * (1 - impact)
			portfolio[purchase_symbol] = portfolio[purchase_symbol] - shares_sold
			portfolio['CASH'] = portfolio['CASH'] + shares_sold * fill_price
		else:
			# Zero-share (or non-comparable) orders change nothing and cost nothing.
			return

		# Apply commission - charged on every executed BUY or SELL
		portfolio['CASH'] = portfolio['CASH'] - commission

	if __name__ == "__main__":
		# compute_portvals requires a trades frame and a symbol, so calling it with
		# no arguments (as this block used to) always raised TypeError.
		print("marketsimcode: import compute_portvals and call it with df_trades and a symbol")
	"""
	Template for implementing StrategyLearner (c) 2016 Tucker Balch

	Copyright 2018, Georgia Institute of Technology (Georgia Tech)
	Atlanta, Georgia 30332
	All Rights Reserved

	Template code for CS 4646/7646

	Georgia Tech asserts copyright ownership of this template and all derivative
	works, including solutions to the projects assigned in this course. Students
	and other users of this template code are advised not to share it with others
	or to make it available on publicly viewable websites including repositories
	such as github and gitlab. This copyright statement should not be removed
	or edited.

	We do grant permission to share solutions privately with non-students such
	as potential employers. However, sharing with other current or future
	students of CS 7646 is prohibited and subject to being investigated as a
	GT honor code violation.

	-----do not edit anything above this line---

	Student Name: Shoabe Shariff
	GT User ID: sshariff3
	GT ID: 903272097
	"""

	import datetime as dt
	import pandas as pd
	import util as ut
	import random
	import RTLearner as rtl
	import BagLearner as bl
	from indicators import *
	from marketsimcode import *
	from ManualStrategy import testPolicy as testManualStrategyPolicy
	import experiment1
	import experiment2
	import pdb


	class StrategyLearner(object):
		"""Trading-strategy learner: a bagged RTLearner ensemble trained on price indicators."""

		# constructor
		def __init__(self, verbose = False, impact=0.0):
			"""
			@param verbose: stored on the instance
			@param impact: market-impact fraction; widens the no-trade band when labelling
			"""
			self.verbose = verbose
			self.impact = impact
			self.learner = bl.BagLearner(learner = rtl.RTLearner, kwargs={"verbose": False, "leaf_size": 5}, bags = 20)

		def author(self):
			return 'sshariff3'

		# this method should create a RTLearner, and train it for trading
		def addEvidence(self, symbol="IBM", sd=dt.datetime(2008, 1, 1), ed=dt.datetime(2009, 1, 1), sv = 10000):
			"""
			@summary: Train the ensemble on indicators labelled with the N-day-ahead return.
			@param symbol: ticker to train on
			@param sd: start of the training window
			@param ed: end of the training window
			@param sv: starting value; not used during training
			"""
			syms = [symbol]
			dates = pd.date_range(sd, ed)
			prices_all = ut.get_data(syms, dates) # automatically adds SPY

			prices = prices_all[syms] # only portfolio symbols
			prices.index.name = 'Date'

			momentum = calculate_momentum(prices, symbol)['Momentum']
			bollinger_bands = calculate_bollinger_bands(prices, symbol).drop(symbol, axis=1)
			sma = calculate_sma(prices, symbol)['Price/SMA']
			indicators = prices.join(momentum).join(bollinger_bands).join(sma)

			N = 5

			# The last N rows have no N-day-ahead price, so they cannot be labelled.
			# fillna returns a new frame; no in-place edit of a slice.
			indicators = indicators[:(-1 * N)].fillna(0)
			dataX = indicators.values

			# Hand the learner a plain ndarray rather than a pandas Series.
			dataY = self.calculate_y_values(prices, symbol, N)[:-1 * N].values

			self.learner.addEvidence(dataX, dataY)

		def calculate_y_values(self, prices, symbol, N):
			"""
			@summary: Label every row of prices by comparing it with the price N rows later.
			@param prices: DataFrame with a `symbol` column
			@param symbol: name of the price column
			@param N: look-ahead distance in rows
			@returns Series of labels, one per row of prices
			"""
			adjusted_close_prices = prices.copy()
			adjusted_close_prices = adjusted_close_prices.rename(columns={symbol: 'Today'})
			adjusted_close_prices['N-Day'] = adjusted_close_prices['Today'].shift(-1 * N)

			return adjusted_close_prices.apply(lambda x: self.calculate_y(x), axis=1, raw=False)

		def calculate_y(self, price_data):
			"""
			@summary: Classify one row by its N-day return relative to the impact band.
			@param price_data: mapping with 'Today' and 'N-Day' prices
			@returns 1.0 if the return exceeds +impact, -1.0 if below -impact, else 0
			"""
			ret = price_data['N-Day'] / price_data['Today'] - 1.0
			YBUY = 0
			YSELL = 0

			if ret > (YBUY + self.impact):
				return 1.0
			elif ret < (YSELL - self.impact):
				return -1.0

			# Also reached when ret is NaN, because both comparisons are False.
			return 0

		# this method should use the existing policy and test it against new data
		def testPolicy(self, symbol="IBM", sd=dt.datetime(2009, 1, 1), ed=dt.datetime(2010, 1, 1), sv = 10000):
			"""
			@summary: Query the trained ensemble and turn its predictions into trades.
			@param symbol: ticker to trade
			@param sd: start of the test window
			@param ed: end of the test window
			@param sv: starting value; not used here
			@returns DataFrame with one `symbol` column of signed share orders per row
			"""
			syms = [symbol]
			dates = pd.date_range(sd, ed)
			prices_all = ut.get_data(syms, dates) # automatically adds SPY

			prices = prices_all[syms] # only portfolio symbols

			momentum = calculate_momentum(prices, symbol)['Momentum']
			bollinger_bands = calculate_bollinger_bands(prices, symbol).drop(symbol, axis=1)
			sma = calculate_sma(prices, symbol)['Price/SMA']
			indicators = prices.join(momentum).join(bollinger_bands).join(sma)
			indicators = indicators.fillna(0)

			dataX = indicators.values

			dataY = self.learner.query(dataX)

			# Position state shared by every generate_orders call.
			info = {'Shares': 0, 'firstRun': True}

			dataAll = indicators.copy()
			dataAll['dataY'] = dataY

			trades = dataAll.apply(lambda x: self.generate_orders(x, info), axis=1, raw=False)
			trades = pd.DataFrame(trades, columns=[symbol])

			return trades

		def generate_orders(self, data, info):
			"""
			@summary: Convert one row's predicted signal into a share order.
			@param data: row-like mapping; 'dataY' is read and the order is written to 'Orders'
			@param info: mutable dict carrying 'Shares' and 'firstRun' between calls
			@returns the value stored under data['Orders']
			"""
			dataY = data['dataY']

			if info['firstRun']:
				# The very first call never trades; it only clears the flag.
				info['firstRun'] = False
				data['Orders'] = 0
				return data['Orders']

			order = 0 # CASH
			if dataY > 0 and info['Shares'] < 1000: # LONG
				order = 1000 - info['Shares']
				info['Shares'] = 1000
			elif dataY < 0 and info['Shares'] > -1000: # SHORT
				order = -1000 - info['Shares']
				info['Shares'] = -1000

			data['Orders'] = order
			return data['Orders']

	def compute_daily_returns(df):
		"""Return the row-over-row fractional change of df, without the first row.

		@param df: pandas Series or DataFrame whose rows are in chronological order
		@returns an object of the same kind holding value[i] / value[i - 1] - 1 for
		         every row after the first, indexed like df.iloc[1:]
		"""
		# Positional slicing (.iloc) replaces the removed .ix indexer; .values on the
		# denominator stops pandas from aligning the two slices by label.
		return (df.iloc[1:] / df.iloc[:-1].values) - 1

	def report_metrics(port_vals, strategy):
		"""Print cumulative return, daily-return std-dev and mean daily return.

		@param port_vals: pandas Series of portfolio values in chronological order
		@param strategy: label inserted into each printed line
		"""
		daily_rets = compute_daily_returns(port_vals)
		# .iloc makes first/last lookups positional regardless of the index labels.
		cr = (port_vals.iloc[-1] / port_vals.iloc[0]) - 1
		adr = daily_rets.mean()
		sddr = daily_rets.std()

		# Single-argument print(...) behaves the same under Python 2 and Python 3.
		print("Cumulative Return of {}: {}".format(strategy, cr))
		print("Standard Deviation of {}: {}".format(strategy, sddr))
		print("Average Daily Return of {}: {}".format(strategy, adr))
		print("")

	def run_experiment1():
		"""Run experiment 1 by delegating to experiment1.run()."""
		experiment1.run()

	def run_experiment2():
		"""Run experiment 2 by delegating to experiment2.run()."""
		experiment2.run()


	if __name__ == "__main__":
		# Script entry point: announce, then run each experiment under its banner.
		# Single-argument print(...) behaves the same under Python 2 and Python 3.
		print("One does not simply think up a strategy")

		print("################")
		print("# Experiment 1 #")
		print("################")
		run_experiment1()

		print("################")
		print("# Experiment 2 #")
		print("################")
		run_experiment2()