Created
January 14, 2019 10:54
-
-
Save talolard/29fbf89605dc058fa5a190b19c02b5ef to your computer and use it in GitHub Desktop.
A crappy environment for RL on a portfolio of stocks
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from collections import defaultdict | |
from env.priceGenerator import make_stock | |
# Fee charged per stock share traded; Env.update_acount_value multiplies it by the
# absolute change in each stock's share count (cash is exempt). 0 disables costs.
# NOTE(review): the trailing 0.01 looks like an alternative value -- confirm.
costPerShare = 0 # 0.01
class Env:
    '''
    A simple trading environment for an RL agent.

    The action the agent gives is a weighting over the stocks plus cash
    (cash is always the last entry). The environment converts those weights
    into whole share counts at the current prices, charges transaction
    costs, and reports the log return of the account as the reward.

    History is recorded in ``self.hist`` (a ``defaultdict(list)``) under the
    keys ``'shares'``, ``'act_val'``, ``'act_returns'``, ``'changeInShares'``
    and ``'transactionCosts'``.
    '''

    def __init__(self, price_fn, num_stocks=2, length=2, starting_value=1000,
                 lookback=10, cost_per_share=None):
        '''
        :param price_fn: Called as ``price_fn(num_stocks=..., length=...)``;
            must return an array with ``length + 1`` rows and ``num_stocks``
            columns (a cash column of ones is concatenated onto it).
        :param num_stocks: How many stocks are in our universe
        :param length: The length of an episode
        :param starting_value: Initial account value, held entirely as cash
        :param lookback: Maximum number of price rows returned in each state
        :param cost_per_share: Fee per stock share traded. ``None`` (default)
            falls back to the module-level ``costPerShare``.
        '''
        self.num_stocks = num_stocks
        self.lookback = lookback
        self.length = length
        self.cost_per_share = cost_per_share
        self.oprices = price_fn(num_stocks=num_stocks, length=length)
        # Attach the value of cash (always 1) as the last column.
        self.prices = np.concatenate(
            [self.oprices, np.ones([length + 1, 1])], axis=1
        )
        # One weight per stock; portfolio[-1] is cash. Start fully in cash.
        self.portfolio = np.zeros([num_stocks + 1])
        self.portfolio[-1] = 1
        self.time = 0
        self.__account_value = starting_value
        # Float dtype so fractional cash can be stored without casting errors.
        self.__shares = np.array([0] * num_stocks + [starting_value], dtype=float)
        self.hist = defaultdict(list)

    @property
    def shares(self):
        '''Current holdings: share count per stock, cash amount last.'''
        return self.__shares

    @shares.setter
    def shares(self, new_shares):
        self.__shares = new_shares
        self.hist['shares'].append(self.__shares)

    @property
    def account_value(self):
        '''Total value of the account as of the last update.'''
        return self.__account_value

    @account_value.setter
    def account_value(self, new_act_val):
        # Measure the return against the value held *before* this update. On
        # the very first update that is the starting value, so first-step
        # transaction costs are reflected in the reward as well.
        try:
            act_returns = new_act_val / self.__account_value
        except ZeroDivisionError:
            act_returns = 1
        self.__account_value = new_act_val
        self.hist['act_val'].append(new_act_val)
        self.hist['act_returns'].append(act_returns)

    def _fee_per_share(self):
        '''Return the per-share fee: the instance override or the module default.'''
        if self.cost_per_share is not None:
            return self.cost_per_share
        return costPerShare

    def step(self, new_portfolio):
        '''
        Advance to the next prices, then transition the value of the account
        into the desired portfolio.

        :param new_portfolio: Desired weights, one per stock plus cash last
        :return: ``(state, reward, done)`` where ``state`` holds the last
            (at most ``lookback``) rows of stock prices up to now and the
            realised portfolio weights, ``reward`` is the log return of the
            account (transaction costs included), and ``done`` flags the end
            of the episode.
        '''
        self.time += 1
        self.update_acount_value(new_portfolio)
        reward = np.log(self.hist['act_returns'][-1])  # already includes transaction costs
        # Clamp at 0: a negative start would wrap around to the end of the
        # array and yield an empty/wrong window during the first steps.
        window_start = max(0, self.time - self.lookback + 1)
        state = {
            # All prices up to now inclusive, but no cash column.
            "prices": self.prices[window_start:self.time + 1, :-1],
            "portfolio": self.portfolio,
        }
        done = self.time >= len(self.prices) - 2
        return state, reward, done

    def update_acount_value(self, new_portfolio):
        '''
        Rebalance the holdings towards ``new_portfolio`` at the current prices.

        Share changes are floored to whole numbers; the cash lost to that
        rounding is returned to the cash position, and transaction costs on
        the traded stock shares are deducted from cash. Updates ``shares``,
        ``account_value``, ``portfolio`` and the history.

        :param new_portfolio: Desired weights, one per stock plus cash last
        '''
        prices_now = self.prices[self.time]
        target_weights = np.asarray(new_portfolio, dtype=float)

        holdings_value = self.shares * prices_now
        value_before = np.sum(holdings_value)
        current_weights = holdings_value / value_before

        desired_cash_change = (target_weights - current_weights) * value_before
        share_change = np.floor(desired_cash_change / prices_now)
        updated_shares = self.shares + share_change

        # Because we take the floor, some cash goes missing; give it back.
        missing_cash = value_before - np.sum(updated_shares * prices_now)
        # Only stock trades incur a fee; the cash column is excluded.
        transaction_cost = np.sum(np.abs(share_change[:-1])) * self._fee_per_share()
        updated_shares[-1] += missing_cash - transaction_cost

        # Assign once, after all adjustments, so history sees the final state.
        self.shares = updated_shares
        self.hist["changeInShares"].append(share_change)
        self.hist["transactionCosts"].append(transaction_cost)

        final_values = updated_shares * prices_now
        value_after = np.sum(final_values)
        self.account_value = value_after
        # Report the weights actually achieved so the state is never stale.
        self.portfolio = final_values / value_after

    # Correctly spelled alias; the original name is kept for existing callers.
    update_account_value = update_acount_value
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment