Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
RL utility function exponentially_weighted_cumsum for calculating the sum of discounted future rewards or TD(lambda)
import numpy as np
def exponentially_weighted_matrix(discount, mat_len):
    """Return the upper-triangular matrix of discount powers.

    Entry ``[i, j]`` of the result is ``discount ** (j - i)`` when
    ``j >= i`` and ``0`` otherwise, so the diagonal is always 1.

    Args:
        discount: Scalar discount factor.
        mat_len: Number of rows and columns of the square result.

    Returns:
        A ``(mat_len, mat_len)`` array.
    """
    step = np.arange(mat_len)
    # True strictly above the diagonal, i.e. where column index > row index.
    future = step[np.newaxis, :] > step[:, np.newaxis]
    # Select positions by index rather than by value: masking on ``== 0``
    # would also overwrite the discount entries when ``discount`` is 0 and
    # yield a matrix of ones instead of the identity.
    # ``np.float64(1.0)`` keeps the result at least double precision.
    factors = np.where(future, discount, np.float64(1.0))
    # A running product along each row turns the repeated factor into
    # successive powers: 1, discount, discount**2, ...
    powers = np.cumprod(factors, axis=1)
    # Entries below the diagonal are still 1 at this point; zero them out.
    return np.triu(powers)
def exponentially_weighted_cumsum(discount, np_data):
    """Return discount-weighted sums of ``np_data``, in reversed order.

    The weights ``W`` come from
    ``exponentially_weighted_matrix(discount, np_data.shape[0])`` and the
    data is flattened into a single column before multiplying, giving
    ``sums[i] = sum_j W[i, j] * np_data[j]``.

    Args:
        discount: Scalar discount factor forwarded to the weight matrix.
        np_data: NumPy array; its first dimension sets the matrix size.

    Returns:
        A 1-D array holding ``sums`` reversed, so element 0 corresponds to
        the last row of the weight matrix.
    """
    weights = exponentially_weighted_matrix(discount, np_data.shape[0])
    column = np_data.reshape(-1, 1)
    sums = weights.dot(column)
    # Reverse the rows and drop the singleton column axis in one indexing step.
    return sums[::-1, 0]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.