keithmgould/discount.py

## discount.py

import numpy as np

# Per-step discount factor: discount_rewards() multiplies its running sum by
# this value once for every step it walks backwards through the rewards.
gamma = 0.99

def discount_rewards(r, discount=None):
    """Return the discounted cumulative sum of a 1-D reward sequence.

    Element ``t`` of the result equals
    ``r[t] + d * r[t + 1] + d**2 * r[t + 2] + ...`` where ``d`` is the
    discount factor; it is accumulated in a single backward pass.

    Args:
        r: 1-D array-like of per-step rewards (list, tuple or ndarray).
        discount: Per-step discount factor. When ``None`` (the default) the
            module-level ``gamma`` is used.

    Returns:
        A new ndarray with the same length as ``r``. Floating-point inputs
        keep their dtype; any other dtype is promoted to ``float64`` so the
        fractional discounted values are not truncated.
    """
    rewards = np.asarray(r)
    factor = gamma if discount is None else discount
    # zeros_like() on an integer array would give an integer buffer and
    # silently truncate every discounted value written into it.
    if np.issubdtype(rewards.dtype, np.inexact):
        out_dtype = rewards.dtype
    else:
        out_dtype = np.float64
    discounted = np.zeros_like(rewards, dtype=out_dtype)
    running = 0.0
    # Walk from the last step to the first so each entry can reuse the
    # already-discounted sum of everything after it.
    for t in reversed(range(rewards.size)):
        running = running * factor + rewards[t]
        discounted[t] = running
    return discounted

# Demo: discount six unit rewards, then standardize the result.
# np.float64 replaces the np.float alias, which was removed in NumPy 1.24
# (it was only ever an alias for the builtin float, so the dtype is unchanged).
foo = np.ones((6,), dtype=np.float64)
discounted_r = discount_rewards(foo)
# Shift to zero mean and scale to unit standard deviation, in place.
discounted_r -= np.mean(discounted_r)
discounted_r /= np.std(discounted_r)

print("final: ", discounted_r)

	import numpy as np

	# Per-step discount factor: discount_rewards() multiplies its running sum
	# by this value once for every step it walks backwards through the rewards.
	gamma = 0.99

	def discount_rewards(r, discount=None):
	    """Return the discounted cumulative sum of a 1-D reward sequence.

	    Element ``t`` of the result equals
	    ``r[t] + d * r[t + 1] + d**2 * r[t + 2] + ...`` where ``d`` is the
	    discount factor; it is accumulated in a single backward pass.

	    Args:
	        r: 1-D array-like of per-step rewards (list, tuple or ndarray).
	        discount: Per-step discount factor. When ``None`` (the default)
	            the module-level ``gamma`` is used.

	    Returns:
	        A new ndarray with the same length as ``r``. Floating-point inputs
	        keep their dtype; any other dtype is promoted to ``float64`` so
	        the fractional discounted values are not truncated.
	    """
	    rewards = np.asarray(r)
	    factor = gamma if discount is None else discount
	    # zeros_like() on an integer array would give an integer buffer and
	    # silently truncate every discounted value written into it.
	    if np.issubdtype(rewards.dtype, np.inexact):
	        out_dtype = rewards.dtype
	    else:
	        out_dtype = np.float64
	    discounted = np.zeros_like(rewards, dtype=out_dtype)
	    running = 0.0
	    # Walk from the last step to the first so each entry can reuse the
	    # already-discounted sum of everything after it.
	    for t in reversed(range(rewards.size)):
	        running = running * factor + rewards[t]
	        discounted[t] = running
	    return discounted

	# Demo: discount six unit rewards, then standardize the result.
	# np.float64 replaces the np.float alias, which was removed in NumPy 1.24
	# (it was only ever an alias for the builtin float, so the dtype is unchanged).
	foo = np.ones((6,), dtype=np.float64)
	discounted_r = discount_rewards(foo)
	# Shift to zero mean and scale to unit standard deviation, in place.
	discounted_r -= np.mean(discounted_r)
	discounted_r /= np.std(discounted_r)

	print("final: ", discounted_r)