praveen-palanisamy/gist:3802cec2b8ad67fd667f

## gistfile1.txt
% Multiplicative (exponential) weight update after observing `reward`.
% Only the chosen action receives a non-zero loss; all other entries stay 0.
lossVector = zeros(1,self.nbActions);
lossScalar = 1 - reward; % Loss of the chosen action
% NOTE(review): `astAction` is defined outside this snippet - presumably the
% index of the last chosen action (possibly a typo for `lastAction`); confirm.
lossVector(astAction) = lossScalar;
% Advance the step counter first: the rate computed below uses the new value.
self.timeStep = self.timeStep + 1;
% The weight update step below depends on the learning policy. This will
% probably be handled by the NN/RL-net.
% NOTE(review): the rate reads self.numActions while lossVector is sized with
% self.nbActions - verify that both properties exist and agree.
stepRate = sqrt(log(self.numActions)/self.timeStep);
decayFactors = exp(-stepRate*lossVector); % 1-by-nbActions row vector
% The row of factors is transposed before the element-wise product
% (presumably self.weights is stored as a column vector - confirm).
self.weights = self.weights.*decayFactors';
	% Multiplicative (exponential) weight update after observing `reward`.
	lossScalar = 1 - reward; % This is loss of the chosen action
	% Only the chosen action receives a non-zero loss; all other entries stay 0.
	lossVector = zeros(1,self.nbActions);
	% NOTE(review): `astAction` is defined outside this snippet - presumably the
	% index of the last chosen action (possibly a typo for `lastAction`); confirm.
	lossVector(astAction) = lossScalar;
	% Advance the step counter first: the rate below uses the new value.
	self.timeStep=self.timeStep+1;
	%The weight update step below depends on the learning policy. This will probably be handled by the NN/RL-net
	% Fix: the element-wise product `.*` and the scalar-times-vector `*` had been
	% lost from this line, which left it unparseable as the intended update.
	% The rate is sqrt(log(numActions)/timeStep); the 1-by-nbActions row of
	% exp factors is transposed before scaling self.weights element-wise
	% (presumably self.weights is a column vector - confirm).
	% NOTE(review): self.numActions is used here while lossVector is sized with
	% self.nbActions - verify that both properties exist and agree.
	self.weights=self.weights.*(exp(-sqrt(log(self.numActions)/self.timeStep)*lossVector))';