@fatfingererr · created September 22, 2017
RL_example
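% Tabular Q-learning on GBPUSD M5 closes: the CCI of the price series is
% bucketed into discrete states, the actions are positions in {-1, 0, +1},
% and the greedy policy extracted from the learned Q table is evaluated on
% a 70/30 train/test split.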
clc
clear all
disp('get GBPUSD M5 data...');
getPrice ; % script that defines the close-price vector P (sketch below)
disp('done!');
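% getPrice is not included in this gist. Judging from the call above, it is
% a script that defines the close-price vector P. A minimal sketch, assuming
% a hypothetical CSV file 'GBPUSD_M5.csv' with the close in its last column:
%
%   raw = csvread('GBPUSD_M5.csv');
%   P   = raw(:, end);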
train_ratio = 0.7 ;            % fraction of data used for training
nIter = 100;                   % (defined but unused below)
epsilon = 10^(-10);            % convergence tolerance for the Q-value iteration
cci_period = 14 ;              % CCI lookback period
c = 0.000 ;                    % proportional transaction cost
alpha = 0.4 ;                  % learning rate
gamma = 0.9 ;                  % discount factor
delta = [-1, 0, 1];            % positions: short, flat, long
state_cut = -1000:100:1000;    % CCI bucket edges -> length(state_cut)+1 = 22 states
P_cci = cci( P, cci_period );       % CCI of the close series (sketch at end of file)
nonnan_index = ~isnan(P_cci);       % drop the warm-up NaNs
P_cci = P_cci(nonnan_index);
P = P(nonnan_index);
n_P_cci = length( P_cci );
P = P( end - n_P_cci + 1 : end );   % keep prices aligned with the CCI series
state = zeros( 1, n_P_cci );        % discrete state per bar
for i_P_cci = 1 : n_P_cci
    if P_cci(i_P_cci) < state_cut(1)
        % state 1 : CCI below the lowest cut, -1000
        state(i_P_cci) = 1 ;
    elseif P_cci(i_P_cci) < state_cut(end)
        % states 2 .. length(state_cut) : CCI in [state_cut(i-1), state_cut(i))
        for i_state = 2 : length(state_cut)
            if P_cci(i_P_cci) >= state_cut(i_state-1) && P_cci(i_P_cci) < state_cut(i_state)
                state(i_P_cci) = i_state ;
            end
        end
    else
        % state length(state_cut)+1 : CCI at or above the highest cut, 1000
        state(i_P_cci) = length(state_cut)+1 ;
    end
end
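% For reference, the bucketing loop above is equivalent to one call to
% discretize (R2015a+); left as a comment so the explicit loop stays the
% reference implementation:
%
%   state = reshape(discretize(P_cci, [-inf, state_cut, inf]), 1, []);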
P_length = length( P );
n_train = floor( P_length * train_ratio );
P_train = P(1:n_train);
P_test = P(n_train+1:end);
state_train = state(1:n_train);
state_test = state(n_train+1:end);
nDelta = length( delta );         % number of actions
T = length( P_train );
nState = length( state_cut )+1;   % number of CCI states
Q_t = zeros(nState , nDelta);     % tabular Q, one row per state
action = zeros(1, T);
update_Q_t = zeros(1, nDelta );
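% The sweep below runs backward through the training bars. At each bar it
% first tabulates V_t, the one-step value of every (previous position,
% current position) pair, then iterates the Q update for the current state
% until it is stable to within epsilon. updateQValue is not included in the
% gist; it is assumed to implement the standard rule
%   Q(s,a) <- (1-alpha)*Q(s,a) + alpha*( V + gamma*max_a' Q(s',a') )
% (see the sketch at the end of this file).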
for t = T : -1 : 2
    disp(strcat('t:', num2str(T-t), '/', num2str(T)));   % progress
    nowState = state_train(t);
    % One-step value of every (previous position, current position) pair
    V_t = zeros(nDelta, nDelta);
    for nowDelta = 1 : nDelta
        for lastDelta = 1 : nDelta
            delta_t = delta( nowDelta );
            delta_t_1 = delta( lastDelta );
            V_t( lastDelta, nowDelta ) = valueFunction( P_train(t-1), P_train(t), delta_t_1, delta_t, c);
        end
    end
    for nowDelta = 1 : nDelta
        if t == T
            % Terminal bar: seed every state with the best immediate value
            Q_t( :, nowDelta ) = max( V_t( :, nowDelta ) );
        else
            % Iterate the Q update for this state until it stops moving
            Q_t_diff = 1 ;
            V_t_next = all_next_V_t( nowDelta );
            while Q_t_diff > epsilon
                Q_t( nowState, nowDelta ) = update_Q_t(nowDelta) ;
                update_Q_t(nowDelta) = updateQValue( Q_t( nowState, nowDelta ), all_next_Q_t, V_t_next, alpha, gamma );
                Q_t_diff = abs( Q_t( nowState, nowDelta ) - update_Q_t(nowDelta) );
            end
        end
    end
    % Greedy action for the current state, ties broken at random
    all_next_Q_t = Q_t( nowState, : );
    action_candidate = find( all_next_Q_t == max(all_next_Q_t) );
    action(t) = action_candidate(randi([1 length(action_candidate)], 1, 1));
    all_next_V_t = V_t( :, action(t) );
end
% Extract the greedy policy: stateAction(s) is the index into delta of the
% best action in state s, ties broken at random
stateAction = zeros(1, nState);
for i_state = 1 : nState
    action_candidate = find( Q_t(i_state, :) == max(Q_t(i_state, :)) );
    stateAction(i_state) = action_candidate(randi([1 length(action_candidate)], 1, 1));
end
% Cumulative reward of the greedy policy on the training set
train_reward = zeros(1, length( P_train ));
for t = 1 : length( P_train )
    action(t) = stateAction(state_train(t));
    if t > 1
        train_reward(t) = train_reward(t-1) + rewardFunction( P_train(t-1), P_train(t), action(t-1), action(t), c);
    end
end
figure;
subplot(3,1,1); plot(train_reward);   % cumulative training reward
subplot(3,1,2); plot(P_train);        % training prices
subplot(3,1,3); plot(action);         % actions taken (indices into delta)
drawnow
% Evaluate the same policy out of sample
test_reward = zeros(1, length( P_test ));
test_action = zeros(1, length( P_test ));
for t = 1 : length( P_test )
    test_action(t) = stateAction(state_test(t));
    if t > 1
        test_reward(t) = test_reward(t-1) + rewardFunction( P_test(t-1), P_test(t), test_action(t-1), test_action(t), c);
    end
end
figure;
subplot(3,1,1); plot(test_reward);    % cumulative test reward
subplot(3,1,2); plot(P_test);         % test prices
subplot(3,1,3); plot(test_action);    % actions taken (indices into delta)
drawnow
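% ---------------------------------------------------------------------------
% Helper sketches. The gist calls cci, valueFunction, updateQValue and
% rewardFunction without defining them. The local functions below are
% minimal sketches reconstructed from the call sites above, not the
% original implementations; the formulas and conventions are assumptions.
% (Local functions in scripts require MATLAB R2016b or later.)

function out = cci( P, period )
% Sketch of a close-only Commodity Channel Index:
%   CCI = (P - SMA(P, n)) / (0.015 * mean absolute deviation).
% Assumption: the original may instead use the typical price (H+L+C)/3.
out = nan(size(P));
for k = period : length(P)
    win = P(k-period+1:k);
    sma = mean(win);
    mean_dev = mean(abs(win - sma));
    if mean_dev > 0
        out(k) = (P(k) - sma) / (0.015 * mean_dev);
    else
        out(k) = 0;
    end
end
end

function v = valueFunction( P_prev, P_now, delta_prev, delta_now, c )
% Sketch: one-step trading return for holding position delta_prev over the
% bar, minus a proportional cost for moving to delta_now. Assumption: the
% original reward may differ (e.g. in which position earns the price move).
v = delta_prev * (P_now - P_prev) - c * abs(delta_now - delta_prev);
end

function q = updateQValue( q_old, Q_next, V, alpha, gamma )
% Sketch of the standard Q-learning update (assumed, not confirmed):
%   q <- (1 - alpha) * q_old + alpha * ( V + gamma * max(Q_next) )
q = (1 - alpha) * q_old + alpha * ( V + gamma * max(Q_next) );
end

function r = rewardFunction( P_prev, P_now, a_prev, a_now, c )
% Sketch. The evaluation loops pass action *indices* into delta = [-1,0,1],
% so index - 2 recovers the position; the reward is then the same one-step
% return used in valueFunction. This index-to-position mapping is an
% assumption.
d_prev = a_prev - 2;
d_now = a_now - 2;
r = d_prev * (P_now - P_prev) - c * abs(d_now - d_prev);
end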