@fatfingererr · created September 22, 2017
RL_example
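% Tabular Q-learning on GBPUSD M5 closes: the CCI of the price series is
% bucketed into discrete states, the actions are positions in {-1, 0, +1},
% and the greedy policy extracted from the learned Q table is evaluated on
% a 70/30 train/test split.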
clc
clear all
disp('get GBPUSD M5 data...');
getPrice ; % script that defines the close-price vector P (sketch below)
disp('done!');
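% getPrice is not included in this gist. Judging from the call above, it is
% a script that defines the close-price vector P. A minimal sketch, assuming
% a hypothetical CSV file 'GBPUSD_M5.csv' with the close in its last column:
%
%   raw = csvread('GBPUSD_M5.csv');
%   P   = raw(:, end);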
train_ratio = 0.7 ;            % fraction of data used for training
nIter = 100;                   % (defined but unused below)
epsilon = 10^(-10);            % convergence tolerance for the Q-value iteration
cci_period = 14 ;              % CCI lookback period
c = 0.000 ;                    % proportional transaction cost
alpha = 0.4 ;                  % learning rate
gamma = 0.9 ;                  % discount factor
delta = [-1, 0, 1];            % positions: short, flat, long
state_cut = -1000:100:1000;    % CCI bucket edges -> length(state_cut)+1 = 22 states
P_cci = cci( P, cci_period );       % CCI of the close series (sketch at end of file)
nonnan_index = ~isnan(P_cci);       % drop the warm-up NaNs
P_cci = P_cci(nonnan_index);
P = P(nonnan_index);
n_P_cci = length( P_cci );
P = P( end - n_P_cci + 1 : end );   % keep prices aligned with the CCI series
state = zeros( 1, n_P_cci );        % discrete state per bar
for i_P_cci = 1 : n_P_cci
    if P_cci(i_P_cci) < state_cut(1)
        % state 1 : CCI below the lowest cut, -1000
        state(i_P_cci) = 1 ;
    elseif P_cci(i_P_cci) < state_cut(end)
        % states 2 .. length(state_cut) : CCI in [state_cut(i-1), state_cut(i))
        for i_state = 2 : length(state_cut)
            if P_cci(i_P_cci) >= state_cut(i_state-1) && P_cci(i_P_cci) < state_cut(i_state)
                state(i_P_cci) = i_state ;
            end
        end
    else
        % state length(state_cut)+1 : CCI at or above the highest cut, 1000
        state(i_P_cci) = length(state_cut)+1 ;
    end
end
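% For reference, the bucketing loop above is equivalent to one call to
% discretize (R2015a+); left as a comment so the explicit loop stays the
% reference implementation:
%
%   state = reshape(discretize(P_cci, [-inf, state_cut, inf]), 1, []);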
P_length = length( P );
n_train = floor( P_length * train_ratio );
P_train = P(1:n_train);
P_test = P(n_train+1:end);
state_train = state(1:n_train);
state_test = state(n_train+1:end);
nDelta = length( delta );         % number of actions
T = length( P_train );
nState = length( state_cut )+1;   % number of CCI states
Q_t = zeros(nState , nDelta);     % tabular Q, one row per state
action = zeros(1, T);
update_Q_t = zeros(1, nDelta );
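% The sweep below runs backward through the training bars. At each bar it
% first tabulates V_t, the one-step value of every (previous position,
% current position) pair, then iterates the Q update for the current state
% until it is stable to within epsilon. updateQValue is not included in the
% gist; it is assumed to implement the standard rule
%   Q(s,a) <- (1-alpha)*Q(s,a) + alpha*( V + gamma*max_a' Q(s',a') )
% (see the sketch at the end of this file).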
for t = T : -1 : 2
    disp(strcat('t:', num2str(T-t), '/', num2str(T)));   % progress
    nowState = state_train(t);
    % One-step value of every (previous position, current position) pair
    V_t = zeros(nDelta, nDelta);
    for nowDelta = 1 : nDelta
        for lastDelta = 1 : nDelta
            delta_t = delta( nowDelta );
            delta_t_1 = delta( lastDelta );
            V_t( lastDelta, nowDelta ) = valueFunction( P_train(t-1), P_train(t), delta_t_1, delta_t, c);
        end
    end
    for nowDelta = 1 : nDelta
        if t == T
            % Terminal bar: seed every state with the best immediate value
            Q_t( :, nowDelta ) = max( V_t( :, nowDelta ) );
        else
            % Iterate the Q update for this state until it stops moving
            Q_t_diff = 1 ;
            V_t_next = all_next_V_t( nowDelta );
            while Q_t_diff > epsilon
                Q_t( nowState, nowDelta ) = update_Q_t(nowDelta) ;
                update_Q_t(nowDelta) = updateQValue( Q_t( nowState, nowDelta ), all_next_Q_t, V_t_next, alpha, gamma );
                Q_t_diff = abs( Q_t( nowState, nowDelta ) - update_Q_t(nowDelta) );
            end
        end
    end
    % Greedy action for the current state, ties broken at random
    all_next_Q_t = Q_t( nowState, : );
    action_candidate = find( all_next_Q_t == max(all_next_Q_t) );
    action(t) = action_candidate(randi([1 length(action_candidate)], 1, 1));
    all_next_V_t = V_t( :, action(t) );
end
% Extract the greedy policy: stateAction(s) is the index into delta of the
% best action in state s, ties broken at random
stateAction = zeros(1, nState);
for i_state = 1 : nState
    action_candidate = find( Q_t(i_state, :) == max(Q_t(i_state, :)) );
    stateAction(i_state) = action_candidate(randi([1 length(action_candidate)], 1, 1));
end
% Cumulative reward of the greedy policy on the training set
train_reward = zeros(1, length( P_train ));
for t = 1 : length( P_train )
    action(t) = stateAction(state_train(t));
    if t > 1
        train_reward(t) = train_reward(t-1) + rewardFunction( P_train(t-1), P_train(t), action(t-1), action(t), c);
    end
end
figure;
subplot(3,1,1); plot(train_reward);   % cumulative training reward
subplot(3,1,2); plot(P_train);        % training prices
subplot(3,1,3); plot(action);         % actions taken (indices into delta)
drawnow
% Evaluate the same policy out of sample
test_reward = zeros(1, length( P_test ));
test_action = zeros(1, length( P_test ));
for t = 1 : length( P_test )
    test_action(t) = stateAction(state_test(t));
    if t > 1
        test_reward(t) = test_reward(t-1) + rewardFunction( P_test(t-1), P_test(t), test_action(t-1), test_action(t), c);
    end
end
figure;
subplot(3,1,1); plot(test_reward);    % cumulative test reward
subplot(3,1,2); plot(P_test);         % test prices
subplot(3,1,3); plot(test_action);    % actions taken (indices into delta)
drawnow
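% ---------------------------------------------------------------------------
% Helper sketches. The gist calls cci, valueFunction, updateQValue and
% rewardFunction without defining them. The local functions below are
% minimal sketches reconstructed from the call sites above, not the
% original implementations; the formulas and conventions are assumptions.
% (Local functions in scripts require MATLAB R2016b or later.)

function out = cci( P, period )
% Sketch of a close-only Commodity Channel Index:
%   CCI = (P - SMA(P, n)) / (0.015 * mean absolute deviation).
% Assumption: the original may instead use the typical price (H+L+C)/3.
out = nan(size(P));
for k = period : length(P)
    win = P(k-period+1:k);
    sma = mean(win);
    mean_dev = mean(abs(win - sma));
    if mean_dev > 0
        out(k) = (P(k) - sma) / (0.015 * mean_dev);
    else
        out(k) = 0;
    end
end
end

function v = valueFunction( P_prev, P_now, delta_prev, delta_now, c )
% Sketch: one-step trading return for holding position delta_prev over the
% bar, minus a proportional cost for moving to delta_now. Assumption: the
% original reward may differ (e.g. in which position earns the price move).
v = delta_prev * (P_now - P_prev) - c * abs(delta_now - delta_prev);
end

function q = updateQValue( q_old, Q_next, V, alpha, gamma )
% Sketch of the standard Q-learning update (assumed, not confirmed):
%   q <- (1 - alpha) * q_old + alpha * ( V + gamma * max(Q_next) )
q = (1 - alpha) * q_old + alpha * ( V + gamma * max(Q_next) );
end

function r = rewardFunction( P_prev, P_now, a_prev, a_now, c )
% Sketch. The evaluation loops pass action *indices* into delta = [-1,0,1],
% so index - 2 recovers the position; the reward is then the same one-step
% return used in valueFunction. This index-to-position mapping is an
% assumption.
d_prev = a_prev - 2;
d_now = a_now - 2;
r = d_prev * (P_now - P_prev) - c * abs(d_now - d_prev);
end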