single_update.m
function q = single_update(q, state, past_state, reward, action, lambda, alpha)
% SINGLE_UPDATE Update the Q-value table for a single state transition
%
% q = the Q-value table at instant t
% state = the state s_t
% past_state = the state s_{t-1}
% reward = the reward r_{t-1}
% action = the chosen action a_{t-1}
% lambda = exponential discount factor
% alpha = learning rate
%
% q = the updated Q-value table for instant t+1
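%
% The code below follows a Q-learning-style update applied to a weighted
% state encoding; it is assumed here that state and past_state alternate
% [index, weight] pairs, with the weights acting as state probabilities:
%   R      = r_{t-1} + lambda * sum_{s'} w(s') * max_{a'} Q(s', a')
%   Q(s,a) = Q(s,a) + alpha * w(s) * (R - Q(s,a))
%
% Example (a hypothetical sketch, assuming 3 discrete states and 2 actions):
%   q = zeros(3, 2);                 % initial Q-value table
%   past_state = [1, 0.7, 2, 0.3];   % at t-1: state 1 with weight 0.7, state 2 with 0.3
%   state = [2, 0.6, 3, 0.4];        % at t:   state 2 with weight 0.6, state 3 with 0.4
%   q = single_update(q, state, past_state, 1.0, 2, 0.9, 0.1);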
% estimate the long-term return: the immediate reward plus the discounted
% value of the best action in each possible current state, weighted by state(i + 1)
for i = 1 : 2 : length(state) - 1
    if state(i + 1) > 0
        reward = reward + lambda * state(i + 1) * max(q(state(i), :));
    end
end
% update the Q-value for all states in the old state vector s_{t-1}
for i = 1 : 2 : length(past_state) - 1
    if past_state(i + 1) > 0
        q(past_state(i), action) = q(past_state(i), action) + ...
            alpha * past_state(i + 1) * (reward - q(past_state(i), action));
    end
end
end