Skip to content
Snippets Groups Projects
Commit c16ac062 authored by tuhe
Browse files

Solutions for week 10+11

parent 72620f61
Branches
No related tags found
No related merge requests found
Showing
with 53 additions and 0 deletions
G = gamma * G + episode[t][2]
sa_t = episode[t][:2]
\ No newline at end of file
returns.append(sa_t + (G,) )
\ No newline at end of file
return self.pi_eps(s, info)
\ No newline at end of file
self.episode.append((s, a, r))
if done:
returns = get_MC_return_SA(self.episode, self.gamma, self.first_visit)
for s, a, G in returns:
# s,a = sa
if self.alpha is None:
self.returns_sum_S[s, a] += G
self.returns_count_N[s, a] += 1
self.Q[s, a] = self.returns_sum_S[s, a] / self.returns_count_N[s, a]
else:
self.Q[s, a] += self.alpha * (G - self.Q[s, a])
self.episode = []
\ No newline at end of file
train(env, agent, expn, num_episodes=episodes, return_trajectory=False)
\ No newline at end of file
G = gamma * G + episode[t][2]
s_t = episode[t][0]
\ No newline at end of file
returns.append((s_t, G))
\ No newline at end of file
self.v[s] = self.v[s] + self.alpha * (G - self.v[s])
\ No newline at end of file
self.returns_sum_S[s] += G
self.returns_count_N[s] += 1.0
self.v[s] = self.returns_sum_S[s] / self.returns_count_N[s]
\ No newline at end of file
agent_every = MCEvaluationAgent(env, gamma=gamma, first_visit=False)
\ No newline at end of file
train(env, agent_every, num_episodes=episodes, verbose=False)
\ No newline at end of file
return 0 if s[0] >= 20 else 1
\ No newline at end of file
agent = MCEvaluationAgent(env, policy=policy20, gamma=1)
train(env, agent, experiment_name=experiment, num_episodes=episodes)
\ No newline at end of file
deltas = []
for t, (s, r) in enumerate(zip(states[:-1], rewards)):
sp = states[t + 1]
delta = (r + gamma * v[sp]) - v[s]
deltas.append(delta)
\ No newline at end of file
for t in range(len(rewards)):
s = states[t]
sp = states[t + 1]
r = rewards[t]
delta = r + gamma * v[sp] - v[s]
v[s] = v[s] + alpha * delta
\ No newline at end of file
deltas = a_compute_deltas(v, states, rewards, gamma)
for t in range(len(rewards)):
s = states[t]
v[s] = v[s] + alpha * deltas[t]
\ No newline at end of file
sp = s+(2*a-1)
\ No newline at end of file
if isinstance(s, np.ndarray):
print("Bad type.")
self.v[s] += self.alpha * (r + self.gamma * (self.v[sp] if not done else 0) - self.v[s])
\ No newline at end of file
G = sum([self.gamma**(i-tau-1)*self.R[i%(n+1)] for i in range(tau+1, min(tau+n, T)+1)])
S_tau_n, A_tau_n = self.S[(tau+n)%(n+1)], self.A[(tau+n)%(n+1)]
if tau+n < T:
G += self.gamma**n * self._q(S_tau_n, A_tau_n)
\ No newline at end of file
action = self.pi_eps(s, info=info)
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment