Commit 2b44091b authored by tuhe
Week 11
Showing files with 723 additions and 0 deletions
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.gridworld_pyglet.gridworld_environments import OpenGridEnvironment
from irlc.lectures.lec10.lecture_10_mc_q_estimation import keyboard_play
from irlc.ex11.q_agent import QAgent

def open_play(Agent, method_label, **args):
    env = OpenGridEnvironment()
    agent = Agent(env, gamma=0.99, epsilon=0.1, alpha=.5, **args)
    keyboard_play(env, agent, method_label=method_label)

if __name__ == "__main__":
    open_play(QAgent, method_label="Q-learning")
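
# The QAgent used above implements tabular Q-learning. As a point of reference, a minimal sketch of
# the standard off-policy Q-learning update follows. It is not the course implementation (that lives
# in irlc.ex11.q_agent); the dict-based Q-table and the n_actions argument are illustrative assumptions.
def q_learning_update_sketch(Q, s, a, r, sp, done, n_actions, alpha=0.5, gamma=0.99):
    """One update: Q(s,a) <- Q(s,a) + alpha * (r + gamma * max_a' Q(s',a') - Q(s,a)).

    Q is assumed to be a dict mapping (state, action) pairs to floats; unseen pairs default to 0.
    """
    q_sa = Q.get((s, a), 0.0)
    # Bootstrap from the greedy value of the next state; drop the bootstrap term on termination.
    target = r if done else r + gamma * max(Q.get((sp, ap), 0.0) for ap in range(n_actions))
    Q[(s, a)] = q_sa + alpha * (target - q_sa)
    return Q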

# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.gridworld.gridworld_environments import BookGridEnvironment
from irlc.lectures.lec10.lecture_10_mc_q_estimation import keyboard_play
from irlc.exam_tabular_examples.sarsa_nstep_delay import SarsaDelayNAgent

if __name__ == "__main__":
    env = BookGridEnvironment(render_mode='human')
    agent = SarsaDelayNAgent(env, gamma=0.95, epsilon=0.1, alpha=.96, n=1)
    keyboard_play(env, agent, method_label="Sarsa")

# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.gridworld.gridworld_environments import CliffGridEnvironment, CliffGridEnvironment2
from irlc.ex01.agent import train
from irlc import interactive
from irlc.ex11.sarsa_agent import SarsaAgent

def cliffwalk(env, agent, method_label="method"):
    # Expose the label under the various attribute names the wrappers look for.
    env.label = method_label
    agent.method_label = method_label
    agent.label = method_label
    agent.method = method_label
    env, agent = interactive(env, agent)
    train(env, agent, num_episodes=1000)
    env.close()

epsi = 0.5
gamma = 1.0
alpha = .3

if __name__ == "__main__":
    import numpy as np
    np.random.seed(1)
    env = CliffGridEnvironment2(zoom=.8, render_mode='human')
    agent = SarsaAgent(env, gamma=gamma, epsilon=epsi, alpha=alpha)
    cliffwalk(env, agent, method_label="Sarsa")
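
# SarsaAgent above is on-policy: it bootstraps from the action actually chosen in the next state
# rather than from the greedy action. A minimal reference sketch of that update is given below; it is
# not the course implementation (see irlc.ex11.sarsa_agent), and the dict-based Q-table is an assumption.
def sarsa_update_sketch(Q, s, a, r, sp, ap, done, alpha=0.3, gamma=1.0):
    """One Sarsa update: Q(s,a) <- Q(s,a) + alpha * (r + gamma * Q(s',a') - Q(s,a))."""
    q_sa = Q.get((s, a), 0.0)
    target = r if done else r + gamma * Q.get((sp, ap), 0.0)
    Q[(s, a)] = q_sa + alpha * (target - q_sa)
    return Q
# On the cliff gridworld, this on-policy target combined with the fairly large epsilon used above is
# what pushes Sarsa towards the safer path away from the cliff edge, in contrast to Q-learning.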

# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from gymnasium.envs.classic_control import MountainCarEnv
import math
from typing import Optional
import numpy as np

class FancyMountainCar(MountainCarEnv):  # Piggyback on the original environment.
    visualization = None

    def __init__(self, render_mode: Optional[str] = None, goal_velocity=0):
        super().__init__(render_mode=render_mode, goal_velocity=goal_velocity)

    def render(self):
        if self.visualization is None:
            self.visualization = MountainCarVisualization(self, self.agent if hasattr(self, 'agent') else None)
        return self.visualization.render()

    def close(self):
        if self.visualization is not None:
            self.visualization.close()

from irlc.pacman.pacman_resources import WHITE, BLACK
from irlc.utils.graphics_util_pygame import GraphicsUtilGym

class MountainCarVisualization:
    # Class-level cache of feature vectors used when plotting the value function (filled lazily in render()).
    x_cache = []

    def __init__(self, env, agent):
        self.env = env
        self.agent = agent
        self.ga = GraphicsUtilGym()
        # The window is twice the width of the standard MountainCar screen: the left half shows the
        # car on the hill, the right half shows the agent's estimated value function.
        screen_width = env.screen_width * 2
        screen_height = env.screen_height
        xmin = env.min_position
        xmax = env.max_position + 1.8
        ymin = 0
        ymax = 1.2
        frames_per_second = 30
        self.ga.begin_graphics(screen_width, screen_height,
                               local_xmin_xmax_ymin_ymax=(xmin, xmax, ymax, ymin),
                               frames_per_second=frames_per_second,
                               color=WHITE, title="MountainCar Environment")
    def render(self):
        self.ga.draw_background()
        pos = self.env.state[0]
        scale = 1
        # Draw the mountain profile.
        xs = np.linspace(self.env.min_position, self.env.max_position, 100)
        ys = self.env._height(xs)
        self.ga.polyline("asdfasfd", xs=xs, ys=ys, width=1)
        # The original environment works in pixels; rscale converts pixel sizes to world coordinates.
        world_width = self.env.max_position - self.env.min_position
        rscale = self.env.screen_width / world_width
        carwidth = 40 / rscale
        carheight = 20 / rscale
        import pygame
        clearance = 10 / rscale
        # Draw the car body as a rectangle rotated to follow the slope of the hill.
        l, r, t, b = -carwidth / 2, carwidth / 2, carheight, 0
        coords = []
        for c in [(l, b), (l, t), (r, t), (r, b)]:
            c = pygame.math.Vector2(c).rotate_rad(math.cos(3 * pos))
            coords.append(
                (
                    c[0] + pos * scale,
                    c[1] + clearance + self.env._height(pos) * scale,
                )
            )
        self.ga.polygon("adsfasdf", coords=coords, outlineColor=BLACK, fillColor=BLACK, width=2)
        # Draw the two wheels.
        for c in [(carwidth / 4, 0), (-carwidth / 4, 0)]:
            c = pygame.math.Vector2(c).rotate_rad(math.cos(3 * pos))
            wheel = (
                c[0] + pos * scale,
                c[1] + clearance + self.env._height(pos) * scale,
            )
            self.ga.circle("asdf", (wheel[0], wheel[1]), int(carheight / 2.5 * rscale),
                           fillColor=(128, 128, 128), outlineColor=(70, 70, 70))
        # Draw the goal flag.
        flagx = self.env.goal_position * scale
        flagy1 = self.env._height(self.env.goal_position) * scale
        flagy2 = flagy1 + 50 / rscale
        self.ga.line("asdfasdf", (flagx, flagy1), (flagx, flagy2), color=(0, 0, 0))
        self.ga.polygon(
            "sdfasdf",
            [(flagx, flagy2), (flagx, flagy2 - 10 / rscale), (flagx + 25 / rscale, flagy2 - 5 / rscale)],
            (204, 204, 0),
        )
        # Optionally draw the agent's estimated value function in the right-hand panel.
        Vscale = 6

        def x2s(pos, vel):
            # Map an observation (position, velocity) to screen coordinates in the right-hand panel.
            return pos + 1.75, (vel + 0.1) * Vscale

        xmin, ymin = x2s(self.env.observation_space.low[0], self.env.observation_space.low[1])
        xmax, ymax = x2s(self.env.observation_space.high[0], self.env.observation_space.high[1])
        px, py = x2s(*np.asarray(self.env.state).tolist())
        if self.agent is not None:
            def colfunc(val, minval, maxval, startcolor, stopcolor):
                """ Convert a value in the range minval...maxval to a color interpolated between
                startcolor and stopcolor. The colors passed and the one returned are
                composed of a sequence of N component values (e.g. RGB).
                """
                f = float(val - minval) / (maxval - minval)
                return tuple(float(f * (b - a) + a) for (a, b) in zip(startcolor, stopcolor))

            RED, YELLOW, GREEN = (1, 0, 0), (1, 1, 0), (0, 1, 0)
            CYAN, BLUE, MAGENTA = (0, 1, 1), (0, 0, 1), (1, 0, 1)
            grid_size = 40
            low = self.env.unwrapped.observation_space.low
            high = self.env.unwrapped.observation_space.high
            X, Y = np.meshgrid(np.linspace(low[0], high[0], grid_size), np.linspace(low[1], high[1], grid_size))
            Z = X * 0
            # Cache the feature vectors for the grid of states; they do not change between frames.
            if self.x_cache is None or len(self.x_cache) == 0:
                for i, (x, y) in enumerate(zip(X.flat, Y.flat)):
                    s = (x, y)
                    xx = [self.agent.Q.x(s, a) for a in range(self.env.action_space.n)]
                    self.x_cache.append(xx)
            # Evaluate max_a Q(s, a) = max_a w @ x(s, a) on the grid.
            for i in range(len(self.x_cache)):
                Z.flat[i] = max([float(self.agent.Q.w @ dx) for dx in self.x_cache[i]])
            # Draw one colored cell per grid point, interpolating from BLUE (low) to RED (high).
            for i in range(len(Z.flat)):
                ddx = (X.max() - X.min()) / (grid_size - 1)
                ddy = (Y.max() - Y.min()) / (grid_size - 1)
                z = colfunc(Z.flat[i], Z.min(), Z.max() + 0.01, BLUE, RED)
                z = tuple(int(x * 255) for x in z)
                xmin, ymin = x2s(X.flat[i], Y.flat[i])
                xmax, ymax = x2s(X.flat[i] + ddx, Y.flat[i] + ddy)
                self.ga.rectangle(color=z, x=xmin, y=ymin, width=xmax - xmin, height=ymax - ymin)
        # Outline the panel and mark the current state.
        self.ga.rectangle(color=BLACK, x=xmin, y=ymin, width=xmax - xmin, height=ymax - ymin, border=1)
        self.ga.circle("asdf", (px, py), r=5, fillColor=(200, 200, 200))
        return self.ga.blit(render_mode=self.env.render_mode)

    def close(self):
        self.ga.close()

if __name__ == '__main__':
    from irlc import interactive, train
    from irlc.ex11.semi_grad_sarsa import LinearSemiGradSarsa
    env = FancyMountainCar(render_mode='human')
    num_of_tilings = 8
    alpha = 0.3
    agent = LinearSemiGradSarsa(env, gamma=1, alpha=alpha / num_of_tilings, epsilon=0)
    env, agent = interactive(env, agent)
    train(env, agent, num_episodes=10)
    env.close()
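
# LinearSemiGradSarsa approximates the action-value function linearly, q(s, a; w) = w @ x(s, a), and
# follows the semi-gradient Sarsa update from Sutton and Barto. The sketch below is a minimal reference
# for that update, not the course implementation: the feature map `x` passed in is an assumption (the
# actual agent uses tile coding), and w and x(s, a) are assumed to be numpy arrays. The default alpha
# mirrors the alpha / num_of_tilings choice above.
def semi_gradient_sarsa_update_sketch(w, x, s, a, r, sp, ap, done, alpha=0.3 / 8, gamma=1.0):
    """One update: w <- w + alpha * (r + gamma * w @ x(s',a') - w @ x(s,a)) * x(s,a)."""
    q_sa = w @ x(s, a)
    # Semi-gradient: the target is treated as a constant, so only x(s, a) enters the gradient.
    target = r if done else r + gamma * (w @ x(sp, ap))
    return w + alpha * (target - q_sa) * x(s, a)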

# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.gridworld.gridworld_environments import BookGridEnvironment
from irlc.exam_tabular_examples.sarsa_nstep_delay import SarsaDelayNAgent
from irlc import interactive, train

if __name__ == "__main__":
    env = BookGridEnvironment(render_mode='human')
    agent = SarsaDelayNAgent(env, gamma=1, epsilon=0.1, alpha=0.9, n=1)  # Exam problem.
    env, agent = interactive(env, agent)
    train(env, agent, num_episodes=10)

# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.ex10.question_td0 import a_compute_deltas, b_perform_td0, c_perform_td0_batched
from unitgrade import Report, UTestCase, cache
from irlc import train
import irlc.ex10.envs
import gymnasium as gym
from gymnasium.wrappers import TimeLimit
from irlc.tests.tests_week08 import train_recording

class MCAgentQuestion(UTestCase):
    """ Test of MC agent """
    def get_env_agent(self):
        from irlc.ex10.mc_agent import MCAgent
        env = gym.make("SmallGridworld-v0")
        env = TimeLimit(env, max_episode_steps=1000)
        gamma = .8
        agent = MCAgent(env, gamma=gamma, first_visit=True)
        return env, agent

    @cache
    def compute_trajectories(self):
        env, agent = self.get_env_agent()
        _, trajectories = train(env, agent, return_trajectory=True, num_episodes=1, max_steps=100)
        return trajectories, agent.Q.to_dict()

    def test_Q_function(self):
        trajectories, Q = self.compute_trajectories()
        env, agent = self.get_env_agent()
        train_recording(env, agent, trajectories)
        Qc = []
        Qe = []
        for s, qa in Q.items():
            for a, q in qa.items():
                Qe.append(q)
                Qc.append(agent.Q[s, a])
        self.assertL2(Qe, Qc, tol=1e-5)
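
# The MCAgent tested above estimates Q by first-visit Monte Carlo: returns are computed backwards
# through each episode and Q(s, a) is the average of the returns following the first visit to (s, a).
# A minimal reference sketch under those assumptions (not the course implementation in irlc.ex10.mc_agent):
def first_visit_mc_sketch(episode, gamma, Q, returns):
    """`episode` is a list of (s, a, r) tuples; `returns` maps (s, a) to a list of observed returns."""
    first_visit_index = {}
    for t, (s, a, _) in enumerate(episode):
        first_visit_index.setdefault((s, a), t)
    G = 0
    for t in reversed(range(len(episode))):
        s, a, r = episode[t]
        G = gamma * G + r
        if first_visit_index[(s, a)] == t:  # only the first occurrence of (s, a) contributes
            returns.setdefault((s, a), []).append(G)
            Q[(s, a)] = sum(returns[(s, a)]) / len(returns[(s, a)])
    return Q, returns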
# class BlackjackQuestion(UTestCase):
# """ MC policy evaluation agent and Blacjack """
# def test_blackjack_mc(self):
# env = gym.make("Blackjack-v1")
# episodes = 50000
# from irlc.ex10.mc_evaluate import MCEvaluationAgent
# from irlc.ex10.mc_evaluate_blackjack import get_by_ace, to_matrix, policy20
# agent = MCEvaluationAgent(env, policy=policy20, gamma=1)
# train(env, agent, num_episodes=episodes)
# w = get_by_ace(agent.v, ace=True)
# X, Y, Z = to_matrix(w)
# print(Z)
# print(Z.dtype)
# self.assertL2(Z, tol=2.5)

class TD0Question(UTestCase):
    """ Test of TD(0) evaluation agent """
    gamma = 0.8

    def get_env_agent(self):
        from irlc.ex10.td0_evaluate import TD0ValueAgent
        env = gym.make("SmallGridworld-v0")
        agent = TD0ValueAgent(env, gamma=self.gamma)
        return env, agent

    @cache
    def compute_trajectories(self):
        env, agent = self.get_env_agent()
        _, trajectories = train(env, agent, return_trajectory=True, num_episodes=1, max_steps=100)
        return trajectories, agent.v

    def test_value_function(self):
        trajectories, v = self.compute_trajectories()
        env, agent = self.get_env_agent()
        train_recording(env, agent, trajectories)
        Qc = []
        Qe = []
        for s, value in v.items():
            Qe.append(value)
            Qc.append(agent.v[s])
        self.assertL2(Qe, Qc, tol=1e-5)

class MCEvaluationQuestion(TD0Question):
    """ Test of MC evaluation agent """
    def get_env_agent(self):
        from irlc.ex10.mc_evaluate import MCEvaluationAgent
        env = gym.make("SmallGridworld-v0")
        env = TimeLimit(env, max_episode_steps=1000)
        gamma = .8
        agent = MCEvaluationAgent(env, gamma=gamma, first_visit=True)
        return env, agent

class ExamQuestionTD0(UTestCase):
    def get_problem(self):
        states = [1, 0, 2, -1, 2, 4, 5, 4, 3, 2, 1, -1]
        rewards = [1, 1, -1, 0, 1, 2, 2, 0, 0, -1, 1]
        v = {s: 0 for s in states}
        gamma = 0.9
        alpha = 0.2
        return v, states, rewards, gamma, alpha

    def test_a(self):
        v, states, rewards, gamma, alpha = self.get_problem()
        self.assertEqualC(a_compute_deltas(v, states, rewards, gamma))

    def test_b(self):
        v, states, rewards, gamma, alpha = self.get_problem()
        self.assertEqualC(b_perform_td0(v, states, rewards, gamma, alpha))

    def test_c(self):
        v, states, rewards, gamma, alpha = self.get_problem()
        self.assertEqualC(c_perform_td0_batched(v, states, rewards, gamma, alpha))
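
# The exam functions tested above operate on a recorded sequence of states and rewards. For reference,
# the standard TD(0) quantities they are built around are the TD errors
# delta_t = r_t + gamma * v(s_{t+1}) - v(s_t) and the update v(s_t) <- v(s_t) + alpha * delta_t.
# Below is a minimal sketch under those assumptions; it does not treat any state specially (e.g. as
# terminal), and the actual signatures live in irlc.ex10.question_td0.
def td0_sweep_sketch(v, states, rewards, gamma, alpha):
    """Apply one TD(0) pass over a trajectory; `states` has one more element than `rewards`."""
    for t in range(len(rewards)):
        delta = rewards[t] + gamma * v[states[t + 1]] - v[states[t]]
        v[states[t]] = v[states[t]] + alpha * delta
    return v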

class Week10Tests(Report):
    title = "Tests for week 10"
    pack_imports = [irlc]
    individual_imports = []
    questions = [(MCAgentQuestion, 10),
                 (MCEvaluationQuestion, 10),
                 # (BlackjackQuestion, 5),
                 (TD0Question, 10),
                 (ExamQuestionTD0, 10),
                 ]

if __name__ == '__main__':
    from unitgrade import evaluate_report_student
    evaluate_report_student(Week10Tests())

# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from unitgrade import UTestCase, Report, cache
import numpy as np
from irlc import train
import irlc.ex10.envs
import gymnasium as gym
from irlc.tests.tests_week08 import train_recording
from irlc.tests.tests_week10 import TD0Question, MCAgentQuestion

# The n-step Sarsa evaluation question (NStepSarseEvaluationQuestion) no longer exists and has been removed.

class QAgentQuestion(MCAgentQuestion):
    """ Test of Q Agent """
    def get_env_agent(self):
        from irlc.ex11.q_agent import QAgent
        env = gym.make("SmallGridworld-v0")
        agent = QAgent(env, gamma=.8)
        return env, agent

class TabularAgentStub(UTestCase):
    """ Average return over many simulated episodes """
    gamma = 0.95
    epsilon = 0.2
    tol = 0.1
    tol_qs = 0.3
    episodes = 9000

    def get_env(self):
        return gym.make("SmallGridworld-v0")

    def get_env_agent(self):
        raise NotImplementedError()

    def get_trained_agent(self):
        env, agent = self.get_env_agent()
        stats, _ = train(env, agent, num_episodes=self.episodes)
        return agent, stats

    def chk_accumulated_reward(self):
        agent, stats = self.get_trained_agent()
        s0, _ = agent.env.reset()
        actions, qs = agent.Q.get_Qs(s0)
        print("Tolerance is", self.tol_qs)
        self.assertL2(qs, tol=self.tol_qs)
        self.assertL2(np.mean([s['Accumulated Reward'] for s in stats]), tol=self.tol)

class SarsaQuestion(TabularAgentStub):
    def get_env_agent(self):
        from irlc.ex11.sarsa_agent import SarsaAgent
        agent = SarsaAgent(self.get_env(), gamma=self.gamma)
        return agent.env, agent

    def test_accumulated_reward(self):
        self.tol_qs = 2.7  # Got 2.65 in one run.
        self.chk_accumulated_reward()

class NStepSarsaQuestion(TabularAgentStub):
    title = "N-step Sarsa"
    def get_env_agent(self):
        from irlc.ex11.nstep_sarsa_agent import SarsaNAgent
        agent = SarsaNAgent(self.get_env(), gamma=self.gamma, n=5)
        return agent.env, agent

    def test_accumulated_reward(self):
        self.tol_qs = 2.7
        self.chk_accumulated_reward()
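
# SarsaNAgent bootstraps from an n-step return rather than a one-step target. For reference, the
# n-step return used in n-step Sarsa is
#   G_{t:t+n} = r_{t+1} + gamma*r_{t+2} + ... + gamma^(n-1)*r_{t+n} + gamma^n * Q(s_{t+n}, a_{t+n}).
# The helper below is an illustrative sketch only, not the course implementation
# (see irlc.ex11.nstep_sarsa_agent).
def nstep_return_sketch(rewards, q_bootstrap, gamma, n):
    """`rewards` are the rewards r_{t+1..t+n} actually observed (fewer if the episode ended);
    `q_bootstrap` is Q(s_{t+n}, a_{t+n})."""
    G = sum(gamma ** k * rewards[k] for k in range(min(n, len(rewards))))
    if len(rewards) >= n:  # only bootstrap if the episode did not terminate within the n steps
        G += gamma ** n * q_bootstrap
    return G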

class LinearAgentStub(UTestCase):
    tol = 1e-6
    alpha = 0.08
    num_episodes = 300
    gamma = 0.8
    tol_w = 1e-5

    def get_env_agent(self):
        raise NotImplementedError()

    def get_env(self):
        return gym.make("MountainCar500-v0")

    @cache
    def compute_trajectories(self):
        env, agent = self.get_env_agent()
        _, trajectories = train(env, agent, return_trajectory=True, num_episodes=1, max_steps=100)
        return trajectories, agent.Q.w

    def chk_Q_weight_vector_w(self):
        trajectories, w = self.compute_trajectories()
        env, agent = self.get_env_agent()
        train_recording(env, agent, trajectories)
        print(w)
        print(agent.Q.w)
        self.assertL2(agent.Q.w, w, tol=self.tol_w)

class LinearSarsaAgentQuestion(LinearAgentStub):
    """ Sarsa Agent with linear function approximators """
    def get_env_agent(self):
        env = self.get_env()
        from irlc.ex11.semi_grad_sarsa import LinearSemiGradSarsa
        agent = LinearSemiGradSarsa(env, gamma=1, alpha=self.alpha, epsilon=0)
        return env, agent

    def test_Q_weight_vector_w(self):
        self.tol_w = 1.4
        self.chk_Q_weight_vector_w()

class LinearQAgentQuestion(LinearAgentStub):
    """ Test of Linear Q Agent """
    def get_env_agent(self):
        env = self.get_env()
        alpha = 0.1
        from irlc.ex11.semi_grad_q import LinearSemiGradQAgent
        agent = LinearSemiGradQAgent(env, gamma=1, alpha=alpha, epsilon=0)
        return env, agent

    def test_Q_weight_vector_w(self):
        self.tol_w = 7
        self.chk_Q_weight_vector_w()

class Week11Tests(Report):
    title = "Tests for week 11"
    pack_imports = [irlc]
    individual_imports = []
    questions = [
        (QAgentQuestion, 10),
        (LinearQAgentQuestion, 10),
        (LinearSarsaAgentQuestion, 10),
        (SarsaQuestion, 10),
        (NStepSarsaQuestion, 5),
    ]

if __name__ == '__main__':
    from unitgrade import evaluate_report_student
    evaluate_report_student(Week11Tests())