Commit 2b44091b authored by tuhe
Week 11
Showing files with 723 additions and 0 deletions
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.gridworld_pyglet.gridworld_environments import OpenGridEnvironment
from irlc.lectures.lec10.lecture_10_mc_q_estimation import keyboard_play
from irlc.ex11.q_agent import QAgent

def open_play(Agent, method_label, **args):
    env = OpenGridEnvironment()
    agent = Agent(env, gamma=0.99, epsilon=0.1, alpha=.5, **args)
    keyboard_play(env, agent, method_label=method_label)

if __name__ == "__main__":
    open_play(QAgent, method_label="Q-learning")
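
# The QAgent used above implements tabular Q-learning. As a point of reference, a minimal sketch of
# the standard off-policy Q-learning update follows. It is not the course implementation (that lives
# in irlc.ex11.q_agent); the dict-based Q-table and the n_actions argument are illustrative assumptions.
def q_learning_update_sketch(Q, s, a, r, sp, done, n_actions, alpha=0.5, gamma=0.99):
    """One update: Q(s,a) <- Q(s,a) + alpha * (r + gamma * max_a' Q(s',a') - Q(s,a)).

    Q is assumed to be a dict mapping (state, action) pairs to floats; unseen pairs default to 0.
    """
    q_sa = Q.get((s, a), 0.0)
    # Bootstrap from the greedy value of the next state; drop the bootstrap term on termination.
    target = r if done else r + gamma * max(Q.get((sp, ap), 0.0) for ap in range(n_actions))
    Q[(s, a)] = q_sa + alpha * (target - q_sa)
    return Q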

# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.gridworld.gridworld_environments import BookGridEnvironment
from irlc.lectures.lec10.lecture_10_mc_q_estimation import keyboard_play
from irlc.exam_tabular_examples.sarsa_nstep_delay import SarsaDelayNAgent

if __name__ == "__main__":
    env = BookGridEnvironment(render_mode='human')
    agent = SarsaDelayNAgent(env, gamma=0.95, epsilon=0.1, alpha=.96, n=1)
    keyboard_play(env, agent, method_label="Sarsa")

# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.gridworld.gridworld_environments import CliffGridEnvironment, CliffGridEnvironment2
from irlc.ex01.agent import train
from irlc import interactive
from irlc.ex11.sarsa_agent import SarsaAgent

def cliffwalk(env, agent, method_label="method"):
    # Expose the label under the various attribute names the wrappers look for.
    env.label = method_label
    agent.method_label = method_label
    agent.label = method_label
    agent.method = method_label
    env, agent = interactive(env, agent)
    train(env, agent, num_episodes=1000)
    env.close()

epsi = 0.5
gamma = 1.0
alpha = .3

if __name__ == "__main__":
    import numpy as np
    np.random.seed(1)
    env = CliffGridEnvironment2(zoom=.8, render_mode='human')
    agent = SarsaAgent(env, gamma=gamma, epsilon=epsi, alpha=alpha)
    cliffwalk(env, agent, method_label="Sarsa")
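
# SarsaAgent above is on-policy: it bootstraps from the action actually chosen in the next state
# rather than from the greedy action. A minimal reference sketch of that update is given below; it is
# not the course implementation (see irlc.ex11.sarsa_agent), and the dict-based Q-table is an assumption.
def sarsa_update_sketch(Q, s, a, r, sp, ap, done, alpha=0.3, gamma=1.0):
    """One Sarsa update: Q(s,a) <- Q(s,a) + alpha * (r + gamma * Q(s',a') - Q(s,a))."""
    q_sa = Q.get((s, a), 0.0)
    target = r if done else r + gamma * Q.get((sp, ap), 0.0)
    Q[(s, a)] = q_sa + alpha * (target - q_sa)
    return Q
# On the cliff gridworld, this on-policy target combined with the fairly large epsilon used above is
# what pushes Sarsa towards the safer path away from the cliff edge, in contrast to Q-learning.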

# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from gymnasium.envs.classic_control import MountainCarEnv
import math
from typing import Optional
import numpy as np

class FancyMountainCar(MountainCarEnv):  # Piggyback on the original environment.
    visualization = None

    def __init__(self, render_mode: Optional[str] = None, goal_velocity=0):
        super().__init__(render_mode=render_mode, goal_velocity=goal_velocity)

    def render(self):
        if self.visualization is None:
            self.visualization = MountainCarVisualization(self, self.agent if hasattr(self, 'agent') else None)
        return self.visualization.render()

    def close(self):
        if self.visualization is not None:
            self.visualization.close()

from irlc.pacman.pacman_resources import WHITE, BLACK
from irlc.utils.graphics_util_pygame import GraphicsUtilGym

class MountainCarVisualization:
    # Class-level cache of feature vectors used when plotting the value function (filled lazily in render()).
    x_cache = []

    def __init__(self, env, agent):
        self.env = env
        self.agent = agent
        self.ga = GraphicsUtilGym()
        # The window is twice the width of the standard MountainCar screen: the left half shows the
        # car on the hill, the right half shows the agent's estimated value function.
        screen_width = env.screen_width * 2
        screen_height = env.screen_height
        xmin = env.min_position
        xmax = env.max_position + 1.8
        ymin = 0
        ymax = 1.2
        frames_per_second = 30
        self.ga.begin_graphics(screen_width, screen_height,
                               local_xmin_xmax_ymin_ymax=(xmin, xmax, ymax, ymin),
                               frames_per_second=frames_per_second,
                               color=WHITE, title="MountainCar Environment")
    def render(self):
        self.ga.draw_background()
        pos = self.env.state[0]
        scale = 1
        # Draw the mountain profile.
        xs = np.linspace(self.env.min_position, self.env.max_position, 100)
        ys = self.env._height(xs)
        self.ga.polyline("asdfasfd", xs=xs, ys=ys, width=1)
        # The original environment works in pixels; rscale converts pixel sizes to world coordinates.
        world_width = self.env.max_position - self.env.min_position
        rscale = self.env.screen_width / world_width
        carwidth = 40 / rscale
        carheight = 20 / rscale
        import pygame
        clearance = 10 / rscale
        # Draw the car body as a rectangle rotated to follow the slope of the hill.
        l, r, t, b = -carwidth / 2, carwidth / 2, carheight, 0
        coords = []
        for c in [(l, b), (l, t), (r, t), (r, b)]:
            c = pygame.math.Vector2(c).rotate_rad(math.cos(3 * pos))
            coords.append(
                (
                    c[0] + pos * scale,
                    c[1] + clearance + self.env._height(pos) * scale,
                )
            )
        self.ga.polygon("adsfasdf", coords=coords, outlineColor=BLACK, fillColor=BLACK, width=2)
        # Draw the two wheels.
        for c in [(carwidth / 4, 0), (-carwidth / 4, 0)]:
            c = pygame.math.Vector2(c).rotate_rad(math.cos(3 * pos))
            wheel = (
                c[0] + pos * scale,
                c[1] + clearance + self.env._height(pos) * scale,
            )
            self.ga.circle("asdf", (wheel[0], wheel[1]), int(carheight / 2.5 * rscale),
                           fillColor=(128, 128, 128), outlineColor=(70, 70, 70))
        # Draw the goal flag.
        flagx = self.env.goal_position * scale
        flagy1 = self.env._height(self.env.goal_position) * scale
        flagy2 = flagy1 + 50 / rscale
        self.ga.line("asdfasdf", (flagx, flagy1), (flagx, flagy2), color=(0, 0, 0))
        self.ga.polygon(
            "sdfasdf",
            [(flagx, flagy2), (flagx, flagy2 - 10 / rscale), (flagx + 25 / rscale, flagy2 - 5 / rscale)],
            (204, 204, 0),
        )
        # Optionally draw the agent's estimated value function in the right-hand panel.
        Vscale = 6

        def x2s(pos, vel):
            # Map an observation (position, velocity) to screen coordinates in the right-hand panel.
            return pos + 1.75, (vel + 0.1) * Vscale

        xmin, ymin = x2s(self.env.observation_space.low[0], self.env.observation_space.low[1])
        xmax, ymax = x2s(self.env.observation_space.high[0], self.env.observation_space.high[1])
        px, py = x2s(*np.asarray(self.env.state).tolist())
        if self.agent is not None:
            def colfunc(val, minval, maxval, startcolor, stopcolor):
                """ Convert a value in the range minval...maxval to a color interpolated between
                startcolor and stopcolor. The colors passed and the one returned are
                composed of a sequence of N component values (e.g. RGB).
                """
                f = float(val - minval) / (maxval - minval)
                return tuple(float(f * (b - a) + a) for (a, b) in zip(startcolor, stopcolor))

            RED, YELLOW, GREEN = (1, 0, 0), (1, 1, 0), (0, 1, 0)
            CYAN, BLUE, MAGENTA = (0, 1, 1), (0, 0, 1), (1, 0, 1)
            grid_size = 40
            low = self.env.unwrapped.observation_space.low
            high = self.env.unwrapped.observation_space.high
            X, Y = np.meshgrid(np.linspace(low[0], high[0], grid_size), np.linspace(low[1], high[1], grid_size))
            Z = X * 0
            # Cache the feature vectors for the grid of states; they do not change between frames.
            if self.x_cache is None or len(self.x_cache) == 0:
                for i, (x, y) in enumerate(zip(X.flat, Y.flat)):
                    s = (x, y)
                    xx = [self.agent.Q.x(s, a) for a in range(self.env.action_space.n)]
                    self.x_cache.append(xx)
            # Evaluate max_a Q(s, a) = max_a w @ x(s, a) on the grid.
            for i in range(len(self.x_cache)):
                Z.flat[i] = max([float(self.agent.Q.w @ dx) for dx in self.x_cache[i]])
            # Draw one colored cell per grid point, interpolating from BLUE (low) to RED (high).
            for i in range(len(Z.flat)):
                ddx = (X.max() - X.min()) / (grid_size - 1)
                ddy = (Y.max() - Y.min()) / (grid_size - 1)
                z = colfunc(Z.flat[i], Z.min(), Z.max() + 0.01, BLUE, RED)
                z = tuple(int(x * 255) for x in z)
                xmin, ymin = x2s(X.flat[i], Y.flat[i])
                xmax, ymax = x2s(X.flat[i] + ddx, Y.flat[i] + ddy)
                self.ga.rectangle(color=z, x=xmin, y=ymin, width=xmax - xmin, height=ymax - ymin)
        # Outline the panel and mark the current state.
        self.ga.rectangle(color=BLACK, x=xmin, y=ymin, width=xmax - xmin, height=ymax - ymin, border=1)
        self.ga.circle("asdf", (px, py), r=5, fillColor=(200, 200, 200))
        return self.ga.blit(render_mode=self.env.render_mode)

    def close(self):
        self.ga.close()

if __name__ == '__main__':
    from irlc import interactive, train
    from irlc.ex11.semi_grad_sarsa import LinearSemiGradSarsa
    env = FancyMountainCar(render_mode='human')
    num_of_tilings = 8
    alpha = 0.3
    agent = LinearSemiGradSarsa(env, gamma=1, alpha=alpha / num_of_tilings, epsilon=0)
    env, agent = interactive(env, agent)
    train(env, agent, num_episodes=10)
    env.close()
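
# LinearSemiGradSarsa approximates the action-value function linearly, q(s, a; w) = w @ x(s, a), and
# follows the semi-gradient Sarsa update from Sutton and Barto. The sketch below is a minimal reference
# for that update, not the course implementation: the feature map `x` passed in is an assumption (the
# actual agent uses tile coding), and w and x(s, a) are assumed to be numpy arrays. The default alpha
# mirrors the alpha / num_of_tilings choice above.
def semi_gradient_sarsa_update_sketch(w, x, s, a, r, sp, ap, done, alpha=0.3 / 8, gamma=1.0):
    """One update: w <- w + alpha * (r + gamma * w @ x(s',a') - w @ x(s,a)) * x(s,a)."""
    q_sa = w @ x(s, a)
    # Semi-gradient: the target is treated as a constant, so only x(s, a) enters the gradient.
    target = r if done else r + gamma * (w @ x(sp, ap))
    return w + alpha * (target - q_sa) * x(s, a)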

# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.gridworld.gridworld_environments import BookGridEnvironment
from irlc.exam_tabular_examples.sarsa_nstep_delay import SarsaDelayNAgent
from irlc import interactive, train

if __name__ == "__main__":
    env = BookGridEnvironment(render_mode='human')
    agent = SarsaDelayNAgent(env, gamma=1, epsilon=0.1, alpha=0.9, n=1)  # Exam problem.
    env, agent = interactive(env, agent)
    train(env, agent, num_episodes=10)

# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.ex10.question_td0 import a_compute_deltas, b_perform_td0, c_perform_td0_batched
from unitgrade import Report, UTestCase, cache
from irlc import train
import irlc.ex10.envs
import gymnasium as gym
from gymnasium.wrappers import TimeLimit
from irlc.tests.tests_week08 import train_recording

class MCAgentQuestion(UTestCase):
    """ Test of MC agent """
    def get_env_agent(self):
        from irlc.ex10.mc_agent import MCAgent
        env = gym.make("SmallGridworld-v0")
        env = TimeLimit(env, max_episode_steps=1000)
        gamma = .8
        agent = MCAgent(env, gamma=gamma, first_visit=True)
        return env, agent

    @cache
    def compute_trajectories(self):
        env, agent = self.get_env_agent()
        _, trajectories = train(env, agent, return_trajectory=True, num_episodes=1, max_steps=100)
        return trajectories, agent.Q.to_dict()

    def test_Q_function(self):
        trajectories, Q = self.compute_trajectories()
        env, agent = self.get_env_agent()
        train_recording(env, agent, trajectories)
        Qc = []
        Qe = []
        for s, qa in Q.items():
            for a, q in qa.items():
                Qe.append(q)
                Qc.append(agent.Q[s, a])
        self.assertL2(Qe, Qc, tol=1e-5)
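
# The MCAgent tested above estimates Q by first-visit Monte Carlo: returns are computed backwards
# through each episode and Q(s, a) is the average of the returns following the first visit to (s, a).
# A minimal reference sketch under those assumptions (not the course implementation in irlc.ex10.mc_agent):
def first_visit_mc_sketch(episode, gamma, Q, returns):
    """`episode` is a list of (s, a, r) tuples; `returns` maps (s, a) to a list of observed returns."""
    first_visit_index = {}
    for t, (s, a, _) in enumerate(episode):
        first_visit_index.setdefault((s, a), t)
    G = 0
    for t in reversed(range(len(episode))):
        s, a, r = episode[t]
        G = gamma * G + r
        if first_visit_index[(s, a)] == t:  # only the first occurrence of (s, a) contributes
            returns.setdefault((s, a), []).append(G)
            Q[(s, a)] = sum(returns[(s, a)]) / len(returns[(s, a)])
    return Q, returns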
# class BlackjackQuestion(UTestCase):
# """ MC policy evaluation agent and Blacjack """
# def test_blackjack_mc(self):
# env = gym.make("Blackjack-v1")
# episodes = 50000
# from irlc.ex10.mc_evaluate import MCEvaluationAgent
# from irlc.ex10.mc_evaluate_blackjack import get_by_ace, to_matrix, policy20
# agent = MCEvaluationAgent(env, policy=policy20, gamma=1)
# train(env, agent, num_episodes=episodes)
# w = get_by_ace(agent.v, ace=True)
# X, Y, Z = to_matrix(w)
# print(Z)
# print(Z.dtype)
# self.assertL2(Z, tol=2.5)

class TD0Question(UTestCase):
    """ Test of TD(0) evaluation agent """
    gamma = 0.8

    def get_env_agent(self):
        from irlc.ex10.td0_evaluate import TD0ValueAgent
        env = gym.make("SmallGridworld-v0")
        agent = TD0ValueAgent(env, gamma=self.gamma)
        return env, agent

    @cache
    def compute_trajectories(self):
        env, agent = self.get_env_agent()
        _, trajectories = train(env, agent, return_trajectory=True, num_episodes=1, max_steps=100)
        return trajectories, agent.v

    def test_value_function(self):
        trajectories, v = self.compute_trajectories()
        env, agent = self.get_env_agent()
        train_recording(env, agent, trajectories)
        Qc = []
        Qe = []
        for s, value in v.items():
            Qe.append(value)
            Qc.append(agent.v[s])
        self.assertL2(Qe, Qc, tol=1e-5)

class MCEvaluationQuestion(TD0Question):
    """ Test of MC evaluation agent """
    def get_env_agent(self):
        from irlc.ex10.mc_evaluate import MCEvaluationAgent
        env = gym.make("SmallGridworld-v0")
        env = TimeLimit(env, max_episode_steps=1000)
        gamma = .8
        agent = MCEvaluationAgent(env, gamma=gamma, first_visit=True)
        return env, agent

class ExamQuestionTD0(UTestCase):
    def get_problem(self):
        states = [1, 0, 2, -1, 2, 4, 5, 4, 3, 2, 1, -1]
        rewards = [1, 1, -1, 0, 1, 2, 2, 0, 0, -1, 1]
        v = {s: 0 for s in states}
        gamma = 0.9
        alpha = 0.2
        return v, states, rewards, gamma, alpha

    def test_a(self):
        v, states, rewards, gamma, alpha = self.get_problem()
        self.assertEqualC(a_compute_deltas(v, states, rewards, gamma))

    def test_b(self):
        v, states, rewards, gamma, alpha = self.get_problem()
        self.assertEqualC(b_perform_td0(v, states, rewards, gamma, alpha))

    def test_c(self):
        v, states, rewards, gamma, alpha = self.get_problem()
        self.assertEqualC(c_perform_td0_batched(v, states, rewards, gamma, alpha))
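
# The exam functions tested above operate on a recorded sequence of states and rewards. For reference,
# the standard TD(0) quantities they are built around are the TD errors
# delta_t = r_t + gamma * v(s_{t+1}) - v(s_t) and the update v(s_t) <- v(s_t) + alpha * delta_t.
# Below is a minimal sketch under those assumptions; it does not treat any state specially (e.g. as
# terminal), and the actual signatures live in irlc.ex10.question_td0.
def td0_sweep_sketch(v, states, rewards, gamma, alpha):
    """Apply one TD(0) pass over a trajectory; `states` has one more element than `rewards`."""
    for t in range(len(rewards)):
        delta = rewards[t] + gamma * v[states[t + 1]] - v[states[t]]
        v[states[t]] = v[states[t]] + alpha * delta
    return v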

class Week10Tests(Report):
    title = "Tests for week 10"
    pack_imports = [irlc]
    individual_imports = []
    questions = [(MCAgentQuestion, 10),
                 (MCEvaluationQuestion, 10),
                 # (BlackjackQuestion, 5),
                 (TD0Question, 10),
                 (ExamQuestionTD0, 10),
                 ]

if __name__ == '__main__':
    from unitgrade import evaluate_report_student
    evaluate_report_student(Week10Tests())

# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from unitgrade import UTestCase, Report, cache
import numpy as np
from irlc import train
import irlc.ex10.envs
import gymnasium as gym
from irlc.tests.tests_week08 import train_recording
from irlc.tests.tests_week10 import TD0Question, MCAgentQuestion

# The n-step Sarsa evaluation question (NStepSarseEvaluationQuestion) no longer exists and has been removed.

class QAgentQuestion(MCAgentQuestion):
    """ Test of Q Agent """
    def get_env_agent(self):
        from irlc.ex11.q_agent import QAgent
        env = gym.make("SmallGridworld-v0")
        agent = QAgent(env, gamma=.8)
        return env, agent

class TabularAgentStub(UTestCase):
    """ Average return over many simulated episodes """
    gamma = 0.95
    epsilon = 0.2
    tol = 0.1
    tol_qs = 0.3
    episodes = 9000

    def get_env(self):
        return gym.make("SmallGridworld-v0")

    def get_env_agent(self):
        raise NotImplementedError()

    def get_trained_agent(self):
        env, agent = self.get_env_agent()
        stats, _ = train(env, agent, num_episodes=self.episodes)
        return agent, stats

    def chk_accumulated_reward(self):
        agent, stats = self.get_trained_agent()
        s0, _ = agent.env.reset()
        actions, qs = agent.Q.get_Qs(s0)
        print("Tolerance is", self.tol_qs)
        self.assertL2(qs, tol=self.tol_qs)
        self.assertL2(np.mean([s['Accumulated Reward'] for s in stats]), tol=self.tol)

class SarsaQuestion(TabularAgentStub):
    def get_env_agent(self):
        from irlc.ex11.sarsa_agent import SarsaAgent
        agent = SarsaAgent(self.get_env(), gamma=self.gamma)
        return agent.env, agent

    def test_accumulated_reward(self):
        self.tol_qs = 2.7  # Got 2.65 in one run.
        self.chk_accumulated_reward()

class NStepSarsaQuestion(TabularAgentStub):
    title = "N-step Sarsa"
    def get_env_agent(self):
        from irlc.ex11.nstep_sarsa_agent import SarsaNAgent
        agent = SarsaNAgent(self.get_env(), gamma=self.gamma, n=5)
        return agent.env, agent

    def test_accumulated_reward(self):
        self.tol_qs = 2.7
        self.chk_accumulated_reward()
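
# SarsaNAgent bootstraps from an n-step return rather than a one-step target. For reference, the
# n-step return used in n-step Sarsa is
#   G_{t:t+n} = r_{t+1} + gamma*r_{t+2} + ... + gamma^(n-1)*r_{t+n} + gamma^n * Q(s_{t+n}, a_{t+n}).
# The helper below is an illustrative sketch only, not the course implementation
# (see irlc.ex11.nstep_sarsa_agent).
def nstep_return_sketch(rewards, q_bootstrap, gamma, n):
    """`rewards` are the rewards r_{t+1..t+n} actually observed (fewer if the episode ended);
    `q_bootstrap` is Q(s_{t+n}, a_{t+n})."""
    G = sum(gamma ** k * rewards[k] for k in range(min(n, len(rewards))))
    if len(rewards) >= n:  # only bootstrap if the episode did not terminate within the n steps
        G += gamma ** n * q_bootstrap
    return G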

class LinearAgentStub(UTestCase):
    tol = 1e-6
    alpha = 0.08
    num_episodes = 300
    gamma = 0.8
    tol_w = 1e-5

    def get_env_agent(self):
        raise NotImplementedError()

    def get_env(self):
        return gym.make("MountainCar500-v0")

    @cache
    def compute_trajectories(self):
        env, agent = self.get_env_agent()
        _, trajectories = train(env, agent, return_trajectory=True, num_episodes=1, max_steps=100)
        return trajectories, agent.Q.w

    def chk_Q_weight_vector_w(self):
        trajectories, w = self.compute_trajectories()
        env, agent = self.get_env_agent()
        train_recording(env, agent, trajectories)
        print(w)
        print(agent.Q.w)
        self.assertL2(agent.Q.w, w, tol=self.tol_w)

class LinearSarsaAgentQuestion(LinearAgentStub):
    """ Sarsa Agent with linear function approximators """
    def get_env_agent(self):
        env = self.get_env()
        from irlc.ex11.semi_grad_sarsa import LinearSemiGradSarsa
        agent = LinearSemiGradSarsa(env, gamma=1, alpha=self.alpha, epsilon=0)
        return env, agent

    def test_Q_weight_vector_w(self):
        self.tol_w = 1.4
        self.chk_Q_weight_vector_w()

class LinearQAgentQuestion(LinearAgentStub):
    """ Test of Linear Q Agent """
    def get_env_agent(self):
        env = self.get_env()
        alpha = 0.1
        from irlc.ex11.semi_grad_q import LinearSemiGradQAgent
        agent = LinearSemiGradQAgent(env, gamma=1, alpha=alpha, epsilon=0)
        return env, agent

    def test_Q_weight_vector_w(self):
        self.tol_w = 7
        self.chk_Q_weight_vector_w()

class Week11Tests(Report):
    title = "Tests for week 11"
    pack_imports = [irlc]
    individual_imports = []
    questions = [
        (QAgentQuestion, 10),
        (LinearQAgentQuestion, 10),
        (LinearSarsaAgentQuestion, 10),
        (SarsaQuestion, 10),
        (NStepSarsaQuestion, 5),
    ]

if __name__ == '__main__':
    from unitgrade import evaluate_report_student
    evaluate_report_student(Week11Tests())