diff --git a/.gitignore b/.gitignore index c014141f3718ba121bdf547318bae0ede81995fd..53552e861876c34bcb300016745c39c949987535 100644 --- a/.gitignore +++ b/.gitignore @@ -10,10 +10,10 @@ exam_tabular_examples #solutions/ex07 #solutions/ex08 # solutions/ex09 -solutions/ex10 -solutions/ex11 -solutions/ex12 -solutions/ex13 +#solutions/ex10 +#solutions/ex11 +#solutions/ex12 +#solutions/ex13 #irlc/ex03 #irlc/ex04 @@ -36,8 +36,8 @@ solutions/ex13 #irlc/tests/tests_week07.py #irlc/tests/tests_week08.py # irlc/tests/tests_week09.py -irlc/tests/tests_week10.py -irlc/tests/tests_week11.py +#irlc/tests/tests_week10.py +#irlc/tests/tests_week11.py irlc/tests/tests_week12.py irlc/tests/tests_week13.py @@ -74,7 +74,7 @@ irlc/exam/exam20*/solution #irlc/lectures/lec08 # irlc/lectures/lec09 #irlc/lectures/lec10 -irlc/lectures/lec11 +#irlc/lectures/lec11 irlc/lectures/lec12 irlc/lectures/lec13 diff --git a/irlc/lectures/lec10/lecture_10_mc_control.py b/irlc/lectures/lec10/lecture_10_mc_control.py index e286478a8cabf26f4878528a1fbc0f402e5c25ef..b727d364088ca893ac99ea5bffa0302eb8b2e48c 100644 --- a/irlc/lectures/lec10/lecture_10_mc_control.py +++ b/irlc/lectures/lec10/lecture_10_mc_control.py @@ -2,12 +2,17 @@ from irlc.lectures.lec10.lecture_10_mc_q_estimation import keyboard_play from irlc.gridworld.gridworld_environments import BookGridEnvironment from irlc.ex10.mc_agent import MCAgent +# from irlc.lectures.lec10.utils import MCAgentResettable + import numpy as np if __name__ == "__main__": np.random.seed(433) - env = BookGridEnvironment(render_mode='human',zoom=2) + env = BookGridEnvironment(render_mode='human',zoom=2, living_reward=-0.05) # agent = MCAgent(env, gamma=0.9, epsilon=0.15, alpha=0.1, first_visit=True) + from irlc.lectures.lec10.utils import agent_reset + MCAgent.reset = agent_reset agent = MCAgent(env, gamma=1.0, epsilon=0.15, alpha=None, first_visit=True) + # env, agent = interactive(env, agent) keyboard_play(env,agent,method_label='MC control') diff --git a/irlc/lectures/lec10/lecture_10_mc_onestate_first.py b/irlc/lectures/lec10/lecture_10_mc_onestate_first.py index c111aa624334fe8611d496bff8bd41ca0dd01ee4..32b7afad787653e53bd9c18c869814429ef81bf1 100644 --- a/irlc/lectures/lec10/lecture_10_mc_onestate_first.py +++ b/irlc/lectures/lec10/lecture_10_mc_onestate_first.py @@ -12,6 +12,8 @@ class CaughtGrid(GridworldEnvironment): def __init__(self, **kwargs): super().__init__(map, living_reward=1, zoom=1.5, **kwargs) + + if __name__ == "__main__": env = CaughtGrid(view_mode=1, render_mode='human') agent = MCEvaluationAgent(env, gamma=1, alpha=None) diff --git a/irlc/lectures/lec10/lecture_10_mc_q_estimation.py b/irlc/lectures/lec10/lecture_10_mc_q_estimation.py index 4ba40a2c3ac4ad28e12c4b0ee1b16d76c9adc667..bdba3e189f5f35f38f8f3ad73c35ab6e70d8228c 100644 --- a/irlc/lectures/lec10/lecture_10_mc_q_estimation.py +++ b/irlc/lectures/lec10/lecture_10_mc_q_estimation.py @@ -11,21 +11,6 @@ def keyboard_play(env, agent, method_label='MC',autoplay=False, num_episodes=100 env.close() -def automatic_play(env, agent, method_label='MC'): - # agent = PlayWrapper(agent, env) - env = VideoMonitor(env, agent=agent, fps=40, continious_recording=True, agent_monitor_keys=('pi', 'Q'), render_kwargs={'method_label': method_label}) - train(env, agent, num_episodes=1000) - env.close() - -def automatic_play_value(env, agent, method_label='MC'): - agent.label = method_label - env, agent = interactive(env, agent) - - # env = VideoMonitor(env, agent=agent, fps=40, continious_recording=True, agent_monitor_keys=('v'), 
render_kwargs={'method_label': method_label}) - # agent = PlayWrapper(agent, env) - train(env, agent, num_episodes=1000) - env.close() - if __name__ == "__main__": env = BookGridEnvironment(render_mode='human', zoom=2, living_reward=-0.05) from irlc.ex10.mc_agent import MCAgent diff --git a/irlc/lectures/lec10/lecture_10_mc_value_every.py b/irlc/lectures/lec10/lecture_10_mc_value_every.py index 8598fa5e78834d5337f33217a21eeb7694af587e..d42c5ac8aa80eca9d1b82a5cb56aa35436d1b665 100644 --- a/irlc/lectures/lec10/lecture_10_mc_value_every.py +++ b/irlc/lectures/lec10/lecture_10_mc_value_every.py @@ -6,6 +6,6 @@ from irlc.ex10.mc_evaluate import MCEvaluationAgent if __name__ == "__main__": env = BookGridEnvironment(view_mode=1, render_mode='human', living_reward=-0.05) - agent = MCEvaluationAgent(env, gamma=.9, alpha=None, first_visit=False) + agent = MCEvaluationAgent(env, gamma=1, alpha=None, first_visit=False) keyboard_play_value(env,agent,method_label='MC every') diff --git a/irlc/lectures/lec10/lecture_10_mc_value_first_one_state.py b/irlc/lectures/lec10/lecture_10_mc_value_first_one_state.py index c998543f234744811dbbf68613dce641776f1934..17406612468718a3279716d5d2791966cf48a027 100644 --- a/irlc/lectures/lec10/lecture_10_mc_value_first_one_state.py +++ b/irlc/lectures/lec10/lecture_10_mc_value_first_one_state.py @@ -6,6 +6,7 @@ from irlc.ex10.mc_evaluate import MCEvaluationAgent import numpy as np from irlc import interactive, train + class MCAgentOneState(MCEvaluationAgent): def __init__(self, *args, state=None, **kwargs): a = 34 @@ -17,18 +18,19 @@ class MCAgentOneState(MCEvaluationAgent): def _clear_states(self, val=None): for s in self.env.mdp.nonterminal_states: - # for a in self.env.mdp.A(s): - # self.Q[s,a] = 0 if s != self.state: self.returns_sum_S[s] = val self.returns_count_N[s] = val - if s in self.v: k = next(self.env.mdp.Psr(s, self.env.mdp.A(s)[0]).keys().__iter__() )[0] if not self.env.mdp.is_terminal(k): del self.v[s] + def reset(self): + from irlc.lectures.lec10.utils import agent_reset + agent_reset(self) + self._clear_states(None) def train(self, s, a, r, sp, done=False, info_s=None, info_sp=None): # self.episode = [e for e in self.episode if e[0] == self.state] @@ -39,6 +41,7 @@ class MCAgentOneState(MCEvaluationAgent): if __name__ == "__main__": env = BookGridEnvironment(render_mode='human', living_reward=-0.05, print_states=True, zoom=2) + agent = MCAgentOneState(env, gamma=1, alpha=None, first_visit=True) method_label = 'MC (gamma=1)' agent.label = method_label @@ -49,16 +52,3 @@ if __name__ == "__main__": num_episodes = 1000 train(env, agent, num_episodes=num_episodes) env.close() - - import matplotlib.pyplot as plt - import numpy as np - - import matplotlib.pyplot as plt - import numpy as np - - lt = np.linspace(np.log(1000), np.log(2000) + 0*5000) - plt.plot(lt, 5 + 2 * np.sqrt(lt / 500), 'k-') - plt.plot(lt, 10 + 2 * np.sqrt(lt / (np.exp(lt) - 500)), 'r-') - plt.xlabel('log(t)') - plt.show() - # keyboard_play(env,agent,method_label='MC (alpha=0.5)') diff --git a/irlc/lectures/lec10/lecture_10_mc_value_first_one_state_b.py b/irlc/lectures/lec10/lecture_10_mc_value_first_one_state_b.py index 6567221b84c2df45f4c73f7921df5173c7e66608..4f7c8d223c5fad57c970144ed17aa326eb0bc7cc 100644 --- a/irlc/lectures/lec10/lecture_10_mc_value_first_one_state_b.py +++ b/irlc/lectures/lec10/lecture_10_mc_value_first_one_state_b.py @@ -7,40 +7,6 @@ from irlc.ex10.mc_evaluate import MCEvaluationAgent import numpy as np from irlc import interactive, train -# class 
MCAgentOneState(MCEvaluationAgent): -# def __init__(self, *args, state=None, **kwargs): -# a = 34 -# super().__init__(*args, **kwargs) -# if state is None: -# state = self.env.mdp.initial_state -# self.state = state -# self._clear_states() -# -# def _clear_states(self, val=None): -# for s in self.env.mdp.nonterminal_states: -# # for a in self.env.mdp.A(s): -# # self.Q[s,a] = 0 -# if s != self.state: -# self.returns_sum_S[s] = val -# self.returns_count_N[s] = val -# if s in self.v: -# k = next(self.env.mdp.Psr(s, self.env.mdp.A(s)[0]).keys().__iter__() )[0] -# if not self.env.mdp.is_terminal(k): -# -# del self.v[s] -# -# def train(self, s, a, r, sp, done=False, info_s=None, info_sp=None): -# # self.episode = [e for e in self.episode if e[0] == self.state] -# self._clear_states(0) -# super().train(s, a, r, sp, done) -# # Clear out many of the state, actions: -# self._clear_states(None) -# # for s in self.env.mdp.nonterminal_states: -# # if s != self.state: -# # self.v[s] = None -# -# pass - if __name__ == "__main__": env = BookGridEnvironment(render_mode='human', living_reward=-0.05) diff --git a/irlc/lectures/lec10/lecture_10_td_keyboard.py b/irlc/lectures/lec10/lecture_10_td_keyboard.py index 8787900face05cca2791b80d72fc51323dec2392..d1c9d9d9f921cb6306549d4a1769f4104fd10413 100644 --- a/irlc/lectures/lec10/lecture_10_td_keyboard.py +++ b/irlc/lectures/lec10/lecture_10_td_keyboard.py @@ -2,8 +2,10 @@ from irlc.lectures.lec10.lecture_10_mc_q_estimation import automatic_play_value from irlc.gridworld.gridworld_environments import BookGridEnvironment from irlc.ex10.td0_evaluate import TD0ValueAgent +from irlc.lectures.lec10.utils import agent_reset if __name__ == "__main__": env = BookGridEnvironment(render_mode='human', living_reward=-0.05) + TD0ValueAgent.reset = agent_reset agent = TD0ValueAgent(env, gamma=1.0, alpha=0.2) automatic_play_value(env,agent,method_label='TD(0)') diff --git a/irlc/lectures/lec10/utils.py b/irlc/lectures/lec10/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..fc894541b47de7124844437e2a5a572b5ff392d7 --- /dev/null +++ b/irlc/lectures/lec10/utils.py @@ -0,0 +1,20 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.ex10.mc_agent import MCAgent +from irlc.ex09.rl_agent import TabularQ + +class MCAgentResettable(MCAgent): + def reset(self): + return agent_reset(self) + +def agent_reset(self): + # General reset option. Wroks on many agents. + attrs = ['returns_sum_S', 'returns_count_N', 'Q', 'v'] + + for attr in attrs: + if hasattr(self, attr): + at = getattr(self, attr) + if isinstance(at, dict): + at.clear() + + if hasattr(self, 'Q') and isinstance(self.Q, TabularQ): + self.Q.q_.clear() diff --git a/irlc/lectures/lec11/__init__.py b/irlc/lectures/lec11/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a56057c84d0ceac54aab1d40ba0f370c77fe10be --- /dev/null +++ b/irlc/lectures/lec11/__init__.py @@ -0,0 +1 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. 
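For reference, a minimal sketch of how the new agent_reset helper in irlc/lectures/lec10/utils.py is meant to be wired up, mirroring the monkey-patching already done above in lecture_10_mc_control.py and lecture_10_td_keyboard.py (all names are taken from this diff; only the explicit agent.reset() call at the end is illustrative):

from irlc.ex10.mc_agent import MCAgent
from irlc.gridworld.gridworld_environments import BookGridEnvironment
from irlc.lectures.lec10.utils import agent_reset

# Attach the generic reset to the agent class. agent_reset clears the dict-valued
# attributes returns_sum_S, returns_count_N, Q and v when they exist, plus the
# backing dict of a TabularQ instance, so a lecture demo can restart from scratch.
MCAgent.reset = agent_reset

env = BookGridEnvironment(render_mode='human', zoom=2, living_reward=-0.05)
agent = MCAgent(env, gamma=1.0, epsilon=0.15, alpha=None, first_visit=True)
agent.reset()  # wipe the learned estimates; subclassing MCAgentResettable is the alternative route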
diff --git a/irlc/lectures/lec11/lecture_10_grid_lin_q.py b/irlc/lectures/lec11/lecture_10_grid_lin_q.py new file mode 100644 index 0000000000000000000000000000000000000000..659201d8487242b35aaa56cde863327a2d341595 --- /dev/null +++ b/irlc/lectures/lec11/lecture_10_grid_lin_q.py @@ -0,0 +1,10 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.berkley.rl.semi_grad_q import LinearSemiGradQAgent +from irlc.ex11.feature_encoder import GridworldXYEncoder +from irlc.gridworld.gridworld_environments import BookGridEnvironment +from irlc.lectures.lec10.lecture_10_mc_q_estimation import keyboard_play + +if __name__ == "__main__": + env = BookGridEnvironment(render_mode='human') + agent = LinearSemiGradQAgent(env, gamma=0.95, epsilon=0.1, alpha=.01, q_encoder=GridworldXYEncoder(env)) + keyboard_play(env, agent, method_label="Q-lin-xy") diff --git a/irlc/lectures/lec11/lecture_10_sarsa_open.py b/irlc/lectures/lec11/lecture_10_sarsa_open.py new file mode 100644 index 0000000000000000000000000000000000000000..5793603a4e00f1dba5cdfe30343ca2f3d2e155d3 --- /dev/null +++ b/irlc/lectures/lec11/lecture_10_sarsa_open.py @@ -0,0 +1,13 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.gridworld.gridworld_environments import OpenGridEnvironment +from irlc.lectures.lec10.lecture_10_mc_q_estimation import keyboard_play +from irlc.ex11.sarsa_agent import SarsaAgent + +def open_play(Agent, method_label, frames_per_second=30, **args): + env = OpenGridEnvironment(render_mode='human', frames_per_second=frames_per_second) + agent = Agent(env, gamma=0.99, epsilon=0.1, alpha=.5, **args) + method_label = f"{method_label} (gamma=0.99, epsilon=0.1, alpha=0.5)" + keyboard_play(env, agent, method_label=method_label) + +if __name__ == "__main__": + open_play(SarsaAgent, method_label="Sarsa") diff --git a/irlc/lectures/lec11/lecture_11_mountaincar_feature_space.py b/irlc/lectures/lec11/lecture_11_mountaincar_feature_space.py new file mode 100644 index 0000000000000000000000000000000000000000..1c99f0330abd3612e5ee12064c8beaa0f6be7d5f --- /dev/null +++ b/irlc/lectures/lec11/lecture_11_mountaincar_feature_space.py @@ -0,0 +1,25 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. 
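+# Lecture demo: linear semi-gradient Sarsa on mountain car, rendered through the custom
+# FancyMountainCar environment (see mountain_car_env.py later in this diff), which can overlay
+# the agent's value estimates while it trains. Runs 10 interactive episodes with gamma=1,
+# epsilon=0 and step size alpha/num_of_tilings.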
+from irlc.lectures.lec11.mountain_car_env import FancyMountainCar +from irlc.pacman.pacman_resources import WHITE, BLACK +from irlc.utils.graphics_util_pygame import GraphicsUtilGym +from irlc.lectures.lec11.mountain_car_env import MountainCarVisualization +from irlc.ex11.semi_grad_sarsa import LinearSemiGradSarsa + +if __name__ == '__main__': + from irlc import Agent, interactive, train + env = FancyMountainCar(render_mode='human') + num_of_tilings = 8 + alpha = 0.3 + + # env = gym.make("MountainCar-v0") + agent = LinearSemiGradSarsa(env, gamma=1, alpha=alpha/num_of_tilings, epsilon=0) + # agent = Agent(env) + + env, agent = interactive(env, agent) + train(env, agent, num_episodes=10) + + env.close() + + + + pass diff --git a/irlc/lectures/lec11/lecture_11_mountaincar_nolearn.py b/irlc/lectures/lec11/lecture_11_mountaincar_nolearn.py new file mode 100644 index 0000000000000000000000000000000000000000..bb94976cd04de92cdbee2b403a408e468182fcc8 --- /dev/null +++ b/irlc/lectures/lec11/lecture_11_mountaincar_nolearn.py @@ -0,0 +1,25 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.lectures.lec11.mountain_car_env import FancyMountainCar +from irlc.ex11.semi_grad_sarsa import LinearSemiGradSarsa + +class RandomWeightAgent(LinearSemiGradSarsa): + def train(self, *args, **kwargs): + pass + pass + +if __name__ == '__main__': + from irlc import Agent, interactive, train + env = FancyMountainCar(render_mode='human') + num_of_tilings = 8 + alpha = 0 + agent = LinearSemiGradSarsa(env, gamma=1, alpha=alpha/num_of_tilings, epsilon=0) + # agent = Agent(env) + + env, agent = interactive(env, agent) + train(env, agent, num_episodes=10) + + env.close() + + + + pass diff --git a/irlc/lectures/lec11/lecture_11_mountaincar_random_weights.py b/irlc/lectures/lec11/lecture_11_mountaincar_random_weights.py new file mode 100644 index 0000000000000000000000000000000000000000..e8709431847cbbbd30552ca2be76f1c94431fa67 --- /dev/null +++ b/irlc/lectures/lec11/lecture_11_mountaincar_random_weights.py @@ -0,0 +1,25 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +import numpy as np +from irlc.lectures.lec11.mountain_car_env import FancyMountainCar +from irlc.ex11.semi_grad_sarsa import LinearSemiGradSarsa + +class RandomWeightAgent(LinearSemiGradSarsa): + def train(self, *args, **kwargs): + super().train(*args, **kwargs) + self.Q.w = np.random.randn(self.Q.w.shape[0]) + +if __name__ == '__main__': + from irlc import Agent, interactive, train + env = FancyMountainCar(render_mode='human') + num_of_tilings = 8 + alpha = 0.3 + # env = gym.make("MountainCar-v0") + agent = RandomWeightAgent(env) #(env, gamma=1, alpha=alpha/num_of_tilings, epsilon=0) + env, agent = interactive(env, agent) + train(env, agent, num_episodes=10) + + env.close() + + + + pass diff --git a/irlc/lectures/lec11/lecture_11_nstep_open.py b/irlc/lectures/lec11/lecture_11_nstep_open.py new file mode 100644 index 0000000000000000000000000000000000000000..ab672b2067bcd2dfeed17a719dad1c42186c32a0 --- /dev/null +++ b/irlc/lectures/lec11/lecture_11_nstep_open.py @@ -0,0 +1,11 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. 
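+# Lecture demo: n-step Sarsa (SarsaDelayNAgent with n=8) on the open gridworld, reusing
+# open_play from lecture_10_sarsa_open (gamma=0.99, epsilon=0.1, alpha=0.5).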
+# from irlc.berkley.rl.feature_encoder import SimplePacmanExtractor + +from irlc.ex11.nstep_sarsa_agent import SarsaNAgent +from irlc.exam_tabular_examples.sarsa_nstep_delay import SarsaDelayNAgent + +from irlc.lectures.lec11.lecture_10_sarsa_open import open_play +if __name__ == "__main__": + # env = OpenGridEnvironment() + # agent = (env, gamma=0.95, epsilon=0.1, alpha=.5) + open_play(SarsaDelayNAgent, method_label="N-step Sarsa n=8", n=8) diff --git a/irlc/lectures/lec11/lecture_11_pacman_lin_q.py b/irlc/lectures/lec11/lecture_11_pacman_lin_q.py new file mode 100644 index 0000000000000000000000000000000000000000..3b7e121efe6485e2529359a5979091cfc207cd1a --- /dev/null +++ b/irlc/lectures/lec11/lecture_11_pacman_lin_q.py @@ -0,0 +1,32 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.ex11.semi_grad_q import LinearSemiGradQAgent +from irlc.pacman.pacman_environment import PacmanEnvironment, PacmanWinWrapper +from irlc.ex11.feature_encoder import SimplePacmanExtractor +import matplotlib.pyplot as plt +# from irlc.utils.video_monitor import VideoMonitor +from irlc.ex01.agent import train +# from irlc import PlayWrapper +from irlc import interactive + +def play_pacman(env, agent, layout = 'smallGrid'): + train(env, agent, num_episodes=100) + + env2 = PacmanWinWrapper(env) + + # env2 = Monitor(env2, directory="experiments/randomdir", force=True) + # env2 = VideoMonitor(env2) + env2, agent = interactive(env, agent) + agent.epsilon = 0 + agent.alpha = 0 + # agent = PlayWrapper(agent, env2) + train(env2, agent, num_episodes=100) + plt.show() + env.close() + +if __name__ == "__main__": + layout = 'smallGrid' + env = PacmanEnvironment(animate_movement=True, layout=layout, render_mode='human', frames_per_second=100) + qex = SimplePacmanExtractor(env) + agent = LinearSemiGradQAgent(env, epsilon=0.05, alpha=0.1, gamma=0.8, q_encoder=qex) + play_pacman(env, agent, layout = 'smallGrid') + # main_plot('experiments/q_lin') diff --git a/irlc/lectures/lec11/lecture_11_pacman_q.py b/irlc/lectures/lec11/lecture_11_pacman_q.py new file mode 100644 index 0000000000000000000000000000000000000000..7a51a0679ae8ee815a34df28dedb721b5632ebee --- /dev/null +++ b/irlc/lectures/lec11/lecture_11_pacman_q.py @@ -0,0 +1,35 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. 
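+# Lecture demo: tabular Q-learning on the smallGrid Pacman layout. play_pacman first trains
+# for 100 episodes, then sets epsilon = alpha = 0 and runs 100 further episodes greedily
+# inside PacmanWinWrapper with interactive rendering.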
+from irlc.pacman.pacman_environment import PacmanEnvironment, PacmanWinWrapper +# from irlc.berkley.rl.feature_encoder import SimplePacmanExtractor +# from irlc.utils.player_wrapper_pyglet import PlayWrapper +from irlc import main_plot +import matplotlib.pyplot as plt +# from irlc.utils.video_monitor import VideoMonitor +from irlc.ex01.agent import train +# from irlc.lectures.lecture_09_mc import keyboard_play +from irlc.ex11.q_agent import QAgent +from irlc import interactive + + +def play_pacman(env, agent, layout = 'smallGrid'): + + train(env, agent, num_episodes=100) + env2 = PacmanWinWrapper(env) + # env2 = Monitor(env2, directory="experiments/randomdir", force=True) + # env2 = VideoMonitor(env2) + env2, agent = interactive(env2, agent) + agent.epsilon = 0 + agent.alpha = 0 + # agent = PlayWrapper(agent, env2) + train(env2, agent, num_episodes=100) + plt.show() + env.close() + +if __name__ == "__main__": + layout = 'smallGrid' + env = PacmanEnvironment(animate_movement=False, layout=layout, render_mode='human') + agent = QAgent(env, epsilon=0.05, alpha=0.1, gamma=0.8) + # from irlc import PlayWrapper + # agent = PlayWrapper(agent, env) + play_pacman(env, agent, layout = 'smallGrid') + # main_plot('experiments/q_lin') diff --git a/irlc/lectures/lec11/lecture_11_q.py b/irlc/lectures/lec11/lecture_11_q.py new file mode 100644 index 0000000000000000000000000000000000000000..d3df9dbb8f1836bfbe0c622be1212acbb57b6367 --- /dev/null +++ b/irlc/lectures/lec11/lecture_11_q.py @@ -0,0 +1,10 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +# from irlc.berkley.rl.feature_encoder import SimplePacmanExtractor +from irlc.gridworld.gridworld_environments import BookGridEnvironment +from irlc.lectures.lec10.lecture_10_mc_q_estimation import keyboard_play +from irlc.ex11.q_agent import QAgent + +if __name__ == "__main__": + env = BookGridEnvironment(render_mode='human') + agent = QAgent(env, gamma=0.95, epsilon=0.1, alpha=.2) + keyboard_play(env, agent, method_label="Q-learning") diff --git a/irlc/lectures/lec11/lecture_11_q_cliff.py b/irlc/lectures/lec11/lecture_11_q_cliff.py new file mode 100644 index 0000000000000000000000000000000000000000..421db1fa16764a3b432bd03d4a072f2108dabe77 --- /dev/null +++ b/irlc/lectures/lec11/lecture_11_q_cliff.py @@ -0,0 +1,18 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. 
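+# Lecture demo: Q-learning on the cliff-walk gridworld (CliffGridEnvironment2), reusing the
+# cliffwalk() loop and the gamma, alpha and epsi settings from lecture_11_sarsa_cliff so the
+# two methods run under identical conditions.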
+from irlc.gridworld.gridworld_environments import CliffGridEnvironment, CliffGridEnvironment2 +from irlc.ex11.q_agent import QAgent + + +# def cliffwalk(env, agent, method_label="method"): +# agent = PlayWrapper(agent, env) + # env = VideoMonitor(env, agent=agent, fps=100, continious_recording=True, agent_monitor_keys=('pi', 'Q'), render_kwargs={'method_label': method_label}) + # train(env, agent, num_episodes=200) + # env.close() + +from irlc.lectures.lec11.lecture_11_sarsa_cliff import cliffwalk, gamma, alpha, epsi +if __name__ == "__main__": + import numpy as np + np.random.seed(1) + env = CliffGridEnvironment2(zoom=.8, render_mode='human') + agent = QAgent(env, gamma=gamma, epsilon=epsi, alpha=alpha) + cliffwalk(env, agent, method_label="Q-learning") diff --git a/irlc/lectures/lec11/lecture_11_q_open.py b/irlc/lectures/lec11/lecture_11_q_open.py new file mode 100644 index 0000000000000000000000000000000000000000..f0a35a5ba17fde85fb2b10da97413aba4879c5c6 --- /dev/null +++ b/irlc/lectures/lec11/lecture_11_q_open.py @@ -0,0 +1,12 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.gridworld_pyglet.gridworld_environments import OpenGridEnvironment +from irlc.lectures.lec10.lecture_10_mc_q_estimation import keyboard_play +from irlc.ex11.q_agent import QAgent + +def open_play(Agent, method_label, **args): + env = OpenGridEnvironment() + agent = Agent(env, gamma=0.99, epsilon=0.1, alpha=.5, **args) + keyboard_play(env, agent, method_label=method_label) + +if __name__ == "__main__": + open_play(QAgent, method_label="Q-learning") diff --git a/irlc/lectures/lec11/lecture_11_sarsa.py b/irlc/lectures/lec11/lecture_11_sarsa.py new file mode 100644 index 0000000000000000000000000000000000000000..791a1b4869b21c64baa09ee575019799da66f2e7 --- /dev/null +++ b/irlc/lectures/lec11/lecture_11_sarsa.py @@ -0,0 +1,9 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.gridworld.gridworld_environments import BookGridEnvironment +from irlc.lectures.lec10.lecture_10_mc_q_estimation import keyboard_play +from irlc.exam_tabular_examples.sarsa_nstep_delay import SarsaDelayNAgent + +if __name__ == "__main__": + env = BookGridEnvironment(render_mode='human') + agent = SarsaDelayNAgent(env, gamma=0.95, epsilon=0.1, alpha=.96, n=1) + keyboard_play(env, agent, method_label="Sarsa") diff --git a/irlc/lectures/lec11/lecture_11_sarsa_cliff.py b/irlc/lectures/lec11/lecture_11_sarsa_cliff.py new file mode 100644 index 0000000000000000000000000000000000000000..3d250fa581975dbbc9fbf1fd2afebd5814c6b6e3 --- /dev/null +++ b/irlc/lectures/lec11/lecture_11_sarsa_cliff.py @@ -0,0 +1,33 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. 
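+# Lecture demo: Sarsa on the cliff-walk gridworld. cliffwalk() attaches interactive rendering
+# and trains for 1000 episodes; the hyperparameters epsi=0.5, gamma=1.0, alpha=.3 defined here
+# are also imported by lecture_11_q_cliff.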
+# from irlc.utils.player_wrapper_pyglet import PlayWrapper +from irlc.gridworld.gridworld_environments import CliffGridEnvironment, CliffGridEnvironment2 +# from irlc.utils.video_monitor import VideoMonitor +from irlc.ex01.agent import train +from irlc import interactive +from irlc.ex11.sarsa_agent import SarsaAgent + + +def cliffwalk(env, agent, method_label="method"): + # agent = PlayWrapper(agent, env) + env.label = method_label + agent.method_label = method_label + agent.label = method_label + agent.method = method_label + + + env, agent = interactive(env, agent) + # env = VideoMonitor(env, agent=agent, fps=200, continious_recording=True, agent_monitor_keys=('pi', 'Q'), render_kwargs={'method_label': method_label}) + train(env, agent, num_episodes=1000) + env.close() + +epsi = 0.5 +gamma = 1.0 +alpha = .3 + +if __name__ == "__main__": + import numpy as np + np.random.seed(1) + env = CliffGridEnvironment2(zoom=.8, render_mode='human') + agent = SarsaAgent(env, gamma=gamma, epsilon=epsi, alpha=alpha) + # agent = QAgent(env, gamma=0.95, epsilon=0.5, alpha=.2) + cliffwalk(env, agent, method_label="Sarsa") diff --git a/irlc/lectures/lec11/mountain_car_env.py b/irlc/lectures/lec11/mountain_car_env.py new file mode 100644 index 0000000000000000000000000000000000000000..c105e3aca23cbc864c96e2c7ab14a41aa8a1b53a --- /dev/null +++ b/irlc/lectures/lec11/mountain_car_env.py @@ -0,0 +1,326 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from gymnasium.envs.classic_control import MountainCarEnv +import math +from typing import Optional +import numpy as np +import gymnasium as gym +from gymnasium import spaces +from gymnasium.envs.classic_control import utils +from gymnasium.error import DependencyNotInstalled + +class FancyMountainCar(MountainCarEnv): # piggybag on the original env. 
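+    # Keeps MountainCarEnv's dynamics unchanged and only swaps the renderer: render() lazily
+    # builds a MountainCarVisualization (passing self.agent when one has been attached to the
+    # env), so the car and, when an agent is present, its value estimates are drawn via pygame.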
+ visualization = None + + def __init__(self, render_mode: Optional[str] = None, goal_velocity=0): + super().__init__(render_mode=render_mode, goal_velocity=goal_velocity) + + def render(self): + if self.visualization is None: + self.visualization = MountainCarVisualization(self, self.agent if hasattr(self, 'agent') else None) + return self.visualization.render() + + def close(self): + if self.visualization is not None: + self.visualization.close() + + +from irlc.pacman.pacman_resources import WHITE, BLACK +from irlc.utils.graphics_util_pygame import GraphicsUtilGym +class MountainCarVisualization: + def __init__(self, env, agent): + self.env = env + self.agent = agent + + # self.k = 0 + # self.states = [] + # self.actions = [] + # self.factories = [] + # self.inventory = inventory + # xmin = -0.2 + # xmax = inventory.N * 2 + 1.4 + # xmax = 4 + + # ymin = -0.4 + # ymax = 1.4 + 0.2 + # dx = xmax - xmin + # dy = ymax - ymin + self.ga = GraphicsUtilGym() + # screen_width = 1300 + screen_width = env.screen_width * 2 + # + # -env.min_position + # env.max_position + + xmin = env.min_position + xmax = env.max_position + 1.8 + # env._height + + screen_height = env.screen_height + ymin = 0 + ymax = 1.2 + # screen_height = dy * (screen_width / dx) + frames_per_second = 30 + self.ga.begin_graphics(screen_width, screen_height, + local_xmin_xmax_ymin_ymax=(xmin, xmax, ymax, ymin), frames_per_second=frames_per_second, + color=WHITE, title=f"MountainCar Environment") + + # self.last_action = None + # self.agent = None + # self.last_reward = None + # self.scale = screen_width / dx + + x_cache = [] + + + def render(self): + # if self.env.render_mode is None: + # assert self.env.spec is not None + # gym.logger.warn( + # "You are calling render method without specifying any render mode. " + # "You can specify the render_mode at initialization, " + # f'e.g. 
gym.make("{self.spec.id}", render_mode="rgb_array")' + # ) + # return + # try: + # import pygame + # from pygame import gfxdraw + # except ImportError as e: + # raise DependencyNotInstalled( + # 'pygame is not installed, run `pip install "gymnasium[classic_control]"`' + # ) from e + + # + # + # if self.screen is None: + # pygame.init() + # if self.render_mode == "human": + # pygame.display.init() + # self.screen = pygame.display.set_mode( + # (self.screen_width, self.screen_height) + # ) + # else: # mode in "rgb_array" + # self.screen = pygame.Surface((self.screen_width, self.screen_height)) + # if self.clock is None: + # self.clock = pygame.time.Clock() + self.ga.draw_background() + # self.ga.circle("sadf", pos=(0,0), r=100, fillColor=(100, 10, 50)) + + pos = self.env.state[0] + scale = 1 + + xs = np.linspace(self.env.min_position, self.env.max_position, 100) + ys = self.env._height(xs) + # xys = list(zip((xs - self.env.min_position) * scale, ys * scale)) + + self.ga.polyline("asdfasfd", xs=xs, ys=ys, width=1) + + + # pygame.draw.aalines(self.surf, points=xys, closed=False, color=(0, 0, 0)) + + + + world_width = self.env.max_position - self.env.min_position + # scale = self.screen_width / world_width + rscale = self.env.screen_width / world_width + + carwidth = 40 / rscale + carheight = 20 / rscale + + # self.surf = pygame.Surface((self.screen_width, self.screen_height)) + # self.surf.fill((255, 255, 255)) + + # pos = self.state[0] + + # xs = np.linspace(self.min_position, self.max_position, 100) + # ys = self._height(xs) + # xys = list(zip((xs - self.min_position) * scale, ys * scale)) + + # pygame.draw.aalines(self.surf, points=xys, closed=False, color=(0, 0, 0)) + import pygame + clearance = 10 / rscale + # clearance=0.01 + + l, r, t, b = -carwidth / 2, carwidth / 2, carheight, 0 + coords = [] + for c in [(l, b), (l, t), (r, t), (r, b)]: + c = pygame.math.Vector2(c).rotate_rad(math.cos(3 * pos)) + coords.append( + ( + c[0] + (pos - 0*self.env.min_position) * scale, + c[1] + clearance + self.env._height(pos) * scale, + ) + ) + self.ga.polygon("adsfasdf", coords=coords, outlineColor=BLACK, fillColor=BLACK, width=2) + # gfxdraw.aapolygon(self.surf, coords, (0, 0, 0)) + # gfxdraw.filled_polygon(self.surf, coords, (0, 0, 0)) + + + for c in [(carwidth / 4, 0), (-carwidth / 4, 0)]: + c = pygame.math.Vector2(c).rotate_rad(math.cos(3 * pos)) + wheel = ( + c[0] + (pos - 0*self.env.min_position) * scale, + c[1] + clearance + self.env._height(pos) * scale, + ) + + # gfxdraw.aacircle( + # self.surf, wheel[0], wheel[1], int(carheight / 2.5), (128, 128, 128) + # ) + + self.ga.circle("asdf", (wheel[0], wheel[1]), int(carheight / 2.5*rscale), fillColor=(128, 128, 128), outlineColor= (70, 70, 70)) + # + # gfxdraw.filled_circle( + # self.surf, wheel[0], wheel[1], int(carheight / 2.5 * rscale), (128, 128, 128) + # ) + + flagx = (self.env.goal_position - 0*self.env.min_position) * scale + flagy1 = self.env._height(self.env.goal_position) * scale + flagy2 = flagy1 + 50/rscale + self.ga.line("asdfasdf", (flagx, flagy1), (flagx, flagy2), color=(0, 0, 0)) + + self.ga.polygon( + "sdfasdf", + [(flagx, flagy2), (flagx, flagy2 - 10/rscale), (flagx + 25/rscale, flagy2 - 5/rscale)], + (204, 204, 0), + ) + # gfxdraw.aapolygon( + # self.surf, + # [(flagx, flagy2), (flagx, flagy2 - 10/rscale), (flagx + 25/rscale, flagy2 - 5/rscale)], + # (204, 204, 0), + # ) + # gfxdraw.filled_polygon( + # self.surf, + # [(flagx, flagy2), (flagx, flagy2 - 10/rscale), (flagx + 25/rscale, flagy2 - 5)], + # (204, 204, 0), + # ) + # 
Optionally draw the value functino. + # oxmin = 0.6 + # oxmax = 1.7 + # oymin = 0 + # oymax = 1 + + # self.env.observation_space + # dx = 1.5 + # dy = 0 + + # sX = 1 + # sY = 1 + + # Pscale = 1 + Vscale = 6 + + # def pos2s(pos):#, vel): + # return pos + 1.8 #, (vel + 0.2) * 3 + # def vel2s(vel): + # return (vel + 0.) * Vscale + + def x2s(pos, vel): + return pos + 1.75, (vel + 0.1) * Vscale + + xmin,ymin = x2s(self.env.observation_space.low[0], self.env.observation_space.low[1] ) + xmax,ymax = x2s(self.env.observation_space.high[0], self.env.observation_space.high[1] ) + + px, py = x2s( *np.asarray(self.env.state).tolist()) + + + + # self.env.observation_space.low + if self.agent is not None: + + def colfunc(val, minval, maxval, startcolor, stopcolor): + """ Convert value in the range minval...maxval to a color in the range + startcolor to stopcolor. The colors passed and the one returned are + composed of a sequence of N component values (e.g. RGB). + """ + f = float(val - minval) / (maxval - minval) + return tuple( float( f * (b - a) + a) for (a, b) in zip(startcolor, stopcolor)) + + RED, YELLOW, GREEN = (1, 0, 0), (1, 1, 0), (0, 1, 0) + CYAN, BLUE, MAGENTA = (0, 1, 1), (0, 0, 1), (1, 0, 1) + steps = 10 + minval, maxval = 0.0, 1.0 + # incr = (maxval - minval) / steps + # for i in range(steps + 1): + # val = minval + round(i * incr, 1) + # # print('{:.1f} -> ({:.3f}, {:.3f}, {:.3f})'.format( + # # val, *colfunc(val, minval, maxval, BLUE, RED))) + + value_function = lambda s: -max(self.agent.Q.get_Qs(s)[1]) + + grid_size = 40 + # grid_size = 30 + low = self.env.unwrapped.observation_space.low + high = self.env.unwrapped.observation_space.high + X, Y = np.meshgrid(np.linspace(low[0], high[0], grid_size), np.linspace(low[1], high[1], grid_size)) + Z = X * 0 + + if self.x_cache is None or len(self.x_cache) == 0: + for i, (x, y) in enumerate(zip(X.flat, Y.flat)): + s = (x, y) + xx = [self.agent.Q.x(s, a) for a in range(self.env.action_space.n) ] + self.x_cache.append(xx) + # Z.flat[i] = value_function((x, y)) + pass + # for i, (x, y) in enumerate(zip(X.flat, Y.flat)): + # # [max([float(self.agent.Q.w @ dx) for dx in xx]) for xx in self.x_cache] + # + # + # + # Z.flat[i] = value_function((x, y)) + # pass + for i in range(len(self.x_cache)): + Z.flat[i] = max([float(self.agent.Q.w @ dx) for dx in self.x_cache[i]]) + pass + + for i in range(len(Z.flat)): + ddx = (X.max() - X.min()) / (grid_size-1) + ddy = (Y.max() - Y.min()) / (grid_size-1) + + z = colfunc(Z.flat[i], Z.min(), Z.max()+0.01, BLUE, RED) + + z = tuple( int(x*255) for x in z) + + xmin, ymin = x2s(X.flat[i], Y.flat[i]) + xmax, ymax = x2s(X.flat[i]+ddx, Y.flat[i]+ddy) + + self.ga.rectangle(color=z, x=xmin, y=ymin, width=xmax-xmin, height=ymax-ymin) + pass + # colfunc(val, minval, maxval, startcolor, stopcolor): + + self.ga.rectangle(color=BLACK, x=xmin, y=ymin, width=xmax - xmin, height=ymax - ymin, border=1) + self.ga.circle("asdf", (px, py), r=5, fillColor=(200, 200, 200)) + + return self.ga.blit(render_mode=self.env.render_mode) + + # self.surf = pygame.transform.flip(self.surf, False, True) + # self.screen.blit(self.surf, (0, 0)) + # if self.render_mode == "human": + # pygame.event.pump() + # self.clock.tick(self.metadata["render_fps"]) + # pygame.display.flip() + # + # elif self.render_mode == "rgb_array": + # return np.transpose( + # np.array(pygame.surfarray.pixels3d(self.screen)), axes=(1, 0, 2) + # ) + + def close(self): + self.ga.close() + +if __name__ == '__main__': + from irlc import Agent, interactive, train + env = 
FancyMountainCar(render_mode='human') + num_of_tilings = 8 + alpha = 0.3 + from irlc.ex11.semi_grad_sarsa import LinearSemiGradSarsa + # env = gym.make("MountainCar-v0") + agent = LinearSemiGradSarsa(env, gamma=1, alpha=alpha/num_of_tilings, epsilon=0) + # agent = Agent(env) + + env, agent = interactive(env, agent) + train(env, agent, num_episodes=10) + + env.close() + + + + pass diff --git a/irlc/lectures/lec11/sarsa_nstep.py b/irlc/lectures/lec11/sarsa_nstep.py new file mode 100644 index 0000000000000000000000000000000000000000..7687d1736244fd5531c35cd54ebdac7c25fc0a61 --- /dev/null +++ b/irlc/lectures/lec11/sarsa_nstep.py @@ -0,0 +1,11 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.gridworld.gridworld_environments import BookGridEnvironment +from irlc.exam_tabular_examples.sarsa_nstep_delay import SarsaDelayNAgent +from irlc import interactive, train + +if __name__ == "__main__": + env = BookGridEnvironment(render_mode='human') + agent = SarsaDelayNAgent(env, gamma=1, epsilon=0.1, alpha=0.9, n=1) # Exam problem. + # agent = SarsaDelayNAgent(env, gamma=0.95, epsilon=0.1, alpha=.2, n=1) + env, agent = interactive(env, agent) + train(env, agent, num_episodes=10) diff --git a/irlc/tests/tests_week10.py b/irlc/tests/tests_week10.py new file mode 100644 index 0000000000000000000000000000000000000000..b5dd4e6580fd2cd8dcebf7de0ba5f90e9edd9ca8 --- /dev/null +++ b/irlc/tests/tests_week10.py @@ -0,0 +1,132 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.ex10.question_td0 import a_compute_deltas, b_perform_td0, c_perform_td0_batched +from unitgrade import Report, UTestCase, cache +from irlc import train +import irlc.ex10.envs +import gymnasium as gym +from gymnasium.wrappers import TimeLimit +from irlc.tests.tests_week08 import train_recording + + +class MCAgentQuestion(UTestCase): + """ Test of MC agent """ + def get_env_agent(self): + from irlc.ex10.mc_agent import MCAgent + env = gym.make("SmallGridworld-v0") + env = TimeLimit(env, max_episode_steps=1000) + gamma = .8 + agent = MCAgent(env, gamma=gamma, first_visit=True) + return env, agent + + @cache + def compute_trajectories(self): + env, agent = self.get_env_agent() + _, trajectories = train(env, agent, return_trajectory=True, num_episodes=1, max_steps=100) + return trajectories, agent.Q.to_dict() + + def test_Q_function(self): + trajectories, Q = self.compute_trajectories() + env, agent = self.get_env_agent() + train_recording(env, agent, trajectories) + Qc = [] + Qe = [] + for s, qa in Q.items(): + for a,q in qa.items(): + Qe.append(q) + Qc.append(agent.Q[s,a]) + + self.assertL2(Qe, Qc, tol=1e-5) + + +# class BlackjackQuestion(UTestCase): +# """ MC policy evaluation agent and Blacjack """ +# def test_blackjack_mc(self): +# env = gym.make("Blackjack-v1") +# episodes = 50000 +# from irlc.ex10.mc_evaluate import MCEvaluationAgent +# from irlc.ex10.mc_evaluate_blackjack import get_by_ace, to_matrix, policy20 +# agent = MCEvaluationAgent(env, policy=policy20, gamma=1) +# train(env, agent, num_episodes=episodes) +# w = get_by_ace(agent.v, ace=True) +# X, Y, Z = to_matrix(w) +# print(Z) +# print(Z.dtype) +# self.assertL2(Z, tol=2.5) + + +class TD0Question(UTestCase): + """ Test of TD(0) evaluation agent """ + gamma = 0.8 + + def get_env_agent(self): + from 
irlc.ex10.td0_evaluate import TD0ValueAgent + env = gym.make("SmallGridworld-v0") + # env = TimeLimit(env, max_episode_steps=1000) + agent = TD0ValueAgent(env, gamma=self.gamma) + return env, agent + + @cache + def compute_trajectories(self): + env, agent = self.get_env_agent() + _, trajectories = train(env, agent, return_trajectory=True, num_episodes=1, max_steps=100) + return trajectories, agent.v + + def test_value_function(self): + # for k in range(1000): + trajectories, v = self.compute_trajectories() + env, agent = self.get_env_agent() + train_recording(env, agent, trajectories) + Qc = [] + Qe = [] + for s, value in v.items(): + Qe.append(value) + Qc.append(agent.v[s]) + + self.assertL2(Qe, Qc, tol=1e-5) + +class MCEvaluationQuestion(TD0Question): + """ Test of MC evaluation agent """ + def get_env_agent(self): + from irlc.ex10.mc_evaluate import MCEvaluationAgent + env = gym.make("SmallGridworld-v0") + env = TimeLimit(env, max_episode_steps=1000) + gamma = .8 + agent = MCEvaluationAgent(env, gamma=gamma, first_visit=True) + return env, agent + + +class ExamQuestionTD0(UTestCase): + + def get_problem(self): + states = [1, 0, 2, -1, 2, 4, 5, 4, 3, 2, 1, -1] + rewards = [1, 1, -1, 0, 1, 2, 2, 0, 0, -1, 1] + v = {s: 0 for s in states} + gamma = 0.9 + alpha = 0.2 + return v, states, rewards, gamma, alpha + + def test_a(self): + v, states, rewards, gamma, alpha = self.get_problem() + self.assertEqualC(a_compute_deltas(v, states, rewards, gamma)) + + def test_b(self): + v, states, rewards, gamma, alpha = self.get_problem() + self.assertEqualC(b_perform_td0(v, states, rewards, gamma, alpha)) + + def test_c(self): + v, states, rewards, gamma, alpha = self.get_problem() + self.assertEqualC(c_perform_td0_batched(v, states, rewards, gamma, alpha)) +class Week10Tests(Report): + title = "Tests for week 10" + pack_imports = [irlc] + individual_imports = [] + questions = [(MCAgentQuestion, 10), + (MCEvaluationQuestion, 10), + # (BlackjackQuestion,5), + (TD0Question, 10), + (ExamQuestionTD0, 10), + ] + +if __name__ == '__main__': + from unitgrade import evaluate_report_student + evaluate_report_student(Week10Tests()) diff --git a/irlc/tests/tests_week11.py b/irlc/tests/tests_week11.py new file mode 100644 index 0000000000000000000000000000000000000000..1f58dd129daaca02dc7c468b120640362aad8c41 --- /dev/null +++ b/irlc/tests/tests_week11.py @@ -0,0 +1,200 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from unitgrade import UTestCase, Report, cache +import numpy as np +from irlc import train +import irlc.ex10.envs +import gymnasium as gym +from irlc.tests.tests_week08 import train_recording +from irlc.tests.tests_week10 import TD0Question, MCAgentQuestion + + +# This problem no longer exists. 
+# class NStepSarseEvaluationQuestion(TD0Question): +# """ Test of TD-n evaluation agent """ +# # class EvaluateTabular(VExperienceItem): +# # title = "Value-function test" +# gamma = 0.8 +# def get_env_agent(self): +# envn = "SmallGridworld-v0" +# from irlc.ex11.nstep_td_evaluate import TDnValueAgent +# env = gym.make(envn) +# agent = TDnValueAgent(env, gamma=self.gamma, n=5) +# return env, agent + + + +class QAgentQuestion(MCAgentQuestion): + """ Test of Q Agent """ + # class EvaluateTabular(QExperienceItem): + # title = "Q-value test" + + def get_env_agent(self): + from irlc.ex11.q_agent import QAgent + env = gym.make("SmallGridworld-v0") + agent = QAgent(env, gamma=.8) + return env, agent + + +# class LinearWeightVectorTest(UTestCase): + + + +# class LinearValueFunctionTest(LinearWeightVectorTest): +# title = "Linear value-function test" +# def compute_answer_print(self): +# trajectories, Q = self.precomputed_payload() +# env, agent = self.get_env_agent() +# train_recording(env, agent, trajectories) +# self.Q = Q +# self.question.agent = agent +# vfun = [agent.Q[s,a] for s, a in zip(trajectories[0].state, trajectories[0].action)] +# return vfun + +# class TabularAgentStub(UTestCase): +# +# pass + +class TabularAgentStub(UTestCase): + """ Average return over many simulated episodes """ + gamma = 0.95 + epsilon = 0.2 + tol = 0.1 + tol_qs = 0.3 + episodes = 9000 + + def get_env(self): + return gym.make("SmallGridworld-v0") + + def get_env_agent(self): + raise NotImplementedError() + # from irlc.ex11.sarsa_agent import SarsaAgent + # agent = SarsaAgent(self.get_env(), gamma=self.gamma) + # return agent.env, agent + + def get_trained_agent(self): + env, agent = self.get_env_agent() + stats, _ = train(env, agent, num_episodes=self.episodes) + return agent, stats + + def chk_accumulated_reward(self): + agent, stats = self.get_trained_agent() + s0, _ = agent.env.reset() + actions, qs = agent.Q.get_Qs(s0) + print("Tolerance is", self.tol_qs) + self.assertL2(qs, tol=self.tol_qs) + self.assertL2(np.mean([s['Accumulated Reward'] for s in stats]), tol=self.tol) + + # def test_accumulated_reward(self): + # env, agent = self.get_env_agent() + # stats, _ = train(env, agent, num_episodes=5000) + # s = env.reset() + # actions, qs = agent.Q.get_Qs(s) + # self.assertL2(qs, tol=0.3) + # self.assertL2(np.mean([s['Accumulated Reward'] for s in stats]), tol=self.tol) + +class SarsaQuestion(TabularAgentStub): + + + def get_env_agent(self): + from irlc.ex11.sarsa_agent import SarsaAgent + agent = SarsaAgent(self.get_env(), gamma=self.gamma) + return agent.env, agent + + def test_accumulated_reward(self): + self.tol_qs = 2.7 # Got 2.65 in one run. 
+ self.chk_accumulated_reward() + + +class NStepSarsaQuestion(TabularAgentStub): + title = "N-step Sarsa" + # class SarsaReturnItem(SarsaQuestion): + def get_env_agent(self): + from irlc.ex11.nstep_sarsa_agent import SarsaNAgent + agent = SarsaNAgent(self.get_env(), gamma=self.gamma, n=5) + return agent.env, agent + + def test_accumulated_reward(self): + self.tol_qs = 2.7 + self.chk_accumulated_reward() + + +class LinearAgentStub(UTestCase): + # class LinearExperienceItem(LinearWeightVectorTest): + tol = 1e-6 + # title = "Linear sarsa agent" + alpha = 0.08 + num_episodes = 300 + # title = "Weight-vector test" + # testfun = QPrintItem.assertL2 + gamma = 0.8 + tol_w = 1e-5 + + + def get_env_agent(self): + raise NotImplementedError() + + def get_env(self): + return gym.make("MountainCar500-v0") + + # def get_env_agent(self): + # return None, None + + @cache + def compute_trajectories(self): + env, agent = self.get_env_agent() + _, trajectories = train(env, agent, return_trajectory=True, num_episodes=1, max_steps=100) + return trajectories, agent.Q.w + + def chk_Q_weight_vector_w(self): + trajectories, w = self.compute_trajectories() + env, agent = self.get_env_agent() + train_recording(env, agent, trajectories) + print(w) + print(agent.Q.w) + self.assertL2(agent.Q.w, w, tol=self.tol_w) + + pass +class LinearSarsaAgentQuestion(LinearAgentStub): + """ Sarsa Agent with linear function approximators """ + + def get_env_agent(self): + env = self.get_env() + from irlc.ex11.semi_grad_sarsa import LinearSemiGradSarsa + agent = LinearSemiGradSarsa(env, gamma=1, alpha=self.alpha, epsilon=0) + return env, agent + + def test_Q_weight_vector_w(self): + self.tol_w = 1.4 + self.chk_Q_weight_vector_w() + +class LinearQAgentQuestion(LinearAgentStub): + """ Test of Linear Q Agent """ + + def get_env_agent(self): + env = self.get_env() + alpha = 0.1 + from irlc.ex11.semi_grad_q import LinearSemiGradQAgent + agent = LinearSemiGradQAgent(env, gamma=1, alpha=alpha, epsilon=0) + return env, agent + + def test_Q_weight_vector_w(self): + # self.tol_qs = 1.9 + self.tol_w = 7 + self.chk_Q_weight_vector_w() + + +class Week11Tests(Report): + title = "Tests for week 11" + pack_imports = [irlc] + individual_imports = [] + questions =[ + # (NStepSarseEvaluationQuestion, 10), + (QAgentQuestion, 10), + (LinearQAgentQuestion, 10), + (LinearSarsaAgentQuestion, 10), + (SarsaQuestion, 10), + (NStepSarsaQuestion, 5), + ] +if __name__ == '__main__': + from unitgrade import evaluate_report_student + evaluate_report_student(Week11Tests()) diff --git a/irlc/tests/unitgrade_data/BanditQuestion.pkl b/irlc/tests/unitgrade_data/BanditQuestion.pkl index edd179f5233e5a15868c54b4fd9cc2965fadb20f..473236810511533d00ae28302696def58d013643 100644 Binary files a/irlc/tests/unitgrade_data/BanditQuestion.pkl and b/irlc/tests/unitgrade_data/BanditQuestion.pkl differ diff --git a/irlc/tests/unitgrade_data/CartpoleCostQuestion.pkl b/irlc/tests/unitgrade_data/CartpoleCostQuestion.pkl index f9c111e0fde29667564c4bce12403bdbca2ede4b..61cfc2e5e8d74b9cedaed9bbabba199444880933 100644 Binary files a/irlc/tests/unitgrade_data/CartpoleCostQuestion.pkl and b/irlc/tests/unitgrade_data/CartpoleCostQuestion.pkl differ diff --git a/irlc/tests/unitgrade_data/CartpoleTimeQuestion.pkl b/irlc/tests/unitgrade_data/CartpoleTimeQuestion.pkl index f9c111e0fde29667564c4bce12403bdbca2ede4b..61cfc2e5e8d74b9cedaed9bbabba199444880933 100644 Binary files a/irlc/tests/unitgrade_data/CartpoleTimeQuestion.pkl and b/irlc/tests/unitgrade_data/CartpoleTimeQuestion.pkl differ diff 
--git a/irlc/tests/unitgrade_data/DirectAgentPendulum.pkl b/irlc/tests/unitgrade_data/DirectAgentPendulum.pkl index 30dd1062d1dd64f89fbe4a1d9559ec33ecfdec49..8d010a1467db4d221532f2ee0a7371c71c132147 100644 Binary files a/irlc/tests/unitgrade_data/DirectAgentPendulum.pkl and b/irlc/tests/unitgrade_data/DirectAgentPendulum.pkl differ diff --git a/irlc/tests/unitgrade_data/DirectMethods.pkl b/irlc/tests/unitgrade_data/DirectMethods.pkl index 5b7d595636172fded4067cf5f187d482614b79ba..023619080482d1b79f8bf25480a74ab9f4f9b6a9 100644 Binary files a/irlc/tests/unitgrade_data/DirectMethods.pkl and b/irlc/tests/unitgrade_data/DirectMethods.pkl differ diff --git a/irlc/tests/unitgrade_data/DirectSolverQuestion.pkl b/irlc/tests/unitgrade_data/DirectSolverQuestion.pkl index f9c111e0fde29667564c4bce12403bdbca2ede4b..61cfc2e5e8d74b9cedaed9bbabba199444880933 100644 Binary files a/irlc/tests/unitgrade_data/DirectSolverQuestion.pkl and b/irlc/tests/unitgrade_data/DirectSolverQuestion.pkl differ diff --git a/irlc/tests/unitgrade_data/Exam5InventoryEvaluation.pkl b/irlc/tests/unitgrade_data/Exam5InventoryEvaluation.pkl index af00f83a914f3ccd605d208edf577a680f4b4822..7668f6c5761fd79c62f158071fbd494d96a842b9 100644 Binary files a/irlc/tests/unitgrade_data/Exam5InventoryEvaluation.pkl and b/irlc/tests/unitgrade_data/Exam5InventoryEvaluation.pkl differ diff --git a/irlc/tests/unitgrade_data/Exam6Toy2d.pkl b/irlc/tests/unitgrade_data/Exam6Toy2d.pkl index 35da329cde908ee0c76542e26e45a260eda7f19f..f297836116e242443741fc97c303a7586b38e0e2 100644 Binary files a/irlc/tests/unitgrade_data/Exam6Toy2d.pkl and b/irlc/tests/unitgrade_data/Exam6Toy2d.pkl differ diff --git a/irlc/tests/unitgrade_data/ExamQuestion7FlowersStore.pkl b/irlc/tests/unitgrade_data/ExamQuestion7FlowersStore.pkl index d94ebb5d1eaceac887aa37880e80bc64f85537ae..857d1b44cebdb0612421c1ec990febce99722f8c 100644 Binary files a/irlc/tests/unitgrade_data/ExamQuestion7FlowersStore.pkl and b/irlc/tests/unitgrade_data/ExamQuestion7FlowersStore.pkl differ diff --git a/irlc/tests/unitgrade_data/ExamQuestionTD0.pkl b/irlc/tests/unitgrade_data/ExamQuestionTD0.pkl index a00b2d148c0fc04b594b8b9551574d5f265a43b1..4f921dea73c279142e67fb44ce9b2c57aa0668ce 100644 Binary files a/irlc/tests/unitgrade_data/ExamQuestionTD0.pkl and b/irlc/tests/unitgrade_data/ExamQuestionTD0.pkl differ diff --git a/irlc/tests/unitgrade_data/GradientBanditQuestion.pkl b/irlc/tests/unitgrade_data/GradientBanditQuestion.pkl index edd179f5233e5a15868c54b4fd9cc2965fadb20f..473236810511533d00ae28302696def58d013643 100644 Binary files a/irlc/tests/unitgrade_data/GradientBanditQuestion.pkl and b/irlc/tests/unitgrade_data/GradientBanditQuestion.pkl differ diff --git a/irlc/tests/unitgrade_data/ILQRAgentQuestion.pkl b/irlc/tests/unitgrade_data/ILQRAgentQuestion.pkl index 9428ff4694b4dc2cdbf360f286f3efccaa252b72..547d7a9f7b6ad4938087db86e35a9b36cee09e65 100644 Binary files a/irlc/tests/unitgrade_data/ILQRAgentQuestion.pkl and b/irlc/tests/unitgrade_data/ILQRAgentQuestion.pkl differ diff --git a/irlc/tests/unitgrade_data/ILQRPendulumQuestion.pkl b/irlc/tests/unitgrade_data/ILQRPendulumQuestion.pkl index e365fec395fbccdf16de93f115cac629916b4b03..f94cda42c98f0cc556ada3f739585ba233fdd584 100644 Binary files a/irlc/tests/unitgrade_data/ILQRPendulumQuestion.pkl and b/irlc/tests/unitgrade_data/ILQRPendulumQuestion.pkl differ diff --git a/irlc/tests/unitgrade_data/LinearQAgentQuestion.pkl b/irlc/tests/unitgrade_data/LinearQAgentQuestion.pkl index 
69c70ecba4954fbaf7505af3246803e1403042e6..0af1b2cbfdd1441804be919a7071995cd7bd6acb 100644 Binary files a/irlc/tests/unitgrade_data/LinearQAgentQuestion.pkl and b/irlc/tests/unitgrade_data/LinearQAgentQuestion.pkl differ diff --git a/irlc/tests/unitgrade_data/LinearSarsaAgentQuestion.pkl b/irlc/tests/unitgrade_data/LinearSarsaAgentQuestion.pkl index d0e913ff8885e29f4287fa1e21d720d5ee6fe0ed..f1d8ea1c4051989e2d07c4d440e9ea4f2aa3b24f 100644 Binary files a/irlc/tests/unitgrade_data/LinearSarsaAgentQuestion.pkl and b/irlc/tests/unitgrade_data/LinearSarsaAgentQuestion.pkl differ diff --git a/irlc/tests/unitgrade_data/MCAgentQuestion.pkl b/irlc/tests/unitgrade_data/MCAgentQuestion.pkl index c552c3c66dbd63e44581067c53e74783681ffc2a..3e631e5d95d7b0257a00cdadcb8a84bf962be8fa 100644 Binary files a/irlc/tests/unitgrade_data/MCAgentQuestion.pkl and b/irlc/tests/unitgrade_data/MCAgentQuestion.pkl differ diff --git a/irlc/tests/unitgrade_data/MCEvaluationQuestion.pkl b/irlc/tests/unitgrade_data/MCEvaluationQuestion.pkl index d9cfe1215b3b0ede2b9c0e0ff35452f93eb10249..6a132b537b8762024112ca92a5dad5b3c6682bc6 100644 Binary files a/irlc/tests/unitgrade_data/MCEvaluationQuestion.pkl and b/irlc/tests/unitgrade_data/MCEvaluationQuestion.pkl differ diff --git a/irlc/tests/unitgrade_data/NStepSarsaQuestion.pkl b/irlc/tests/unitgrade_data/NStepSarsaQuestion.pkl index 10b68255bebadaa13730e897e7a8cd2064666d88..d2afb2c21357b5e6d0a93407654b733d4398d13e 100644 Binary files a/irlc/tests/unitgrade_data/NStepSarsaQuestion.pkl and b/irlc/tests/unitgrade_data/NStepSarsaQuestion.pkl differ diff --git a/irlc/tests/unitgrade_data/NonstatiotnaryAgentQuestion.pkl b/irlc/tests/unitgrade_data/NonstatiotnaryAgentQuestion.pkl index edd179f5233e5a15868c54b4fd9cc2965fadb20f..473236810511533d00ae28302696def58d013643 100644 Binary files a/irlc/tests/unitgrade_data/NonstatiotnaryAgentQuestion.pkl and b/irlc/tests/unitgrade_data/NonstatiotnaryAgentQuestion.pkl differ diff --git a/irlc/tests/unitgrade_data/PendulumQuestion.pkl b/irlc/tests/unitgrade_data/PendulumQuestion.pkl index f9c111e0fde29667564c4bce12403bdbca2ede4b..61cfc2e5e8d74b9cedaed9bbabba199444880933 100644 Binary files a/irlc/tests/unitgrade_data/PendulumQuestion.pkl and b/irlc/tests/unitgrade_data/PendulumQuestion.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem1BobsFriend.pkl b/irlc/tests/unitgrade_data/Problem1BobsFriend.pkl index c367454afea824f4784577e595e85f92c7535338..402ac40b12769d8211ed434cc664b87f02e0be51 100644 Binary files a/irlc/tests/unitgrade_data/Problem1BobsFriend.pkl and b/irlc/tests/unitgrade_data/Problem1BobsFriend.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem1DiscreteKuromoto.pkl b/irlc/tests/unitgrade_data/Problem1DiscreteKuromoto.pkl index 8a46ae488da7c6c88812a86d3b9f5deb404329fc..e9261fc8c65ffea540aca333c5e8fc9e31b3b779 100644 Binary files a/irlc/tests/unitgrade_data/Problem1DiscreteKuromoto.pkl and b/irlc/tests/unitgrade_data/Problem1DiscreteKuromoto.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem1Kuramoto.pkl b/irlc/tests/unitgrade_data/Problem1Kuramoto.pkl index aa1f77c7396deed36b343f489727774ebcebc3df..402b6e64ed470d789707284b32dc7c0e4940adaa 100644 Binary files a/irlc/tests/unitgrade_data/Problem1Kuramoto.pkl and b/irlc/tests/unitgrade_data/Problem1Kuramoto.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem1_to_3_Warmup.pkl b/irlc/tests/unitgrade_data/Problem1_to_3_Warmup.pkl index 6428b65c09cb5b72e9f945066f6343f4d8a24009..4dca1ffc8ec8944f3d0619b092dbfc24d221f77d 100644 Binary files 
a/irlc/tests/unitgrade_data/Problem1_to_3_Warmup.pkl and b/irlc/tests/unitgrade_data/Problem1_to_3_Warmup.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem2BobsPolicy.pkl b/irlc/tests/unitgrade_data/Problem2BobsPolicy.pkl index 6e3d0a43209e6cc03cc992ef8fc4be5b219a2fa2..0133173f9abe7ce08622467ef6ce34a34987e782 100644 Binary files a/irlc/tests/unitgrade_data/Problem2BobsPolicy.pkl and b/irlc/tests/unitgrade_data/Problem2BobsPolicy.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem2DeterministicDP.pkl b/irlc/tests/unitgrade_data/Problem2DeterministicDP.pkl index c623107a7d3b7ae5b8b3ae136dbe3b0a806f400a..674e8d3f54aeed4eeb13056dc384ceaf5831f9b3 100644 Binary files a/irlc/tests/unitgrade_data/Problem2DeterministicDP.pkl and b/irlc/tests/unitgrade_data/Problem2DeterministicDP.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem2DeterministicInventory.pkl b/irlc/tests/unitgrade_data/Problem2DeterministicInventory.pkl index 6c26fffacf4abd79d8016b28dacc3b460d9db347..547769c9bb40f7e2f9e061a3d24943b7bf016ea1 100644 Binary files a/irlc/tests/unitgrade_data/Problem2DeterministicInventory.pkl and b/irlc/tests/unitgrade_data/Problem2DeterministicInventory.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem3InventoryInventoryEnvironment.pkl b/irlc/tests/unitgrade_data/Problem3InventoryInventoryEnvironment.pkl index d79217522553caef898abe605f8a24a40636a08b..dd7c5af1163a3610ddac292eac48d2bb7792bb9a 100644 Binary files a/irlc/tests/unitgrade_data/Problem3InventoryInventoryEnvironment.pkl and b/irlc/tests/unitgrade_data/Problem3InventoryInventoryEnvironment.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem3LQR.pkl b/irlc/tests/unitgrade_data/Problem3LQR.pkl index e1981fe372bc707bca0260655dc4fc4a44466b19..0ffab948d9210a04dcf3aac3fb2ed0b93dba7ca2 100644 Binary files a/irlc/tests/unitgrade_data/Problem3LQR.pkl and b/irlc/tests/unitgrade_data/Problem3LQR.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem3PID.pkl b/irlc/tests/unitgrade_data/Problem3PID.pkl index 839dd814eff6e41f6fb597adb852de2fbb0f8e19..d17327151a7cf7abcbd9144d790d5816c40e8376 100644 Binary files a/irlc/tests/unitgrade_data/Problem3PID.pkl and b/irlc/tests/unitgrade_data/Problem3PID.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem3StochasticDP.pkl b/irlc/tests/unitgrade_data/Problem3StochasticDP.pkl index 8b59f1de92e3fd5734568a88df1fea70ae262fe3..42b78bdb0fc666d7de65b7898084a8b2ebfc9357 100644 Binary files a/irlc/tests/unitgrade_data/Problem3StochasticDP.pkl and b/irlc/tests/unitgrade_data/Problem3StochasticDP.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem4DPAgent.pkl b/irlc/tests/unitgrade_data/Problem4DPAgent.pkl index b28d450ca88a391a8ccfa480734fde8141d1b6f4..b3afcdcbdf741bc3db13e7fe30128763d31475ea 100644 Binary files a/irlc/tests/unitgrade_data/Problem4DPAgent.pkl and b/irlc/tests/unitgrade_data/Problem4DPAgent.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem4InventoryTrain.pkl b/irlc/tests/unitgrade_data/Problem4InventoryTrain.pkl index ee99756d57edf4a222856e4fcb64d101eb1ad454..d11c158fe1b26097970110155f252694dbc24699 100644 Binary files a/irlc/tests/unitgrade_data/Problem4InventoryTrain.pkl and b/irlc/tests/unitgrade_data/Problem4InventoryTrain.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem4LQRAgent.pkl b/irlc/tests/unitgrade_data/Problem4LQRAgent.pkl index 569bd52839bd666ad594a21ada6ef6f6bff3342a..eb2ddd4971a857b980fd75550b2cc7c96ed85318 100644 Binary files a/irlc/tests/unitgrade_data/Problem4LQRAgent.pkl and b/irlc/tests/unitgrade_data/Problem4LQRAgent.pkl 
differ diff --git a/irlc/tests/unitgrade_data/Problem4PIDAgent.pkl b/irlc/tests/unitgrade_data/Problem4PIDAgent.pkl index c3fd8909d810a5943171ac685a2e95326d9ae808..2dc14ad1e55bd9a85108809779f809f20ec255a3 100644 Binary files a/irlc/tests/unitgrade_data/Problem4PIDAgent.pkl and b/irlc/tests/unitgrade_data/Problem4PIDAgent.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem4PolicyEvaluation.pkl b/irlc/tests/unitgrade_data/Problem4PolicyEvaluation.pkl index e988ad6626a12e54cb6292ed0560835fcde9c488..9cb9f08bc370f4a30110aeaceff81fc5895be6ae 100644 Binary files a/irlc/tests/unitgrade_data/Problem4PolicyEvaluation.pkl and b/irlc/tests/unitgrade_data/Problem4PolicyEvaluation.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem5PacmanHardcoded.pkl b/irlc/tests/unitgrade_data/Problem5PacmanHardcoded.pkl index 60282c5cbf6353a285a6425a363e36a1c4658156..a41d0222d42045ede2d5eacd73513bb2a9100775 100644 Binary files a/irlc/tests/unitgrade_data/Problem5PacmanHardcoded.pkl and b/irlc/tests/unitgrade_data/Problem5PacmanHardcoded.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem5PolicyIteration.pkl b/irlc/tests/unitgrade_data/Problem5PolicyIteration.pkl index 08d0a1de9624ce2840d456c97c121036d2a6a41d..a5668f0cd26821ec7bd502c8cf6334dbae2f50ec 100644 Binary files a/irlc/tests/unitgrade_data/Problem5PolicyIteration.pkl and b/irlc/tests/unitgrade_data/Problem5PolicyIteration.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem5_6_Boeing.pkl b/irlc/tests/unitgrade_data/Problem5_6_Boeing.pkl index d313e26e5727bec7415f60a10638d703b76313a3..066b7ad5d643ab4af11fd1ff5482f06707b6e498 100644 Binary files a/irlc/tests/unitgrade_data/Problem5_6_Boeing.pkl and b/irlc/tests/unitgrade_data/Problem5_6_Boeing.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem6ChessTournament.pkl b/irlc/tests/unitgrade_data/Problem6ChessTournament.pkl index 7035e7095ef9ad98f42285f0164d461f0709159f..00e68ef4a2d4f3938898b5ba0813f27abf322dbc 100644 Binary files a/irlc/tests/unitgrade_data/Problem6ChessTournament.pkl and b/irlc/tests/unitgrade_data/Problem6ChessTournament.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem6ValueIteration.pkl b/irlc/tests/unitgrade_data/Problem6ValueIteration.pkl index 26117dae7081cd5dde5d04e16e133337034e99c2..f242501c0d8cfea0cfff3745b5c79a7e5c7a74a6 100644 Binary files a/irlc/tests/unitgrade_data/Problem6ValueIteration.pkl and b/irlc/tests/unitgrade_data/Problem6ValueIteration.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem7PIDCar.pkl b/irlc/tests/unitgrade_data/Problem7PIDCar.pkl index 3393d7eb47dbebe9239b902d13032a4eebcb48fa..515794b16a025761728494b5a33329e533e5c2d7 100644 Binary files a/irlc/tests/unitgrade_data/Problem7PIDCar.pkl and b/irlc/tests/unitgrade_data/Problem7PIDCar.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem7_8_PidLQR.pkl b/irlc/tests/unitgrade_data/Problem7_8_PidLQR.pkl index a8befec36a3e155f9f1fe055f8d0b2eb841f3795..b579ed7c05dd433dc7d008cdc6ade7223c149822 100644 Binary files a/irlc/tests/unitgrade_data/Problem7_8_PidLQR.pkl and b/irlc/tests/unitgrade_data/Problem7_8_PidLQR.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem8ValueIterationAgent.pkl b/irlc/tests/unitgrade_data/Problem8ValueIterationAgent.pkl index b5a78afcd03096134dd779734ebd7428b75101f6..5629afc31194b2fc36c1b212f28076015b775d3c 100644 Binary files a/irlc/tests/unitgrade_data/Problem8ValueIterationAgent.pkl and b/irlc/tests/unitgrade_data/Problem8ValueIterationAgent.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem9Gambler.pkl 
b/irlc/tests/unitgrade_data/Problem9Gambler.pkl index 32659e41fc735fcb00b86d77c302cd4f8fefd1fd..ed58c48391a348bfe8dbfeff2205acd5418c086e 100644 Binary files a/irlc/tests/unitgrade_data/Problem9Gambler.pkl and b/irlc/tests/unitgrade_data/Problem9Gambler.pkl differ diff --git a/irlc/tests/unitgrade_data/QAgentQuestion.pkl b/irlc/tests/unitgrade_data/QAgentQuestion.pkl index b2d68c845f99cb476a059f553a9d1fc19471869d..1564ef4461bbd73cb56dcd6940b2c522e7c4bc1d 100644 Binary files a/irlc/tests/unitgrade_data/QAgentQuestion.pkl and b/irlc/tests/unitgrade_data/QAgentQuestion.pkl differ diff --git a/irlc/tests/unitgrade_data/RendevouzItem.pkl b/irlc/tests/unitgrade_data/RendevouzItem.pkl index 06cde769c462c7c27150f75bf8abc31dc7c97739..7016d99087fe4b4f4ad3bc18984ce75dc502ebf3 100644 Binary files a/irlc/tests/unitgrade_data/RendevouzItem.pkl and b/irlc/tests/unitgrade_data/RendevouzItem.pkl differ diff --git a/irlc/tests/unitgrade_data/SarsaQuestion.pkl b/irlc/tests/unitgrade_data/SarsaQuestion.pkl index bb6c4f0ca7790d883d7e3b3c3b033768ceba51e4..2074944241492ef40be122832f251c7058ae9948 100644 Binary files a/irlc/tests/unitgrade_data/SarsaQuestion.pkl and b/irlc/tests/unitgrade_data/SarsaQuestion.pkl differ diff --git a/irlc/tests/unitgrade_data/TD0Question.pkl b/irlc/tests/unitgrade_data/TD0Question.pkl index 775a2d90cca489ee99b02f158b2921df81b7a6b2..801506652caca503c3463bf3c4f0c0df23d086f9 100644 Binary files a/irlc/tests/unitgrade_data/TD0Question.pkl and b/irlc/tests/unitgrade_data/TD0Question.pkl differ diff --git a/irlc/tests/unitgrade_data/UCBAgentQuestion.pkl b/irlc/tests/unitgrade_data/UCBAgentQuestion.pkl index edd179f5233e5a15868c54b4fd9cc2965fadb20f..473236810511533d00ae28302696def58d013643 100644 Binary files a/irlc/tests/unitgrade_data/UCBAgentQuestion.pkl and b/irlc/tests/unitgrade_data/UCBAgentQuestion.pkl differ diff --git a/irlc/utils/async_wrappers.py b/irlc/utils/async_wrappers.py index 8dbebf533885664ea41f1a9d01e5012a20ec4490..e2df79a99a59d5017ca58c07bdd0b7d655143d2f 100644 --- a/irlc/utils/async_wrappers.py +++ b/irlc/utils/async_wrappers.py @@ -37,3 +37,61 @@ class AsyncTimeLimit(TimeLimit): truncated = True return observation, reward, terminated, truncated, info + + + + +def _fix_webassembly_packages(yes_really_do_it=False): + # Rewrites the installed sympy sources in place so sympy imports without its threading/timeit/sympy.testing machinery; only used when preparing the webassembly build. + import importlib.util + import os + assert yes_really_do_it, "This function is for internal use for deploying webassembly projects. Don't use it in your base dir." + + spec = importlib.util.find_spec("sympy", None) + base = os.path.dirname(spec.origin) + testf = f"{base}/testing/__init__.py" + if base.startswith("/data/data/"): + # with open(testf, 'w') as f: + # f.write("# Nothingatall") + # with open(f"{base}/testing/runtests.py", 'w') as f: + # f.write("# Nothingatall") + + fname = f"{base}/utilities/decorator.py" + assert os.path.isfile(fname) + code = open(fname, 'r').read() + with open(fname, 'w') as f: + # Drop all module-level imports from sympy.testing. + f.write("\n".join([l for l in code.splitlines() if not l.startswith("from sympy.testing")])) + + code = open(fname := f"{base}/utilities/__init__.py", 'r').read() + code = code.replace("from .timeutils import timed", "timed = lambda x: 3") + with open(fname, 'w') as f: + f.write(code) + + for fname in [f"{base}/core/parameters.py", f"{base}/matrices/utilities.py"]: + code = open(fname, 'r').read() + code = code.replace("from threading import local", "local = object") + with open(fname, 'w') as f: + f.write(code) + + # Fix timeit.
+ code = open(fname := f"{base}/utilities/timeutils.py", 'r').read() + code = code.replace("import timeit", "# REMOVED") + with open(fname, 'w') as f: + f.write(code) + + code = open(fname := f"{base}/testing/runtests.py", 'r').read() + code = code.replace("from timeit import default_timer as clock", "# REMOVED") + # DocTestFinder, DocTestRunner + # + # code = code.replace("import doctest as pdoctest", "# REMOVED") + + # code = code.replace("from doctest import DocTestFinder, DocTestRunner", "DocTestFinder, DocTestRunner = object, object") + # code = code.replace("pdoctest._indent", "#REMOVED") + # code = code.replace("import doctest", "# REMOVED") + + with open(fname, 'w') as f: + f.write(code) + print("Patched ok.") + # NB: remember to also patch Decimal by adding the extra attributes (e.g. the exceptions) that the webassembly build masks from the decimal module. diff --git a/irlc/utils/player_wrapper.py b/irlc/utils/player_wrapper.py index e84b48fec75e81c155a330565979af3a2cd6fe6d..be01959d7ffeda87bd2eebe6efc825981459e47d 100644 --- a/irlc/utils/player_wrapper.py +++ b/irlc/utils/player_wrapper.py @@ -88,11 +88,14 @@ async def _webassembly_interactive(env, agent, autoplay=False): def filled_circle(surface, x, y, r, color): pygame.draw.circle(surface, color, (x, y), r, width=0) + def hline(surface, x1, x2, y, color): + pygame.draw.line(surface, color, (x1, y), (x2, y)) gfxdraw.aapolygon = aapolygon gfxdraw.filled_polygon = filled_polygon gfxdraw.aacircle = aacircle gfxdraw.filled_circle = filled_circle + gfxdraw.hline = hline # from irlc.utils.player_wrapper import AsyncPlayerWrapperPygame
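The player_wrapper.py hunk above relies on a simple monkey-patching pattern: pure pygame.draw stand-ins are defined inside _webassembly_interactive and then assigned onto the gfxdraw module, so rendering code written against gfxdraw keeps working in the webassembly build. Below is a minimal, self-contained sketch of that pattern; it assumes only a standard pygame installation, and the Surface at the bottom is illustrative rather than taken from the repository.

# Minimal sketch of the gfxdraw monkey-patching pattern (assumption: plain pygame install).
import pygame
from pygame import gfxdraw

def hline(surface, x1, x2, y, color):
    # Mirrors gfxdraw.hline(surface, x1, x2, y, color): a horizontal line from x1 to x2 at height y.
    pygame.draw.line(surface, color, (x1, y), (x2, y))

def filled_circle(surface, x, y, r, color):
    # width=0 makes pygame.draw.circle fill the disc, matching gfxdraw.filled_circle.
    pygame.draw.circle(surface, color, (x, y), r, width=0)

# Re-route the gfxdraw primitives to the pure-pygame fallbacks.
gfxdraw.hline = hline
gfxdraw.filled_circle = filled_circle

if __name__ == "__main__":
    surf = pygame.Surface((100, 100))
    gfxdraw.hline(surf, 10, 90, 50, (255, 0, 0))  # now dispatches to pygame.draw.line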
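The _fix_webassembly_packages helper added to async_wrappers.py above is guarded by its yes_really_do_it flag and, per its own assertion message, is only intended for preparing webassembly deployments: it rewrites the installed sympy sources in place. A hypothetical invocation from a deployment script might look as follows; the call site is an assumption and is not part of the repository.

# Hypothetical deployment-time call (assumption, not in the repository).
# Never run this in a normal checkout: it edits the installed sympy package in place.
from irlc.utils.async_wrappers import _fix_webassembly_packages

_fix_webassembly_packages(yes_really_do_it=True)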