Commit 2b44091b authored by tuhe

Week 11

parent c16ac062
Showing changed files with 249 additions and 74 deletions
@@ -10,10 +10,10 @@ exam_tabular_examples
#solutions/ex07
#solutions/ex08
# solutions/ex09
solutions/ex10
solutions/ex11
solutions/ex12
solutions/ex13
#solutions/ex10
#solutions/ex11
#solutions/ex12
#solutions/ex13
#irlc/ex03
#irlc/ex04
@@ -36,8 +36,8 @@ solutions/ex13
#irlc/tests/tests_week07.py
#irlc/tests/tests_week08.py
# irlc/tests/tests_week09.py
irlc/tests/tests_week10.py
irlc/tests/tests_week11.py
#irlc/tests/tests_week10.py
#irlc/tests/tests_week11.py
irlc/tests/tests_week12.py
irlc/tests/tests_week13.py
@@ -74,7 +74,7 @@ irlc/exam/exam20*/solution
#irlc/lectures/lec08
# irlc/lectures/lec09
#irlc/lectures/lec10
irlc/lectures/lec11
#irlc/lectures/lec11
irlc/lectures/lec12
irlc/lectures/lec13
......
@@ -2,12 +2,17 @@
from irlc.lectures.lec10.lecture_10_mc_q_estimation import keyboard_play
from irlc.gridworld.gridworld_environments import BookGridEnvironment
from irlc.ex10.mc_agent import MCAgent
# from irlc.lectures.lec10.utils import MCAgentResettable
import numpy as np
if __name__ == "__main__":
np.random.seed(433)
env = BookGridEnvironment(render_mode='human',zoom=2)
env = BookGridEnvironment(render_mode='human',zoom=2, living_reward=-0.05)
# agent = MCAgent(env, gamma=0.9, epsilon=0.15, alpha=0.1, first_visit=True)
from irlc.lectures.lec10.utils import agent_reset
MCAgent.reset = agent_reset
agent = MCAgent(env, gamma=1.0, epsilon=0.15, alpha=None, first_visit=True)
# env, agent = interactive(env, agent)
keyboard_play(env,agent,method_label='MC control')
@@ -12,6 +12,8 @@ class CaughtGrid(GridworldEnvironment):
def __init__(self, **kwargs):
super().__init__(map, living_reward=1, zoom=1.5, **kwargs)
if __name__ == "__main__":
env = CaughtGrid(view_mode=1, render_mode='human')
agent = MCEvaluationAgent(env, gamma=1, alpha=None)
......
@@ -11,21 +11,6 @@ def keyboard_play(env, agent, method_label='MC',autoplay=False, num_episodes=100
env.close()
def automatic_play(env, agent, method_label='MC'):
# agent = PlayWrapper(agent, env)
env = VideoMonitor(env, agent=agent, fps=40, continious_recording=True, agent_monitor_keys=('pi', 'Q'), render_kwargs={'method_label': method_label})
train(env, agent, num_episodes=1000)
env.close()
def automatic_play_value(env, agent, method_label='MC'):
agent.label = method_label
env, agent = interactive(env, agent)
# env = VideoMonitor(env, agent=agent, fps=40, continious_recording=True, agent_monitor_keys=('v'), render_kwargs={'method_label': method_label})
# agent = PlayWrapper(agent, env)
train(env, agent, num_episodes=1000)
env.close()
if __name__ == "__main__":
env = BookGridEnvironment(render_mode='human', zoom=2, living_reward=-0.05)
from irlc.ex10.mc_agent import MCAgent
......
@@ -6,6 +6,6 @@ from irlc.ex10.mc_evaluate import MCEvaluationAgent
if __name__ == "__main__":
env = BookGridEnvironment(view_mode=1, render_mode='human', living_reward=-0.05)
agent = MCEvaluationAgent(env, gamma=.9, alpha=None, first_visit=False)
agent = MCEvaluationAgent(env, gamma=1, alpha=None, first_visit=False)
keyboard_play_value(env,agent,method_label='MC every')
@@ -6,6 +6,7 @@ from irlc.ex10.mc_evaluate import MCEvaluationAgent
import numpy as np
from irlc import interactive, train
class MCAgentOneState(MCEvaluationAgent):
def __init__(self, *args, state=None, **kwargs):
a = 34
@@ -17,18 +18,19 @@ class MCAgentOneState(MCEvaluationAgent):
def _clear_states(self, val=None):
for s in self.env.mdp.nonterminal_states:
# for a in self.env.mdp.A(s):
# self.Q[s,a] = 0
if s != self.state:
self.returns_sum_S[s] = val
self.returns_count_N[s] = val
if s in self.v:
# grab one successor state of s from the transition model Psr
k = next(iter(self.env.mdp.Psr(s, self.env.mdp.A(s)[0])))[0]
if not self.env.mdp.is_terminal(k):
del self.v[s]
def reset(self):
from irlc.lectures.lec10.utils import agent_reset
agent_reset(self)
self._clear_states(None)
def train(self, s, a, r, sp, done=False, info_s=None, info_sp=None):
# self.episode = [e for e in self.episode if e[0] == self.state]
@@ -39,6 +41,7 @@ class MCAgentOneState(MCEvaluationAgent):
if __name__ == "__main__":
env = BookGridEnvironment(render_mode='human', living_reward=-0.05, print_states=True, zoom=2)
agent = MCAgentOneState(env, gamma=1, alpha=None, first_visit=True)
method_label = 'MC (gamma=1)'
agent.label = method_label
@@ -49,16 +52,3 @@ if __name__ == "__main__":
num_episodes = 1000
train(env, agent, num_episodes=num_episodes)
env.close()
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
lt = np.linspace(np.log(1000), np.log(2000) + 0*5000)
plt.plot(lt, 5 + 2 * np.sqrt(lt / 500), 'k-')
plt.plot(lt, 10 + 2 * np.sqrt(lt / (np.exp(lt) - 500)), 'r-')
plt.xlabel('log(t)')
plt.show()
# keyboard_play(env,agent,method_label='MC (alpha=0.5)')
@@ -7,40 +7,6 @@ from irlc.ex10.mc_evaluate import MCEvaluationAgent
import numpy as np
from irlc import interactive, train
# class MCAgentOneState(MCEvaluationAgent):
# def __init__(self, *args, state=None, **kwargs):
# a = 34
# super().__init__(*args, **kwargs)
# if state is None:
# state = self.env.mdp.initial_state
# self.state = state
# self._clear_states()
#
# def _clear_states(self, val=None):
# for s in self.env.mdp.nonterminal_states:
# # for a in self.env.mdp.A(s):
# # self.Q[s,a] = 0
# if s != self.state:
# self.returns_sum_S[s] = val
# self.returns_count_N[s] = val
# if s in self.v:
# k = next(self.env.mdp.Psr(s, self.env.mdp.A(s)[0]).keys().__iter__() )[0]
# if not self.env.mdp.is_terminal(k):
#
# del self.v[s]
#
# def train(self, s, a, r, sp, done=False, info_s=None, info_sp=None):
# # self.episode = [e for e in self.episode if e[0] == self.state]
# self._clear_states(0)
# super().train(s, a, r, sp, done)
# # Clear out many of the state, actions:
# self._clear_states(None)
# # for s in self.env.mdp.nonterminal_states:
# # if s != self.state:
# # self.v[s] = None
#
# pass
if __name__ == "__main__":
env = BookGridEnvironment(render_mode='human', living_reward=-0.05)
......
@@ -2,8 +2,10 @@
from irlc.lectures.lec10.lecture_10_mc_q_estimation import automatic_play_value
from irlc.gridworld.gridworld_environments import BookGridEnvironment
from irlc.ex10.td0_evaluate import TD0ValueAgent
from irlc.lectures.lec10.utils import agent_reset
if __name__ == "__main__":
env = BookGridEnvironment(render_mode='human', living_reward=-0.05)
TD0ValueAgent.reset = agent_reset
agent = TD0ValueAgent(env, gamma=1.0, alpha=0.2)
automatic_play_value(env,agent,method_label='TD(0)')
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.ex10.mc_agent import MCAgent
from irlc.ex09.rl_agent import TabularQ
class MCAgentResettable(MCAgent):
def reset(self):
return agent_reset(self)
def agent_reset(self):
# General reset option. Works on many agents.
attrs = ['returns_sum_S', 'returns_count_N', 'Q', 'v']
for attr in attrs:
if hasattr(self, attr):
at = getattr(self, attr)
if isinstance(at, dict):
at.clear()
if hasattr(self, 'Q') and isinstance(self.Q, TabularQ):
self.Q.q_.clear()
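Below is a minimal usage sketch of the reset helper, assuming the irlc API used elsewhere in this commit (interactive, train, BookGridEnvironment, MCAgent); the hyperparameters are illustrative and mirror the lec10/lec11 lecture scripts above, not a prescribed configuration.
from irlc import interactive, train
from irlc.gridworld.gridworld_environments import BookGridEnvironment
from irlc.ex10.mc_agent import MCAgent
from irlc.lectures.lec10.utils import agent_reset
# Attach the generic reset so the agent's tables can be cleared between interactive runs.
MCAgent.reset = agent_reset
env = BookGridEnvironment(render_mode='human', living_reward=-0.05, zoom=2)
agent = MCAgent(env, gamma=1.0, epsilon=0.15, alpha=None, first_visit=True)
env, agent = interactive(env, agent)
train(env, agent, num_episodes=1000)
env.close()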
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.berkley.rl.semi_grad_q import LinearSemiGradQAgent
from irlc.ex11.feature_encoder import GridworldXYEncoder
from irlc.gridworld.gridworld_environments import BookGridEnvironment
from irlc.lectures.lec10.lecture_10_mc_q_estimation import keyboard_play
if __name__ == "__main__":
env = BookGridEnvironment(render_mode='human')
agent = LinearSemiGradQAgent(env, gamma=0.95, epsilon=0.1, alpha=.01, q_encoder=GridworldXYEncoder(env))
keyboard_play(env, agent, method_label="Q-lin-xy")
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.gridworld.gridworld_environments import OpenGridEnvironment
from irlc.lectures.lec10.lecture_10_mc_q_estimation import keyboard_play
from irlc.ex11.sarsa_agent import SarsaAgent
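# Helper: run a given agent class on the open gridworld with fixed hyperparameters (gamma=0.99, epsilon=0.1, alpha=0.5) and play it back via keyboard_play.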
def open_play(Agent, method_label, frames_per_second=30, **args):
env = OpenGridEnvironment(render_mode='human', frames_per_second=frames_per_second)
agent = Agent(env, gamma=0.99, epsilon=0.1, alpha=.5, **args)
method_label = f"{method_label} (gamma=0.99, epsilon=0.1, alpha=0.5)"
keyboard_play(env, agent, method_label=method_label)
if __name__ == "__main__":
open_play(SarsaAgent, method_label="Sarsa")
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.lectures.lec11.mountain_car_env import FancyMountainCar
from irlc.pacman.pacman_resources import WHITE, BLACK
from irlc.utils.graphics_util_pygame import GraphicsUtilGym
from irlc.lectures.lec11.mountain_car_env import MountainCarVisualization
from irlc.ex11.semi_grad_sarsa import LinearSemiGradSarsa
if __name__ == '__main__':
from irlc import Agent, interactive, train
env = FancyMountainCar(render_mode='human')
num_of_tilings = 8
alpha = 0.3
# env = gym.make("MountainCar-v0")
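# The effective step size is alpha/num_of_tilings, i.e. the learning rate is scaled down by the number of tilings (the usual tile-coding convention).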
agent = LinearSemiGradSarsa(env, gamma=1, alpha=alpha/num_of_tilings, epsilon=0)
# agent = Agent(env)
env, agent = interactive(env, agent)
train(env, agent, num_episodes=10)
env.close()
pass
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.lectures.lec11.mountain_car_env import FancyMountainCar
from irlc.ex11.semi_grad_sarsa import LinearSemiGradSarsa
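# Agent variant whose train() is a no-op, so its weight vector is never updated; the __main__ block below gets the same effect by running LinearSemiGradSarsa with alpha=0.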
class RandomWeightAgent(LinearSemiGradSarsa):
def train(self, *args, **kwargs):
pass
pass
if __name__ == '__main__':
from irlc import Agent, interactive, train
env = FancyMountainCar(render_mode='human')
num_of_tilings = 8
alpha = 0
agent = LinearSemiGradSarsa(env, gamma=1, alpha=alpha/num_of_tilings, epsilon=0)
# agent = Agent(env)
env, agent = interactive(env, agent)
train(env, agent, num_episodes=10)
env.close()
pass
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
import numpy as np
from irlc.lectures.lec11.mountain_car_env import FancyMountainCar
from irlc.ex11.semi_grad_sarsa import LinearSemiGradSarsa
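# Agent variant that overwrites the learned weight vector with fresh random values after every training step.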
class RandomWeightAgent(LinearSemiGradSarsa):
def train(self, *args, **kwargs):
super().train(*args, **kwargs)
self.Q.w = np.random.randn(self.Q.w.shape[0])
if __name__ == '__main__':
from irlc import Agent, interactive, train
env = FancyMountainCar(render_mode='human')
num_of_tilings = 8
alpha = 0.3
# env = gym.make("MountainCar-v0")
agent = RandomWeightAgent(env) #(env, gamma=1, alpha=alpha/num_of_tilings, epsilon=0)
env, agent = interactive(env, agent)
train(env, agent, num_episodes=10)
env.close()
pass
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
# from irlc.berkley.rl.feature_encoder import SimplePacmanExtractor
from irlc.ex11.nstep_sarsa_agent import SarsaNAgent
from irlc.exam_tabular_examples.sarsa_nstep_delay import SarsaDelayNAgent
from irlc.lectures.lec11.lecture_10_sarsa_open import open_play
if __name__ == "__main__":
# env = OpenGridEnvironment()
# agent = (env, gamma=0.95, epsilon=0.1, alpha=.5)
open_play(SarsaDelayNAgent, method_label="N-step Sarsa n=8", n=8)
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.ex11.semi_grad_q import LinearSemiGradQAgent
from irlc.pacman.pacman_environment import PacmanEnvironment, PacmanWinWrapper
from irlc.ex11.feature_encoder import SimplePacmanExtractor
import matplotlib.pyplot as plt
# from irlc.utils.video_monitor import VideoMonitor
from irlc.ex01.agent import train
# from irlc import PlayWrapper
from irlc import interactive
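# Train for 100 episodes, then re-run 100 episodes greedily (epsilon=0, alpha=0) on the PacmanWinWrapper environment with interactive rendering.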
def play_pacman(env, agent, layout = 'smallGrid'):
train(env, agent, num_episodes=100)
env2 = PacmanWinWrapper(env)
# env2 = Monitor(env2, directory="experiments/randomdir", force=True)
# env2 = VideoMonitor(env2)
env2, agent = interactive(env2, agent)
agent.epsilon = 0
agent.alpha = 0
# agent = PlayWrapper(agent, env2)
train(env2, agent, num_episodes=100)
plt.show()
env.close()
if __name__ == "__main__":
layout = 'smallGrid'
env = PacmanEnvironment(animate_movement=True, layout=layout, render_mode='human', frames_per_second=100)
qex = SimplePacmanExtractor(env)
agent = LinearSemiGradQAgent(env, epsilon=0.05, alpha=0.1, gamma=0.8, q_encoder=qex)
play_pacman(env, agent, layout = 'smallGrid')
# main_plot('experiments/q_lin')
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.pacman.pacman_environment import PacmanEnvironment, PacmanWinWrapper
# from irlc.berkley.rl.feature_encoder import SimplePacmanExtractor
# from irlc.utils.player_wrapper_pyglet import PlayWrapper
from irlc import main_plot
import matplotlib.pyplot as plt
# from irlc.utils.video_monitor import VideoMonitor
from irlc.ex01.agent import train
# from irlc.lectures.lecture_09_mc import keyboard_play
from irlc.ex11.q_agent import QAgent
from irlc import interactive
def play_pacman(env, agent, layout = 'smallGrid'):
train(env, agent, num_episodes=100)
env2 = PacmanWinWrapper(env)
# env2 = Monitor(env2, directory="experiments/randomdir", force=True)
# env2 = VideoMonitor(env2)
env2, agent = interactive(env2, agent)
agent.epsilon = 0
agent.alpha = 0
# agent = PlayWrapper(agent, env2)
train(env2, agent, num_episodes=100)
plt.show()
env.close()
if __name__ == "__main__":
layout = 'smallGrid'
env = PacmanEnvironment(animate_movement=False, layout=layout, render_mode='human')
agent = QAgent(env, epsilon=0.05, alpha=0.1, gamma=0.8)
# from irlc import PlayWrapper
# agent = PlayWrapper(agent, env)
play_pacman(env, agent, layout = 'smallGrid')
# main_plot('experiments/q_lin')
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
# from irlc.berkley.rl.feature_encoder import SimplePacmanExtractor
from irlc.gridworld.gridworld_environments import BookGridEnvironment
from irlc.lectures.lec10.lecture_10_mc_q_estimation import keyboard_play
from irlc.ex11.q_agent import QAgent
if __name__ == "__main__":
env = BookGridEnvironment(render_mode='human')
agent = QAgent(env, gamma=0.95, epsilon=0.1, alpha=.2)
keyboard_play(env, agent, method_label="Q-learning")
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.gridworld.gridworld_environments import CliffGridEnvironment, CliffGridEnvironment2
from irlc.ex11.q_agent import QAgent
# def cliffwalk(env, agent, method_label="method"):
# agent = PlayWrapper(agent, env)
# env = VideoMonitor(env, agent=agent, fps=100, continious_recording=True, agent_monitor_keys=('pi', 'Q'), render_kwargs={'method_label': method_label})
# train(env, agent, num_episodes=200)
# env.close()
from irlc.lectures.lec11.lecture_11_sarsa_cliff import cliffwalk, gamma, alpha, epsi
if __name__ == "__main__":
import numpy as np
np.random.seed(1)
env = CliffGridEnvironment2(zoom=.8, render_mode='human')
agent = QAgent(env, gamma=gamma, epsilon=epsi, alpha=alpha)
cliffwalk(env, agent, method_label="Q-learning")