diff --git a/.gitignore b/.gitignore index c014141f3718ba121bdf547318bae0ede81995fd..53552e861876c34bcb300016745c39c949987535 100644 --- a/.gitignore +++ b/.gitignore @@ -10,10 +10,10 @@ exam_tabular_examples #solutions/ex07 #solutions/ex08 # solutions/ex09 -solutions/ex10 -solutions/ex11 -solutions/ex12 -solutions/ex13 +#solutions/ex10 +#solutions/ex11 +#solutions/ex12 +#solutions/ex13 #irlc/ex03 #irlc/ex04 @@ -36,8 +36,8 @@ solutions/ex13 #irlc/tests/tests_week07.py #irlc/tests/tests_week08.py # irlc/tests/tests_week09.py -irlc/tests/tests_week10.py -irlc/tests/tests_week11.py +#irlc/tests/tests_week10.py +#irlc/tests/tests_week11.py irlc/tests/tests_week12.py irlc/tests/tests_week13.py @@ -74,7 +74,7 @@ irlc/exam/exam20*/solution #irlc/lectures/lec08 # irlc/lectures/lec09 #irlc/lectures/lec10 -irlc/lectures/lec11 +#irlc/lectures/lec11 irlc/lectures/lec12 irlc/lectures/lec13 diff --git a/irlc/lectures/lec10/lecture_10_mc_control.py b/irlc/lectures/lec10/lecture_10_mc_control.py index e286478a8cabf26f4878528a1fbc0f402e5c25ef..b727d364088ca893ac99ea5bffa0302eb8b2e48c 100644 --- a/irlc/lectures/lec10/lecture_10_mc_control.py +++ b/irlc/lectures/lec10/lecture_10_mc_control.py @@ -2,12 +2,17 @@ from irlc.lectures.lec10.lecture_10_mc_q_estimation import keyboard_play from irlc.gridworld.gridworld_environments import BookGridEnvironment from irlc.ex10.mc_agent import MCAgent +# from irlc.lectures.lec10.utils import MCAgentResettable + import numpy as np if __name__ == "__main__": np.random.seed(433) - env = BookGridEnvironment(render_mode='human',zoom=2) + env = BookGridEnvironment(render_mode='human',zoom=2, living_reward=-0.05) # agent = MCAgent(env, gamma=0.9, epsilon=0.15, alpha=0.1, first_visit=True) + from irlc.lectures.lec10.utils import agent_reset + MCAgent.reset = agent_reset agent = MCAgent(env, gamma=1.0, epsilon=0.15, alpha=None, first_visit=True) + # env, agent = interactive(env, agent) keyboard_play(env,agent,method_label='MC control') diff --git a/irlc/lectures/lec10/lecture_10_mc_onestate_first.py b/irlc/lectures/lec10/lecture_10_mc_onestate_first.py index c111aa624334fe8611d496bff8bd41ca0dd01ee4..32b7afad787653e53bd9c18c869814429ef81bf1 100644 --- a/irlc/lectures/lec10/lecture_10_mc_onestate_first.py +++ b/irlc/lectures/lec10/lecture_10_mc_onestate_first.py @@ -12,6 +12,8 @@ class CaughtGrid(GridworldEnvironment): def __init__(self, **kwargs): super().__init__(map, living_reward=1, zoom=1.5, **kwargs) + + if __name__ == "__main__": env = CaughtGrid(view_mode=1, render_mode='human') agent = MCEvaluationAgent(env, gamma=1, alpha=None) diff --git a/irlc/lectures/lec10/lecture_10_mc_q_estimation.py b/irlc/lectures/lec10/lecture_10_mc_q_estimation.py index 4ba40a2c3ac4ad28e12c4b0ee1b16d76c9adc667..bdba3e189f5f35f38f8f3ad73c35ab6e70d8228c 100644 --- a/irlc/lectures/lec10/lecture_10_mc_q_estimation.py +++ b/irlc/lectures/lec10/lecture_10_mc_q_estimation.py @@ -11,21 +11,6 @@ def keyboard_play(env, agent, method_label='MC',autoplay=False, num_episodes=100 env.close() -def automatic_play(env, agent, method_label='MC'): - # agent = PlayWrapper(agent, env) - env = VideoMonitor(env, agent=agent, fps=40, continious_recording=True, agent_monitor_keys=('pi', 'Q'), render_kwargs={'method_label': method_label}) - train(env, agent, num_episodes=1000) - env.close() - -def automatic_play_value(env, agent, method_label='MC'): - agent.label = method_label - env, agent = interactive(env, agent) - - # env = VideoMonitor(env, agent=agent, fps=40, continious_recording=True, agent_monitor_keys=('v'), 
render_kwargs={'method_label': method_label}) - # agent = PlayWrapper(agent, env) - train(env, agent, num_episodes=1000) - env.close() - if __name__ == "__main__": env = BookGridEnvironment(render_mode='human', zoom=2, living_reward=-0.05) from irlc.ex10.mc_agent import MCAgent diff --git a/irlc/lectures/lec10/lecture_10_mc_value_every.py b/irlc/lectures/lec10/lecture_10_mc_value_every.py index 8598fa5e78834d5337f33217a21eeb7694af587e..d42c5ac8aa80eca9d1b82a5cb56aa35436d1b665 100644 --- a/irlc/lectures/lec10/lecture_10_mc_value_every.py +++ b/irlc/lectures/lec10/lecture_10_mc_value_every.py @@ -6,6 +6,6 @@ from irlc.ex10.mc_evaluate import MCEvaluationAgent if __name__ == "__main__": env = BookGridEnvironment(view_mode=1, render_mode='human', living_reward=-0.05) - agent = MCEvaluationAgent(env, gamma=.9, alpha=None, first_visit=False) + agent = MCEvaluationAgent(env, gamma=1, alpha=None, first_visit=False) keyboard_play_value(env,agent,method_label='MC every') diff --git a/irlc/lectures/lec10/lecture_10_mc_value_first_one_state.py b/irlc/lectures/lec10/lecture_10_mc_value_first_one_state.py index c998543f234744811dbbf68613dce641776f1934..17406612468718a3279716d5d2791966cf48a027 100644 --- a/irlc/lectures/lec10/lecture_10_mc_value_first_one_state.py +++ b/irlc/lectures/lec10/lecture_10_mc_value_first_one_state.py @@ -6,6 +6,7 @@ from irlc.ex10.mc_evaluate import MCEvaluationAgent import numpy as np from irlc import interactive, train + class MCAgentOneState(MCEvaluationAgent): def __init__(self, *args, state=None, **kwargs): a = 34 @@ -17,18 +18,19 @@ class MCAgentOneState(MCEvaluationAgent): def _clear_states(self, val=None): for s in self.env.mdp.nonterminal_states: - # for a in self.env.mdp.A(s): - # self.Q[s,a] = 0 if s != self.state: self.returns_sum_S[s] = val self.returns_count_N[s] = val - if s in self.v: k = next(self.env.mdp.Psr(s, self.env.mdp.A(s)[0]).keys().__iter__() )[0] if not self.env.mdp.is_terminal(k): del self.v[s] + def reset(self): + from irlc.lectures.lec10.utils import agent_reset + agent_reset(self) + self._clear_states(None) def train(self, s, a, r, sp, done=False, info_s=None, info_sp=None): # self.episode = [e for e in self.episode if e[0] == self.state] @@ -39,6 +41,7 @@ class MCAgentOneState(MCEvaluationAgent): if __name__ == "__main__": env = BookGridEnvironment(render_mode='human', living_reward=-0.05, print_states=True, zoom=2) + agent = MCAgentOneState(env, gamma=1, alpha=None, first_visit=True) method_label = 'MC (gamma=1)' agent.label = method_label @@ -49,16 +52,3 @@ if __name__ == "__main__": num_episodes = 1000 train(env, agent, num_episodes=num_episodes) env.close() - - import matplotlib.pyplot as plt - import numpy as np - - import matplotlib.pyplot as plt - import numpy as np - - lt = np.linspace(np.log(1000), np.log(2000) + 0*5000) - plt.plot(lt, 5 + 2 * np.sqrt(lt / 500), 'k-') - plt.plot(lt, 10 + 2 * np.sqrt(lt / (np.exp(lt) - 500)), 'r-') - plt.xlabel('log(t)') - plt.show() - # keyboard_play(env,agent,method_label='MC (alpha=0.5)') diff --git a/irlc/lectures/lec10/lecture_10_mc_value_first_one_state_b.py b/irlc/lectures/lec10/lecture_10_mc_value_first_one_state_b.py index 6567221b84c2df45f4c73f7921df5173c7e66608..4f7c8d223c5fad57c970144ed17aa326eb0bc7cc 100644 --- a/irlc/lectures/lec10/lecture_10_mc_value_first_one_state_b.py +++ b/irlc/lectures/lec10/lecture_10_mc_value_first_one_state_b.py @@ -7,40 +7,6 @@ from irlc.ex10.mc_evaluate import MCEvaluationAgent import numpy as np from irlc import interactive, train -# class 
MCAgentOneState(MCEvaluationAgent): -# def __init__(self, *args, state=None, **kwargs): -# a = 34 -# super().__init__(*args, **kwargs) -# if state is None: -# state = self.env.mdp.initial_state -# self.state = state -# self._clear_states() -# -# def _clear_states(self, val=None): -# for s in self.env.mdp.nonterminal_states: -# # for a in self.env.mdp.A(s): -# # self.Q[s,a] = 0 -# if s != self.state: -# self.returns_sum_S[s] = val -# self.returns_count_N[s] = val -# if s in self.v: -# k = next(self.env.mdp.Psr(s, self.env.mdp.A(s)[0]).keys().__iter__() )[0] -# if not self.env.mdp.is_terminal(k): -# -# del self.v[s] -# -# def train(self, s, a, r, sp, done=False, info_s=None, info_sp=None): -# # self.episode = [e for e in self.episode if e[0] == self.state] -# self._clear_states(0) -# super().train(s, a, r, sp, done) -# # Clear out many of the state, actions: -# self._clear_states(None) -# # for s in self.env.mdp.nonterminal_states: -# # if s != self.state: -# # self.v[s] = None -# -# pass - if __name__ == "__main__": env = BookGridEnvironment(render_mode='human', living_reward=-0.05) diff --git a/irlc/lectures/lec10/lecture_10_td_keyboard.py b/irlc/lectures/lec10/lecture_10_td_keyboard.py index 8787900face05cca2791b80d72fc51323dec2392..d1c9d9d9f921cb6306549d4a1769f4104fd10413 100644 --- a/irlc/lectures/lec10/lecture_10_td_keyboard.py +++ b/irlc/lectures/lec10/lecture_10_td_keyboard.py @@ -2,8 +2,10 @@ from irlc.lectures.lec10.lecture_10_mc_q_estimation import automatic_play_value from irlc.gridworld.gridworld_environments import BookGridEnvironment from irlc.ex10.td0_evaluate import TD0ValueAgent +from irlc.lectures.lec10.utils import agent_reset if __name__ == "__main__": env = BookGridEnvironment(render_mode='human', living_reward=-0.05) + TD0ValueAgent.reset = agent_reset agent = TD0ValueAgent(env, gamma=1.0, alpha=0.2) automatic_play_value(env,agent,method_label='TD(0)') diff --git a/irlc/lectures/lec10/utils.py b/irlc/lectures/lec10/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..fc894541b47de7124844437e2a5a572b5ff392d7 --- /dev/null +++ b/irlc/lectures/lec10/utils.py @@ -0,0 +1,20 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.ex10.mc_agent import MCAgent +from irlc.ex09.rl_agent import TabularQ + +class MCAgentResettable(MCAgent): + def reset(self): + return agent_reset(self) + +def agent_reset(self): + # General reset option. Wroks on many agents. + attrs = ['returns_sum_S', 'returns_count_N', 'Q', 'v'] + + for attr in attrs: + if hasattr(self, attr): + at = getattr(self, attr) + if isinstance(at, dict): + at.clear() + + if hasattr(self, 'Q') and isinstance(self.Q, TabularQ): + self.Q.q_.clear() diff --git a/irlc/lectures/lec11/__init__.py b/irlc/lectures/lec11/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a56057c84d0ceac54aab1d40ba0f370c77fe10be --- /dev/null +++ b/irlc/lectures/lec11/__init__.py @@ -0,0 +1 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. 
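For reference, a minimal sketch of how the new agent_reset helper in irlc/lectures/lec10/utils.py is meant to be wired up, mirroring the monkey-patching already done above in lecture_10_mc_control.py and lecture_10_td_keyboard.py (all names are taken from this diff; only the explicit agent.reset() call at the end is illustrative):

from irlc.ex10.mc_agent import MCAgent
from irlc.gridworld.gridworld_environments import BookGridEnvironment
from irlc.lectures.lec10.utils import agent_reset

# Attach the generic reset to the agent class. agent_reset clears the dict-valued
# attributes returns_sum_S, returns_count_N, Q and v when they exist, plus the
# backing dict of a TabularQ instance, so a lecture demo can restart from scratch.
MCAgent.reset = agent_reset

env = BookGridEnvironment(render_mode='human', zoom=2, living_reward=-0.05)
agent = MCAgent(env, gamma=1.0, epsilon=0.15, alpha=None, first_visit=True)
agent.reset()  # wipe the learned estimates; subclassing MCAgentResettable is the alternative route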
diff --git a/irlc/lectures/lec11/lecture_10_grid_lin_q.py b/irlc/lectures/lec11/lecture_10_grid_lin_q.py new file mode 100644 index 0000000000000000000000000000000000000000..659201d8487242b35aaa56cde863327a2d341595 --- /dev/null +++ b/irlc/lectures/lec11/lecture_10_grid_lin_q.py @@ -0,0 +1,10 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.berkley.rl.semi_grad_q import LinearSemiGradQAgent +from irlc.ex11.feature_encoder import GridworldXYEncoder +from irlc.gridworld.gridworld_environments import BookGridEnvironment +from irlc.lectures.lec10.lecture_10_mc_q_estimation import keyboard_play + +if __name__ == "__main__": + env = BookGridEnvironment(render_mode='human') + agent = LinearSemiGradQAgent(env, gamma=0.95, epsilon=0.1, alpha=.01, q_encoder=GridworldXYEncoder(env)) + keyboard_play(env, agent, method_label="Q-lin-xy") diff --git a/irlc/lectures/lec11/lecture_10_sarsa_open.py b/irlc/lectures/lec11/lecture_10_sarsa_open.py new file mode 100644 index 0000000000000000000000000000000000000000..5793603a4e00f1dba5cdfe30343ca2f3d2e155d3 --- /dev/null +++ b/irlc/lectures/lec11/lecture_10_sarsa_open.py @@ -0,0 +1,13 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.gridworld.gridworld_environments import OpenGridEnvironment +from irlc.lectures.lec10.lecture_10_mc_q_estimation import keyboard_play +from irlc.ex11.sarsa_agent import SarsaAgent + +def open_play(Agent, method_label, frames_per_second=30, **args): + env = OpenGridEnvironment(render_mode='human', frames_per_second=frames_per_second) + agent = Agent(env, gamma=0.99, epsilon=0.1, alpha=.5, **args) + method_label = f"{method_label} (gamma=0.99, epsilon=0.1, alpha=0.5)" + keyboard_play(env, agent, method_label=method_label) + +if __name__ == "__main__": + open_play(SarsaAgent, method_label="Sarsa") diff --git a/irlc/lectures/lec11/lecture_11_mountaincar_feature_space.py b/irlc/lectures/lec11/lecture_11_mountaincar_feature_space.py new file mode 100644 index 0000000000000000000000000000000000000000..1c99f0330abd3612e5ee12064c8beaa0f6be7d5f --- /dev/null +++ b/irlc/lectures/lec11/lecture_11_mountaincar_feature_space.py @@ -0,0 +1,25 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. 
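+# Lecture demo: linear semi-gradient Sarsa on mountain car, rendered through the custom
+# FancyMountainCar environment (see mountain_car_env.py later in this diff), which can overlay
+# the agent's value estimates while it trains. Runs 10 interactive episodes with gamma=1,
+# epsilon=0 and step size alpha/num_of_tilings.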
+from irlc.lectures.lec11.mountain_car_env import FancyMountainCar +from irlc.pacman.pacman_resources import WHITE, BLACK +from irlc.utils.graphics_util_pygame import GraphicsUtilGym +from irlc.lectures.lec11.mountain_car_env import MountainCarVisualization +from irlc.ex11.semi_grad_sarsa import LinearSemiGradSarsa + +if __name__ == '__main__': + from irlc import Agent, interactive, train + env = FancyMountainCar(render_mode='human') + num_of_tilings = 8 + alpha = 0.3 + + # env = gym.make("MountainCar-v0") + agent = LinearSemiGradSarsa(env, gamma=1, alpha=alpha/num_of_tilings, epsilon=0) + # agent = Agent(env) + + env, agent = interactive(env, agent) + train(env, agent, num_episodes=10) + + env.close() + + + + pass diff --git a/irlc/lectures/lec11/lecture_11_mountaincar_nolearn.py b/irlc/lectures/lec11/lecture_11_mountaincar_nolearn.py new file mode 100644 index 0000000000000000000000000000000000000000..bb94976cd04de92cdbee2b403a408e468182fcc8 --- /dev/null +++ b/irlc/lectures/lec11/lecture_11_mountaincar_nolearn.py @@ -0,0 +1,25 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.lectures.lec11.mountain_car_env import FancyMountainCar +from irlc.ex11.semi_grad_sarsa import LinearSemiGradSarsa + +class RandomWeightAgent(LinearSemiGradSarsa): + def train(self, *args, **kwargs): + pass + pass + +if __name__ == '__main__': + from irlc import Agent, interactive, train + env = FancyMountainCar(render_mode='human') + num_of_tilings = 8 + alpha = 0 + agent = LinearSemiGradSarsa(env, gamma=1, alpha=alpha/num_of_tilings, epsilon=0) + # agent = Agent(env) + + env, agent = interactive(env, agent) + train(env, agent, num_episodes=10) + + env.close() + + + + pass diff --git a/irlc/lectures/lec11/lecture_11_mountaincar_random_weights.py b/irlc/lectures/lec11/lecture_11_mountaincar_random_weights.py new file mode 100644 index 0000000000000000000000000000000000000000..e8709431847cbbbd30552ca2be76f1c94431fa67 --- /dev/null +++ b/irlc/lectures/lec11/lecture_11_mountaincar_random_weights.py @@ -0,0 +1,25 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +import numpy as np +from irlc.lectures.lec11.mountain_car_env import FancyMountainCar +from irlc.ex11.semi_grad_sarsa import LinearSemiGradSarsa + +class RandomWeightAgent(LinearSemiGradSarsa): + def train(self, *args, **kwargs): + super().train(*args, **kwargs) + self.Q.w = np.random.randn(self.Q.w.shape[0]) + +if __name__ == '__main__': + from irlc import Agent, interactive, train + env = FancyMountainCar(render_mode='human') + num_of_tilings = 8 + alpha = 0.3 + # env = gym.make("MountainCar-v0") + agent = RandomWeightAgent(env) #(env, gamma=1, alpha=alpha/num_of_tilings, epsilon=0) + env, agent = interactive(env, agent) + train(env, agent, num_episodes=10) + + env.close() + + + + pass diff --git a/irlc/lectures/lec11/lecture_11_nstep_open.py b/irlc/lectures/lec11/lecture_11_nstep_open.py new file mode 100644 index 0000000000000000000000000000000000000000..ab672b2067bcd2dfeed17a719dad1c42186c32a0 --- /dev/null +++ b/irlc/lectures/lec11/lecture_11_nstep_open.py @@ -0,0 +1,11 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. 
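+# Lecture demo: n-step Sarsa (SarsaDelayNAgent with n=8) on the open gridworld, reusing
+# open_play from lecture_10_sarsa_open (gamma=0.99, epsilon=0.1, alpha=0.5).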
+# from irlc.berkley.rl.feature_encoder import SimplePacmanExtractor + +from irlc.ex11.nstep_sarsa_agent import SarsaNAgent +from irlc.exam_tabular_examples.sarsa_nstep_delay import SarsaDelayNAgent + +from irlc.lectures.lec11.lecture_10_sarsa_open import open_play +if __name__ == "__main__": + # env = OpenGridEnvironment() + # agent = (env, gamma=0.95, epsilon=0.1, alpha=.5) + open_play(SarsaDelayNAgent, method_label="N-step Sarsa n=8", n=8) diff --git a/irlc/lectures/lec11/lecture_11_pacman_lin_q.py b/irlc/lectures/lec11/lecture_11_pacman_lin_q.py new file mode 100644 index 0000000000000000000000000000000000000000..3b7e121efe6485e2529359a5979091cfc207cd1a --- /dev/null +++ b/irlc/lectures/lec11/lecture_11_pacman_lin_q.py @@ -0,0 +1,32 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.ex11.semi_grad_q import LinearSemiGradQAgent +from irlc.pacman.pacman_environment import PacmanEnvironment, PacmanWinWrapper +from irlc.ex11.feature_encoder import SimplePacmanExtractor +import matplotlib.pyplot as plt +# from irlc.utils.video_monitor import VideoMonitor +from irlc.ex01.agent import train +# from irlc import PlayWrapper +from irlc import interactive + +def play_pacman(env, agent, layout = 'smallGrid'): + train(env, agent, num_episodes=100) + + env2 = PacmanWinWrapper(env) + + # env2 = Monitor(env2, directory="experiments/randomdir", force=True) + # env2 = VideoMonitor(env2) + env2, agent = interactive(env, agent) + agent.epsilon = 0 + agent.alpha = 0 + # agent = PlayWrapper(agent, env2) + train(env2, agent, num_episodes=100) + plt.show() + env.close() + +if __name__ == "__main__": + layout = 'smallGrid' + env = PacmanEnvironment(animate_movement=True, layout=layout, render_mode='human', frames_per_second=100) + qex = SimplePacmanExtractor(env) + agent = LinearSemiGradQAgent(env, epsilon=0.05, alpha=0.1, gamma=0.8, q_encoder=qex) + play_pacman(env, agent, layout = 'smallGrid') + # main_plot('experiments/q_lin') diff --git a/irlc/lectures/lec11/lecture_11_pacman_q.py b/irlc/lectures/lec11/lecture_11_pacman_q.py new file mode 100644 index 0000000000000000000000000000000000000000..7a51a0679ae8ee815a34df28dedb721b5632ebee --- /dev/null +++ b/irlc/lectures/lec11/lecture_11_pacman_q.py @@ -0,0 +1,35 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. 
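+# Lecture demo: tabular Q-learning on the smallGrid Pacman layout. play_pacman first trains
+# for 100 episodes, then sets epsilon = alpha = 0 and runs 100 further episodes greedily
+# inside PacmanWinWrapper with interactive rendering.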
+from irlc.pacman.pacman_environment import PacmanEnvironment, PacmanWinWrapper +# from irlc.berkley.rl.feature_encoder import SimplePacmanExtractor +# from irlc.utils.player_wrapper_pyglet import PlayWrapper +from irlc import main_plot +import matplotlib.pyplot as plt +# from irlc.utils.video_monitor import VideoMonitor +from irlc.ex01.agent import train +# from irlc.lectures.lecture_09_mc import keyboard_play +from irlc.ex11.q_agent import QAgent +from irlc import interactive + + +def play_pacman(env, agent, layout = 'smallGrid'): + + train(env, agent, num_episodes=100) + env2 = PacmanWinWrapper(env) + # env2 = Monitor(env2, directory="experiments/randomdir", force=True) + # env2 = VideoMonitor(env2) + env2, agent = interactive(env2, agent) + agent.epsilon = 0 + agent.alpha = 0 + # agent = PlayWrapper(agent, env2) + train(env2, agent, num_episodes=100) + plt.show() + env.close() + +if __name__ == "__main__": + layout = 'smallGrid' + env = PacmanEnvironment(animate_movement=False, layout=layout, render_mode='human') + agent = QAgent(env, epsilon=0.05, alpha=0.1, gamma=0.8) + # from irlc import PlayWrapper + # agent = PlayWrapper(agent, env) + play_pacman(env, agent, layout = 'smallGrid') + # main_plot('experiments/q_lin') diff --git a/irlc/lectures/lec11/lecture_11_q.py b/irlc/lectures/lec11/lecture_11_q.py new file mode 100644 index 0000000000000000000000000000000000000000..d3df9dbb8f1836bfbe0c622be1212acbb57b6367 --- /dev/null +++ b/irlc/lectures/lec11/lecture_11_q.py @@ -0,0 +1,10 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +# from irlc.berkley.rl.feature_encoder import SimplePacmanExtractor +from irlc.gridworld.gridworld_environments import BookGridEnvironment +from irlc.lectures.lec10.lecture_10_mc_q_estimation import keyboard_play +from irlc.ex11.q_agent import QAgent + +if __name__ == "__main__": + env = BookGridEnvironment(render_mode='human') + agent = QAgent(env, gamma=0.95, epsilon=0.1, alpha=.2) + keyboard_play(env, agent, method_label="Q-learning") diff --git a/irlc/lectures/lec11/lecture_11_q_cliff.py b/irlc/lectures/lec11/lecture_11_q_cliff.py new file mode 100644 index 0000000000000000000000000000000000000000..421db1fa16764a3b432bd03d4a072f2108dabe77 --- /dev/null +++ b/irlc/lectures/lec11/lecture_11_q_cliff.py @@ -0,0 +1,18 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. 
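+# Lecture demo: Q-learning on the cliff-walk gridworld (CliffGridEnvironment2), reusing the
+# cliffwalk() loop and the gamma, alpha and epsi settings from lecture_11_sarsa_cliff so the
+# two methods run under identical conditions.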
+from irlc.gridworld.gridworld_environments import CliffGridEnvironment, CliffGridEnvironment2 +from irlc.ex11.q_agent import QAgent + + +# def cliffwalk(env, agent, method_label="method"): +# agent = PlayWrapper(agent, env) + # env = VideoMonitor(env, agent=agent, fps=100, continious_recording=True, agent_monitor_keys=('pi', 'Q'), render_kwargs={'method_label': method_label}) + # train(env, agent, num_episodes=200) + # env.close() + +from irlc.lectures.lec11.lecture_11_sarsa_cliff import cliffwalk, gamma, alpha, epsi +if __name__ == "__main__": + import numpy as np + np.random.seed(1) + env = CliffGridEnvironment2(zoom=.8, render_mode='human') + agent = QAgent(env, gamma=gamma, epsilon=epsi, alpha=alpha) + cliffwalk(env, agent, method_label="Q-learning") diff --git a/irlc/lectures/lec11/lecture_11_q_open.py b/irlc/lectures/lec11/lecture_11_q_open.py new file mode 100644 index 0000000000000000000000000000000000000000..f0a35a5ba17fde85fb2b10da97413aba4879c5c6 --- /dev/null +++ b/irlc/lectures/lec11/lecture_11_q_open.py @@ -0,0 +1,12 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.gridworld_pyglet.gridworld_environments import OpenGridEnvironment +from irlc.lectures.lec10.lecture_10_mc_q_estimation import keyboard_play +from irlc.ex11.q_agent import QAgent + +def open_play(Agent, method_label, **args): + env = OpenGridEnvironment() + agent = Agent(env, gamma=0.99, epsilon=0.1, alpha=.5, **args) + keyboard_play(env, agent, method_label=method_label) + +if __name__ == "__main__": + open_play(QAgent, method_label="Q-learning") diff --git a/irlc/lectures/lec11/lecture_11_sarsa.py b/irlc/lectures/lec11/lecture_11_sarsa.py new file mode 100644 index 0000000000000000000000000000000000000000..791a1b4869b21c64baa09ee575019799da66f2e7 --- /dev/null +++ b/irlc/lectures/lec11/lecture_11_sarsa.py @@ -0,0 +1,9 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.gridworld.gridworld_environments import BookGridEnvironment +from irlc.lectures.lec10.lecture_10_mc_q_estimation import keyboard_play +from irlc.exam_tabular_examples.sarsa_nstep_delay import SarsaDelayNAgent + +if __name__ == "__main__": + env = BookGridEnvironment(render_mode='human') + agent = SarsaDelayNAgent(env, gamma=0.95, epsilon=0.1, alpha=.96, n=1) + keyboard_play(env, agent, method_label="Sarsa") diff --git a/irlc/lectures/lec11/lecture_11_sarsa_cliff.py b/irlc/lectures/lec11/lecture_11_sarsa_cliff.py new file mode 100644 index 0000000000000000000000000000000000000000..3d250fa581975dbbc9fbf1fd2afebd5814c6b6e3 --- /dev/null +++ b/irlc/lectures/lec11/lecture_11_sarsa_cliff.py @@ -0,0 +1,33 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. 
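+# Lecture demo: Sarsa on the cliff-walk gridworld. cliffwalk() attaches interactive rendering
+# and trains for 1000 episodes; the hyperparameters epsi=0.5, gamma=1.0, alpha=.3 defined here
+# are also imported by lecture_11_q_cliff.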
+# from irlc.utils.player_wrapper_pyglet import PlayWrapper +from irlc.gridworld.gridworld_environments import CliffGridEnvironment, CliffGridEnvironment2 +# from irlc.utils.video_monitor import VideoMonitor +from irlc.ex01.agent import train +from irlc import interactive +from irlc.ex11.sarsa_agent import SarsaAgent + + +def cliffwalk(env, agent, method_label="method"): + # agent = PlayWrapper(agent, env) + env.label = method_label + agent.method_label = method_label + agent.label = method_label + agent.method = method_label + + + env, agent = interactive(env, agent) + # env = VideoMonitor(env, agent=agent, fps=200, continious_recording=True, agent_monitor_keys=('pi', 'Q'), render_kwargs={'method_label': method_label}) + train(env, agent, num_episodes=1000) + env.close() + +epsi = 0.5 +gamma = 1.0 +alpha = .3 + +if __name__ == "__main__": + import numpy as np + np.random.seed(1) + env = CliffGridEnvironment2(zoom=.8, render_mode='human') + agent = SarsaAgent(env, gamma=gamma, epsilon=epsi, alpha=alpha) + # agent = QAgent(env, gamma=0.95, epsilon=0.5, alpha=.2) + cliffwalk(env, agent, method_label="Sarsa") diff --git a/irlc/lectures/lec11/mountain_car_env.py b/irlc/lectures/lec11/mountain_car_env.py new file mode 100644 index 0000000000000000000000000000000000000000..c105e3aca23cbc864c96e2c7ab14a41aa8a1b53a --- /dev/null +++ b/irlc/lectures/lec11/mountain_car_env.py @@ -0,0 +1,326 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from gymnasium.envs.classic_control import MountainCarEnv +import math +from typing import Optional +import numpy as np +import gymnasium as gym +from gymnasium import spaces +from gymnasium.envs.classic_control import utils +from gymnasium.error import DependencyNotInstalled + +class FancyMountainCar(MountainCarEnv): # piggybag on the original env. 
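+    # Keeps MountainCarEnv's dynamics unchanged and only swaps the renderer: render() lazily
+    # builds a MountainCarVisualization (passing self.agent when one has been attached to the
+    # env), so the car and, when an agent is present, its value estimates are drawn via pygame.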
+ visualization = None + + def __init__(self, render_mode: Optional[str] = None, goal_velocity=0): + super().__init__(render_mode=render_mode, goal_velocity=goal_velocity) + + def render(self): + if self.visualization is None: + self.visualization = MountainCarVisualization(self, self.agent if hasattr(self, 'agent') else None) + return self.visualization.render() + + def close(self): + if self.visualization is not None: + self.visualization.close() + + +from irlc.pacman.pacman_resources import WHITE, BLACK +from irlc.utils.graphics_util_pygame import GraphicsUtilGym +class MountainCarVisualization: + def __init__(self, env, agent): + self.env = env + self.agent = agent + + # self.k = 0 + # self.states = [] + # self.actions = [] + # self.factories = [] + # self.inventory = inventory + # xmin = -0.2 + # xmax = inventory.N * 2 + 1.4 + # xmax = 4 + + # ymin = -0.4 + # ymax = 1.4 + 0.2 + # dx = xmax - xmin + # dy = ymax - ymin + self.ga = GraphicsUtilGym() + # screen_width = 1300 + screen_width = env.screen_width * 2 + # + # -env.min_position + # env.max_position + + xmin = env.min_position + xmax = env.max_position + 1.8 + # env._height + + screen_height = env.screen_height + ymin = 0 + ymax = 1.2 + # screen_height = dy * (screen_width / dx) + frames_per_second = 30 + self.ga.begin_graphics(screen_width, screen_height, + local_xmin_xmax_ymin_ymax=(xmin, xmax, ymax, ymin), frames_per_second=frames_per_second, + color=WHITE, title=f"MountainCar Environment") + + # self.last_action = None + # self.agent = None + # self.last_reward = None + # self.scale = screen_width / dx + + x_cache = [] + + + def render(self): + # if self.env.render_mode is None: + # assert self.env.spec is not None + # gym.logger.warn( + # "You are calling render method without specifying any render mode. " + # "You can specify the render_mode at initialization, " + # f'e.g. 
gym.make("{self.spec.id}", render_mode="rgb_array")' + # ) + # return + # try: + # import pygame + # from pygame import gfxdraw + # except ImportError as e: + # raise DependencyNotInstalled( + # 'pygame is not installed, run `pip install "gymnasium[classic_control]"`' + # ) from e + + # + # + # if self.screen is None: + # pygame.init() + # if self.render_mode == "human": + # pygame.display.init() + # self.screen = pygame.display.set_mode( + # (self.screen_width, self.screen_height) + # ) + # else: # mode in "rgb_array" + # self.screen = pygame.Surface((self.screen_width, self.screen_height)) + # if self.clock is None: + # self.clock = pygame.time.Clock() + self.ga.draw_background() + # self.ga.circle("sadf", pos=(0,0), r=100, fillColor=(100, 10, 50)) + + pos = self.env.state[0] + scale = 1 + + xs = np.linspace(self.env.min_position, self.env.max_position, 100) + ys = self.env._height(xs) + # xys = list(zip((xs - self.env.min_position) * scale, ys * scale)) + + self.ga.polyline("asdfasfd", xs=xs, ys=ys, width=1) + + + # pygame.draw.aalines(self.surf, points=xys, closed=False, color=(0, 0, 0)) + + + + world_width = self.env.max_position - self.env.min_position + # scale = self.screen_width / world_width + rscale = self.env.screen_width / world_width + + carwidth = 40 / rscale + carheight = 20 / rscale + + # self.surf = pygame.Surface((self.screen_width, self.screen_height)) + # self.surf.fill((255, 255, 255)) + + # pos = self.state[0] + + # xs = np.linspace(self.min_position, self.max_position, 100) + # ys = self._height(xs) + # xys = list(zip((xs - self.min_position) * scale, ys * scale)) + + # pygame.draw.aalines(self.surf, points=xys, closed=False, color=(0, 0, 0)) + import pygame + clearance = 10 / rscale + # clearance=0.01 + + l, r, t, b = -carwidth / 2, carwidth / 2, carheight, 0 + coords = [] + for c in [(l, b), (l, t), (r, t), (r, b)]: + c = pygame.math.Vector2(c).rotate_rad(math.cos(3 * pos)) + coords.append( + ( + c[0] + (pos - 0*self.env.min_position) * scale, + c[1] + clearance + self.env._height(pos) * scale, + ) + ) + self.ga.polygon("adsfasdf", coords=coords, outlineColor=BLACK, fillColor=BLACK, width=2) + # gfxdraw.aapolygon(self.surf, coords, (0, 0, 0)) + # gfxdraw.filled_polygon(self.surf, coords, (0, 0, 0)) + + + for c in [(carwidth / 4, 0), (-carwidth / 4, 0)]: + c = pygame.math.Vector2(c).rotate_rad(math.cos(3 * pos)) + wheel = ( + c[0] + (pos - 0*self.env.min_position) * scale, + c[1] + clearance + self.env._height(pos) * scale, + ) + + # gfxdraw.aacircle( + # self.surf, wheel[0], wheel[1], int(carheight / 2.5), (128, 128, 128) + # ) + + self.ga.circle("asdf", (wheel[0], wheel[1]), int(carheight / 2.5*rscale), fillColor=(128, 128, 128), outlineColor= (70, 70, 70)) + # + # gfxdraw.filled_circle( + # self.surf, wheel[0], wheel[1], int(carheight / 2.5 * rscale), (128, 128, 128) + # ) + + flagx = (self.env.goal_position - 0*self.env.min_position) * scale + flagy1 = self.env._height(self.env.goal_position) * scale + flagy2 = flagy1 + 50/rscale + self.ga.line("asdfasdf", (flagx, flagy1), (flagx, flagy2), color=(0, 0, 0)) + + self.ga.polygon( + "sdfasdf", + [(flagx, flagy2), (flagx, flagy2 - 10/rscale), (flagx + 25/rscale, flagy2 - 5/rscale)], + (204, 204, 0), + ) + # gfxdraw.aapolygon( + # self.surf, + # [(flagx, flagy2), (flagx, flagy2 - 10/rscale), (flagx + 25/rscale, flagy2 - 5/rscale)], + # (204, 204, 0), + # ) + # gfxdraw.filled_polygon( + # self.surf, + # [(flagx, flagy2), (flagx, flagy2 - 10/rscale), (flagx + 25/rscale, flagy2 - 5)], + # (204, 204, 0), + # ) + # 
Optionally draw the value functino. + # oxmin = 0.6 + # oxmax = 1.7 + # oymin = 0 + # oymax = 1 + + # self.env.observation_space + # dx = 1.5 + # dy = 0 + + # sX = 1 + # sY = 1 + + # Pscale = 1 + Vscale = 6 + + # def pos2s(pos):#, vel): + # return pos + 1.8 #, (vel + 0.2) * 3 + # def vel2s(vel): + # return (vel + 0.) * Vscale + + def x2s(pos, vel): + return pos + 1.75, (vel + 0.1) * Vscale + + xmin,ymin = x2s(self.env.observation_space.low[0], self.env.observation_space.low[1] ) + xmax,ymax = x2s(self.env.observation_space.high[0], self.env.observation_space.high[1] ) + + px, py = x2s( *np.asarray(self.env.state).tolist()) + + + + # self.env.observation_space.low + if self.agent is not None: + + def colfunc(val, minval, maxval, startcolor, stopcolor): + """ Convert value in the range minval...maxval to a color in the range + startcolor to stopcolor. The colors passed and the one returned are + composed of a sequence of N component values (e.g. RGB). + """ + f = float(val - minval) / (maxval - minval) + return tuple( float( f * (b - a) + a) for (a, b) in zip(startcolor, stopcolor)) + + RED, YELLOW, GREEN = (1, 0, 0), (1, 1, 0), (0, 1, 0) + CYAN, BLUE, MAGENTA = (0, 1, 1), (0, 0, 1), (1, 0, 1) + steps = 10 + minval, maxval = 0.0, 1.0 + # incr = (maxval - minval) / steps + # for i in range(steps + 1): + # val = minval + round(i * incr, 1) + # # print('{:.1f} -> ({:.3f}, {:.3f}, {:.3f})'.format( + # # val, *colfunc(val, minval, maxval, BLUE, RED))) + + value_function = lambda s: -max(self.agent.Q.get_Qs(s)[1]) + + grid_size = 40 + # grid_size = 30 + low = self.env.unwrapped.observation_space.low + high = self.env.unwrapped.observation_space.high + X, Y = np.meshgrid(np.linspace(low[0], high[0], grid_size), np.linspace(low[1], high[1], grid_size)) + Z = X * 0 + + if self.x_cache is None or len(self.x_cache) == 0: + for i, (x, y) in enumerate(zip(X.flat, Y.flat)): + s = (x, y) + xx = [self.agent.Q.x(s, a) for a in range(self.env.action_space.n) ] + self.x_cache.append(xx) + # Z.flat[i] = value_function((x, y)) + pass + # for i, (x, y) in enumerate(zip(X.flat, Y.flat)): + # # [max([float(self.agent.Q.w @ dx) for dx in xx]) for xx in self.x_cache] + # + # + # + # Z.flat[i] = value_function((x, y)) + # pass + for i in range(len(self.x_cache)): + Z.flat[i] = max([float(self.agent.Q.w @ dx) for dx in self.x_cache[i]]) + pass + + for i in range(len(Z.flat)): + ddx = (X.max() - X.min()) / (grid_size-1) + ddy = (Y.max() - Y.min()) / (grid_size-1) + + z = colfunc(Z.flat[i], Z.min(), Z.max()+0.01, BLUE, RED) + + z = tuple( int(x*255) for x in z) + + xmin, ymin = x2s(X.flat[i], Y.flat[i]) + xmax, ymax = x2s(X.flat[i]+ddx, Y.flat[i]+ddy) + + self.ga.rectangle(color=z, x=xmin, y=ymin, width=xmax-xmin, height=ymax-ymin) + pass + # colfunc(val, minval, maxval, startcolor, stopcolor): + + self.ga.rectangle(color=BLACK, x=xmin, y=ymin, width=xmax - xmin, height=ymax - ymin, border=1) + self.ga.circle("asdf", (px, py), r=5, fillColor=(200, 200, 200)) + + return self.ga.blit(render_mode=self.env.render_mode) + + # self.surf = pygame.transform.flip(self.surf, False, True) + # self.screen.blit(self.surf, (0, 0)) + # if self.render_mode == "human": + # pygame.event.pump() + # self.clock.tick(self.metadata["render_fps"]) + # pygame.display.flip() + # + # elif self.render_mode == "rgb_array": + # return np.transpose( + # np.array(pygame.surfarray.pixels3d(self.screen)), axes=(1, 0, 2) + # ) + + def close(self): + self.ga.close() + +if __name__ == '__main__': + from irlc import Agent, interactive, train + env = 
FancyMountainCar(render_mode='human') + num_of_tilings = 8 + alpha = 0.3 + from irlc.ex11.semi_grad_sarsa import LinearSemiGradSarsa + # env = gym.make("MountainCar-v0") + agent = LinearSemiGradSarsa(env, gamma=1, alpha=alpha/num_of_tilings, epsilon=0) + # agent = Agent(env) + + env, agent = interactive(env, agent) + train(env, agent, num_episodes=10) + + env.close() + + + + pass diff --git a/irlc/lectures/lec11/sarsa_nstep.py b/irlc/lectures/lec11/sarsa_nstep.py new file mode 100644 index 0000000000000000000000000000000000000000..7687d1736244fd5531c35cd54ebdac7c25fc0a61 --- /dev/null +++ b/irlc/lectures/lec11/sarsa_nstep.py @@ -0,0 +1,11 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.gridworld.gridworld_environments import BookGridEnvironment +from irlc.exam_tabular_examples.sarsa_nstep_delay import SarsaDelayNAgent +from irlc import interactive, train + +if __name__ == "__main__": + env = BookGridEnvironment(render_mode='human') + agent = SarsaDelayNAgent(env, gamma=1, epsilon=0.1, alpha=0.9, n=1) # Exam problem. + # agent = SarsaDelayNAgent(env, gamma=0.95, epsilon=0.1, alpha=.2, n=1) + env, agent = interactive(env, agent) + train(env, agent, num_episodes=10) diff --git a/irlc/tests/tests_week10.py b/irlc/tests/tests_week10.py new file mode 100644 index 0000000000000000000000000000000000000000..b5dd4e6580fd2cd8dcebf7de0ba5f90e9edd9ca8 --- /dev/null +++ b/irlc/tests/tests_week10.py @@ -0,0 +1,132 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.ex10.question_td0 import a_compute_deltas, b_perform_td0, c_perform_td0_batched +from unitgrade import Report, UTestCase, cache +from irlc import train +import irlc.ex10.envs +import gymnasium as gym +from gymnasium.wrappers import TimeLimit +from irlc.tests.tests_week08 import train_recording + + +class MCAgentQuestion(UTestCase): + """ Test of MC agent """ + def get_env_agent(self): + from irlc.ex10.mc_agent import MCAgent + env = gym.make("SmallGridworld-v0") + env = TimeLimit(env, max_episode_steps=1000) + gamma = .8 + agent = MCAgent(env, gamma=gamma, first_visit=True) + return env, agent + + @cache + def compute_trajectories(self): + env, agent = self.get_env_agent() + _, trajectories = train(env, agent, return_trajectory=True, num_episodes=1, max_steps=100) + return trajectories, agent.Q.to_dict() + + def test_Q_function(self): + trajectories, Q = self.compute_trajectories() + env, agent = self.get_env_agent() + train_recording(env, agent, trajectories) + Qc = [] + Qe = [] + for s, qa in Q.items(): + for a,q in qa.items(): + Qe.append(q) + Qc.append(agent.Q[s,a]) + + self.assertL2(Qe, Qc, tol=1e-5) + + +# class BlackjackQuestion(UTestCase): +# """ MC policy evaluation agent and Blacjack """ +# def test_blackjack_mc(self): +# env = gym.make("Blackjack-v1") +# episodes = 50000 +# from irlc.ex10.mc_evaluate import MCEvaluationAgent +# from irlc.ex10.mc_evaluate_blackjack import get_by_ace, to_matrix, policy20 +# agent = MCEvaluationAgent(env, policy=policy20, gamma=1) +# train(env, agent, num_episodes=episodes) +# w = get_by_ace(agent.v, ace=True) +# X, Y, Z = to_matrix(w) +# print(Z) +# print(Z.dtype) +# self.assertL2(Z, tol=2.5) + + +class TD0Question(UTestCase): + """ Test of TD(0) evaluation agent """ + gamma = 0.8 + + def get_env_agent(self): + from 
irlc.ex10.td0_evaluate import TD0ValueAgent + env = gym.make("SmallGridworld-v0") + # env = TimeLimit(env, max_episode_steps=1000) + agent = TD0ValueAgent(env, gamma=self.gamma) + return env, agent + + @cache + def compute_trajectories(self): + env, agent = self.get_env_agent() + _, trajectories = train(env, agent, return_trajectory=True, num_episodes=1, max_steps=100) + return trajectories, agent.v + + def test_value_function(self): + # for k in range(1000): + trajectories, v = self.compute_trajectories() + env, agent = self.get_env_agent() + train_recording(env, agent, trajectories) + Qc = [] + Qe = [] + for s, value in v.items(): + Qe.append(value) + Qc.append(agent.v[s]) + + self.assertL2(Qe, Qc, tol=1e-5) + +class MCEvaluationQuestion(TD0Question): + """ Test of MC evaluation agent """ + def get_env_agent(self): + from irlc.ex10.mc_evaluate import MCEvaluationAgent + env = gym.make("SmallGridworld-v0") + env = TimeLimit(env, max_episode_steps=1000) + gamma = .8 + agent = MCEvaluationAgent(env, gamma=gamma, first_visit=True) + return env, agent + + +class ExamQuestionTD0(UTestCase): + + def get_problem(self): + states = [1, 0, 2, -1, 2, 4, 5, 4, 3, 2, 1, -1] + rewards = [1, 1, -1, 0, 1, 2, 2, 0, 0, -1, 1] + v = {s: 0 for s in states} + gamma = 0.9 + alpha = 0.2 + return v, states, rewards, gamma, alpha + + def test_a(self): + v, states, rewards, gamma, alpha = self.get_problem() + self.assertEqualC(a_compute_deltas(v, states, rewards, gamma)) + + def test_b(self): + v, states, rewards, gamma, alpha = self.get_problem() + self.assertEqualC(b_perform_td0(v, states, rewards, gamma, alpha)) + + def test_c(self): + v, states, rewards, gamma, alpha = self.get_problem() + self.assertEqualC(c_perform_td0_batched(v, states, rewards, gamma, alpha)) +class Week10Tests(Report): + title = "Tests for week 10" + pack_imports = [irlc] + individual_imports = [] + questions = [(MCAgentQuestion, 10), + (MCEvaluationQuestion, 10), + # (BlackjackQuestion,5), + (TD0Question, 10), + (ExamQuestionTD0, 10), + ] + +if __name__ == '__main__': + from unitgrade import evaluate_report_student + evaluate_report_student(Week10Tests()) diff --git a/irlc/tests/tests_week11.py b/irlc/tests/tests_week11.py new file mode 100644 index 0000000000000000000000000000000000000000..1f58dd129daaca02dc7c468b120640362aad8c41 --- /dev/null +++ b/irlc/tests/tests_week11.py @@ -0,0 +1,200 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from unitgrade import UTestCase, Report, cache +import numpy as np +from irlc import train +import irlc.ex10.envs +import gymnasium as gym +from irlc.tests.tests_week08 import train_recording +from irlc.tests.tests_week10 import TD0Question, MCAgentQuestion + + +# This problem no longer exists. 
+# class NStepSarseEvaluationQuestion(TD0Question): +# """ Test of TD-n evaluation agent """ +# # class EvaluateTabular(VExperienceItem): +# # title = "Value-function test" +# gamma = 0.8 +# def get_env_agent(self): +# envn = "SmallGridworld-v0" +# from irlc.ex11.nstep_td_evaluate import TDnValueAgent +# env = gym.make(envn) +# agent = TDnValueAgent(env, gamma=self.gamma, n=5) +# return env, agent + + + +class QAgentQuestion(MCAgentQuestion): + """ Test of Q Agent """ + # class EvaluateTabular(QExperienceItem): + # title = "Q-value test" + + def get_env_agent(self): + from irlc.ex11.q_agent import QAgent + env = gym.make("SmallGridworld-v0") + agent = QAgent(env, gamma=.8) + return env, agent + + +# class LinearWeightVectorTest(UTestCase): + + + +# class LinearValueFunctionTest(LinearWeightVectorTest): +# title = "Linear value-function test" +# def compute_answer_print(self): +# trajectories, Q = self.precomputed_payload() +# env, agent = self.get_env_agent() +# train_recording(env, agent, trajectories) +# self.Q = Q +# self.question.agent = agent +# vfun = [agent.Q[s,a] for s, a in zip(trajectories[0].state, trajectories[0].action)] +# return vfun + +# class TabularAgentStub(UTestCase): +# +# pass + +class TabularAgentStub(UTestCase): + """ Average return over many simulated episodes """ + gamma = 0.95 + epsilon = 0.2 + tol = 0.1 + tol_qs = 0.3 + episodes = 9000 + + def get_env(self): + return gym.make("SmallGridworld-v0") + + def get_env_agent(self): + raise NotImplementedError() + # from irlc.ex11.sarsa_agent import SarsaAgent + # agent = SarsaAgent(self.get_env(), gamma=self.gamma) + # return agent.env, agent + + def get_trained_agent(self): + env, agent = self.get_env_agent() + stats, _ = train(env, agent, num_episodes=self.episodes) + return agent, stats + + def chk_accumulated_reward(self): + agent, stats = self.get_trained_agent() + s0, _ = agent.env.reset() + actions, qs = agent.Q.get_Qs(s0) + print("Tolerance is", self.tol_qs) + self.assertL2(qs, tol=self.tol_qs) + self.assertL2(np.mean([s['Accumulated Reward'] for s in stats]), tol=self.tol) + + # def test_accumulated_reward(self): + # env, agent = self.get_env_agent() + # stats, _ = train(env, agent, num_episodes=5000) + # s = env.reset() + # actions, qs = agent.Q.get_Qs(s) + # self.assertL2(qs, tol=0.3) + # self.assertL2(np.mean([s['Accumulated Reward'] for s in stats]), tol=self.tol) + +class SarsaQuestion(TabularAgentStub): + + + def get_env_agent(self): + from irlc.ex11.sarsa_agent import SarsaAgent + agent = SarsaAgent(self.get_env(), gamma=self.gamma) + return agent.env, agent + + def test_accumulated_reward(self): + self.tol_qs = 2.7 # Got 2.65 in one run. 
+ self.chk_accumulated_reward() + + +class NStepSarsaQuestion(TabularAgentStub): + title = "N-step Sarsa" + # class SarsaReturnItem(SarsaQuestion): + def get_env_agent(self): + from irlc.ex11.nstep_sarsa_agent import SarsaNAgent + agent = SarsaNAgent(self.get_env(), gamma=self.gamma, n=5) + return agent.env, agent + + def test_accumulated_reward(self): + self.tol_qs = 2.7 + self.chk_accumulated_reward() + + +class LinearAgentStub(UTestCase): + # class LinearExperienceItem(LinearWeightVectorTest): + tol = 1e-6 + # title = "Linear sarsa agent" + alpha = 0.08 + num_episodes = 300 + # title = "Weight-vector test" + # testfun = QPrintItem.assertL2 + gamma = 0.8 + tol_w = 1e-5 + + + def get_env_agent(self): + raise NotImplementedError() + + def get_env(self): + return gym.make("MountainCar500-v0") + + # def get_env_agent(self): + # return None, None + + @cache + def compute_trajectories(self): + env, agent = self.get_env_agent() + _, trajectories = train(env, agent, return_trajectory=True, num_episodes=1, max_steps=100) + return trajectories, agent.Q.w + + def chk_Q_weight_vector_w(self): + trajectories, w = self.compute_trajectories() + env, agent = self.get_env_agent() + train_recording(env, agent, trajectories) + print(w) + print(agent.Q.w) + self.assertL2(agent.Q.w, w, tol=self.tol_w) + + pass +class LinearSarsaAgentQuestion(LinearAgentStub): + """ Sarsa Agent with linear function approximators """ + + def get_env_agent(self): + env = self.get_env() + from irlc.ex11.semi_grad_sarsa import LinearSemiGradSarsa + agent = LinearSemiGradSarsa(env, gamma=1, alpha=self.alpha, epsilon=0) + return env, agent + + def test_Q_weight_vector_w(self): + self.tol_w = 1.4 + self.chk_Q_weight_vector_w() + +class LinearQAgentQuestion(LinearAgentStub): + """ Test of Linear Q Agent """ + + def get_env_agent(self): + env = self.get_env() + alpha = 0.1 + from irlc.ex11.semi_grad_q import LinearSemiGradQAgent + agent = LinearSemiGradQAgent(env, gamma=1, alpha=alpha, epsilon=0) + return env, agent + + def test_Q_weight_vector_w(self): + # self.tol_qs = 1.9 + self.tol_w = 7 + self.chk_Q_weight_vector_w() + + +class Week11Tests(Report): + title = "Tests for week 11" + pack_imports = [irlc] + individual_imports = [] + questions =[ + # (NStepSarseEvaluationQuestion, 10), + (QAgentQuestion, 10), + (LinearQAgentQuestion, 10), + (LinearSarsaAgentQuestion, 10), + (SarsaQuestion, 10), + (NStepSarsaQuestion, 5), + ] +if __name__ == '__main__': + from unitgrade import evaluate_report_student + evaluate_report_student(Week11Tests()) diff --git a/irlc/tests/unitgrade_data/BanditQuestion.pkl b/irlc/tests/unitgrade_data/BanditQuestion.pkl index edd179f5233e5a15868c54b4fd9cc2965fadb20f..473236810511533d00ae28302696def58d013643 100644 Binary files a/irlc/tests/unitgrade_data/BanditQuestion.pkl and b/irlc/tests/unitgrade_data/BanditQuestion.pkl differ diff --git a/irlc/tests/unitgrade_data/CartpoleCostQuestion.pkl b/irlc/tests/unitgrade_data/CartpoleCostQuestion.pkl index f9c111e0fde29667564c4bce12403bdbca2ede4b..61cfc2e5e8d74b9cedaed9bbabba199444880933 100644 Binary files a/irlc/tests/unitgrade_data/CartpoleCostQuestion.pkl and b/irlc/tests/unitgrade_data/CartpoleCostQuestion.pkl differ diff --git a/irlc/tests/unitgrade_data/CartpoleTimeQuestion.pkl b/irlc/tests/unitgrade_data/CartpoleTimeQuestion.pkl index f9c111e0fde29667564c4bce12403bdbca2ede4b..61cfc2e5e8d74b9cedaed9bbabba199444880933 100644 Binary files a/irlc/tests/unitgrade_data/CartpoleTimeQuestion.pkl and b/irlc/tests/unitgrade_data/CartpoleTimeQuestion.pkl differ diff 
--git a/irlc/tests/unitgrade_data/DirectAgentPendulum.pkl b/irlc/tests/unitgrade_data/DirectAgentPendulum.pkl index 30dd1062d1dd64f89fbe4a1d9559ec33ecfdec49..8d010a1467db4d221532f2ee0a7371c71c132147 100644 Binary files a/irlc/tests/unitgrade_data/DirectAgentPendulum.pkl and b/irlc/tests/unitgrade_data/DirectAgentPendulum.pkl differ diff --git a/irlc/tests/unitgrade_data/DirectMethods.pkl b/irlc/tests/unitgrade_data/DirectMethods.pkl index 5b7d595636172fded4067cf5f187d482614b79ba..023619080482d1b79f8bf25480a74ab9f4f9b6a9 100644 Binary files a/irlc/tests/unitgrade_data/DirectMethods.pkl and b/irlc/tests/unitgrade_data/DirectMethods.pkl differ diff --git a/irlc/tests/unitgrade_data/DirectSolverQuestion.pkl b/irlc/tests/unitgrade_data/DirectSolverQuestion.pkl index f9c111e0fde29667564c4bce12403bdbca2ede4b..61cfc2e5e8d74b9cedaed9bbabba199444880933 100644 Binary files a/irlc/tests/unitgrade_data/DirectSolverQuestion.pkl and b/irlc/tests/unitgrade_data/DirectSolverQuestion.pkl differ diff --git a/irlc/tests/unitgrade_data/Exam5InventoryEvaluation.pkl b/irlc/tests/unitgrade_data/Exam5InventoryEvaluation.pkl index af00f83a914f3ccd605d208edf577a680f4b4822..7668f6c5761fd79c62f158071fbd494d96a842b9 100644 Binary files a/irlc/tests/unitgrade_data/Exam5InventoryEvaluation.pkl and b/irlc/tests/unitgrade_data/Exam5InventoryEvaluation.pkl differ diff --git a/irlc/tests/unitgrade_data/Exam6Toy2d.pkl b/irlc/tests/unitgrade_data/Exam6Toy2d.pkl index 35da329cde908ee0c76542e26e45a260eda7f19f..f297836116e242443741fc97c303a7586b38e0e2 100644 Binary files a/irlc/tests/unitgrade_data/Exam6Toy2d.pkl and b/irlc/tests/unitgrade_data/Exam6Toy2d.pkl differ diff --git a/irlc/tests/unitgrade_data/ExamQuestion7FlowersStore.pkl b/irlc/tests/unitgrade_data/ExamQuestion7FlowersStore.pkl index d94ebb5d1eaceac887aa37880e80bc64f85537ae..857d1b44cebdb0612421c1ec990febce99722f8c 100644 Binary files a/irlc/tests/unitgrade_data/ExamQuestion7FlowersStore.pkl and b/irlc/tests/unitgrade_data/ExamQuestion7FlowersStore.pkl differ diff --git a/irlc/tests/unitgrade_data/ExamQuestionTD0.pkl b/irlc/tests/unitgrade_data/ExamQuestionTD0.pkl index a00b2d148c0fc04b594b8b9551574d5f265a43b1..4f921dea73c279142e67fb44ce9b2c57aa0668ce 100644 Binary files a/irlc/tests/unitgrade_data/ExamQuestionTD0.pkl and b/irlc/tests/unitgrade_data/ExamQuestionTD0.pkl differ diff --git a/irlc/tests/unitgrade_data/GradientBanditQuestion.pkl b/irlc/tests/unitgrade_data/GradientBanditQuestion.pkl index edd179f5233e5a15868c54b4fd9cc2965fadb20f..473236810511533d00ae28302696def58d013643 100644 Binary files a/irlc/tests/unitgrade_data/GradientBanditQuestion.pkl and b/irlc/tests/unitgrade_data/GradientBanditQuestion.pkl differ diff --git a/irlc/tests/unitgrade_data/ILQRAgentQuestion.pkl b/irlc/tests/unitgrade_data/ILQRAgentQuestion.pkl index 9428ff4694b4dc2cdbf360f286f3efccaa252b72..547d7a9f7b6ad4938087db86e35a9b36cee09e65 100644 Binary files a/irlc/tests/unitgrade_data/ILQRAgentQuestion.pkl and b/irlc/tests/unitgrade_data/ILQRAgentQuestion.pkl differ diff --git a/irlc/tests/unitgrade_data/ILQRPendulumQuestion.pkl b/irlc/tests/unitgrade_data/ILQRPendulumQuestion.pkl index e365fec395fbccdf16de93f115cac629916b4b03..f94cda42c98f0cc556ada3f739585ba233fdd584 100644 Binary files a/irlc/tests/unitgrade_data/ILQRPendulumQuestion.pkl and b/irlc/tests/unitgrade_data/ILQRPendulumQuestion.pkl differ diff --git a/irlc/tests/unitgrade_data/LinearQAgentQuestion.pkl b/irlc/tests/unitgrade_data/LinearQAgentQuestion.pkl index 
69c70ecba4954fbaf7505af3246803e1403042e6..0af1b2cbfdd1441804be919a7071995cd7bd6acb 100644 Binary files a/irlc/tests/unitgrade_data/LinearQAgentQuestion.pkl and b/irlc/tests/unitgrade_data/LinearQAgentQuestion.pkl differ diff --git a/irlc/tests/unitgrade_data/LinearSarsaAgentQuestion.pkl b/irlc/tests/unitgrade_data/LinearSarsaAgentQuestion.pkl index d0e913ff8885e29f4287fa1e21d720d5ee6fe0ed..f1d8ea1c4051989e2d07c4d440e9ea4f2aa3b24f 100644 Binary files a/irlc/tests/unitgrade_data/LinearSarsaAgentQuestion.pkl and b/irlc/tests/unitgrade_data/LinearSarsaAgentQuestion.pkl differ diff --git a/irlc/tests/unitgrade_data/MCAgentQuestion.pkl b/irlc/tests/unitgrade_data/MCAgentQuestion.pkl index c552c3c66dbd63e44581067c53e74783681ffc2a..3e631e5d95d7b0257a00cdadcb8a84bf962be8fa 100644 Binary files a/irlc/tests/unitgrade_data/MCAgentQuestion.pkl and b/irlc/tests/unitgrade_data/MCAgentQuestion.pkl differ diff --git a/irlc/tests/unitgrade_data/MCEvaluationQuestion.pkl b/irlc/tests/unitgrade_data/MCEvaluationQuestion.pkl index d9cfe1215b3b0ede2b9c0e0ff35452f93eb10249..6a132b537b8762024112ca92a5dad5b3c6682bc6 100644 Binary files a/irlc/tests/unitgrade_data/MCEvaluationQuestion.pkl and b/irlc/tests/unitgrade_data/MCEvaluationQuestion.pkl differ diff --git a/irlc/tests/unitgrade_data/NStepSarsaQuestion.pkl b/irlc/tests/unitgrade_data/NStepSarsaQuestion.pkl index 10b68255bebadaa13730e897e7a8cd2064666d88..d2afb2c21357b5e6d0a93407654b733d4398d13e 100644 Binary files a/irlc/tests/unitgrade_data/NStepSarsaQuestion.pkl and b/irlc/tests/unitgrade_data/NStepSarsaQuestion.pkl differ diff --git a/irlc/tests/unitgrade_data/NonstatiotnaryAgentQuestion.pkl b/irlc/tests/unitgrade_data/NonstatiotnaryAgentQuestion.pkl index edd179f5233e5a15868c54b4fd9cc2965fadb20f..473236810511533d00ae28302696def58d013643 100644 Binary files a/irlc/tests/unitgrade_data/NonstatiotnaryAgentQuestion.pkl and b/irlc/tests/unitgrade_data/NonstatiotnaryAgentQuestion.pkl differ diff --git a/irlc/tests/unitgrade_data/PendulumQuestion.pkl b/irlc/tests/unitgrade_data/PendulumQuestion.pkl index f9c111e0fde29667564c4bce12403bdbca2ede4b..61cfc2e5e8d74b9cedaed9bbabba199444880933 100644 Binary files a/irlc/tests/unitgrade_data/PendulumQuestion.pkl and b/irlc/tests/unitgrade_data/PendulumQuestion.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem1BobsFriend.pkl b/irlc/tests/unitgrade_data/Problem1BobsFriend.pkl index c367454afea824f4784577e595e85f92c7535338..402ac40b12769d8211ed434cc664b87f02e0be51 100644 Binary files a/irlc/tests/unitgrade_data/Problem1BobsFriend.pkl and b/irlc/tests/unitgrade_data/Problem1BobsFriend.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem1DiscreteKuromoto.pkl b/irlc/tests/unitgrade_data/Problem1DiscreteKuromoto.pkl index 8a46ae488da7c6c88812a86d3b9f5deb404329fc..e9261fc8c65ffea540aca333c5e8fc9e31b3b779 100644 Binary files a/irlc/tests/unitgrade_data/Problem1DiscreteKuromoto.pkl and b/irlc/tests/unitgrade_data/Problem1DiscreteKuromoto.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem1Kuramoto.pkl b/irlc/tests/unitgrade_data/Problem1Kuramoto.pkl index aa1f77c7396deed36b343f489727774ebcebc3df..402b6e64ed470d789707284b32dc7c0e4940adaa 100644 Binary files a/irlc/tests/unitgrade_data/Problem1Kuramoto.pkl and b/irlc/tests/unitgrade_data/Problem1Kuramoto.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem1_to_3_Warmup.pkl b/irlc/tests/unitgrade_data/Problem1_to_3_Warmup.pkl index 6428b65c09cb5b72e9f945066f6343f4d8a24009..4dca1ffc8ec8944f3d0619b092dbfc24d221f77d 100644 Binary files 
a/irlc/tests/unitgrade_data/Problem1_to_3_Warmup.pkl and b/irlc/tests/unitgrade_data/Problem1_to_3_Warmup.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem2BobsPolicy.pkl b/irlc/tests/unitgrade_data/Problem2BobsPolicy.pkl index 6e3d0a43209e6cc03cc992ef8fc4be5b219a2fa2..0133173f9abe7ce08622467ef6ce34a34987e782 100644 Binary files a/irlc/tests/unitgrade_data/Problem2BobsPolicy.pkl and b/irlc/tests/unitgrade_data/Problem2BobsPolicy.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem2DeterministicDP.pkl b/irlc/tests/unitgrade_data/Problem2DeterministicDP.pkl index c623107a7d3b7ae5b8b3ae136dbe3b0a806f400a..674e8d3f54aeed4eeb13056dc384ceaf5831f9b3 100644 Binary files a/irlc/tests/unitgrade_data/Problem2DeterministicDP.pkl and b/irlc/tests/unitgrade_data/Problem2DeterministicDP.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem2DeterministicInventory.pkl b/irlc/tests/unitgrade_data/Problem2DeterministicInventory.pkl index 6c26fffacf4abd79d8016b28dacc3b460d9db347..547769c9bb40f7e2f9e061a3d24943b7bf016ea1 100644 Binary files a/irlc/tests/unitgrade_data/Problem2DeterministicInventory.pkl and b/irlc/tests/unitgrade_data/Problem2DeterministicInventory.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem3InventoryInventoryEnvironment.pkl b/irlc/tests/unitgrade_data/Problem3InventoryInventoryEnvironment.pkl index d79217522553caef898abe605f8a24a40636a08b..dd7c5af1163a3610ddac292eac48d2bb7792bb9a 100644 Binary files a/irlc/tests/unitgrade_data/Problem3InventoryInventoryEnvironment.pkl and b/irlc/tests/unitgrade_data/Problem3InventoryInventoryEnvironment.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem3LQR.pkl b/irlc/tests/unitgrade_data/Problem3LQR.pkl index e1981fe372bc707bca0260655dc4fc4a44466b19..0ffab948d9210a04dcf3aac3fb2ed0b93dba7ca2 100644 Binary files a/irlc/tests/unitgrade_data/Problem3LQR.pkl and b/irlc/tests/unitgrade_data/Problem3LQR.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem3PID.pkl b/irlc/tests/unitgrade_data/Problem3PID.pkl index 839dd814eff6e41f6fb597adb852de2fbb0f8e19..d17327151a7cf7abcbd9144d790d5816c40e8376 100644 Binary files a/irlc/tests/unitgrade_data/Problem3PID.pkl and b/irlc/tests/unitgrade_data/Problem3PID.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem3StochasticDP.pkl b/irlc/tests/unitgrade_data/Problem3StochasticDP.pkl index 8b59f1de92e3fd5734568a88df1fea70ae262fe3..42b78bdb0fc666d7de65b7898084a8b2ebfc9357 100644 Binary files a/irlc/tests/unitgrade_data/Problem3StochasticDP.pkl and b/irlc/tests/unitgrade_data/Problem3StochasticDP.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem4DPAgent.pkl b/irlc/tests/unitgrade_data/Problem4DPAgent.pkl index b28d450ca88a391a8ccfa480734fde8141d1b6f4..b3afcdcbdf741bc3db13e7fe30128763d31475ea 100644 Binary files a/irlc/tests/unitgrade_data/Problem4DPAgent.pkl and b/irlc/tests/unitgrade_data/Problem4DPAgent.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem4InventoryTrain.pkl b/irlc/tests/unitgrade_data/Problem4InventoryTrain.pkl index ee99756d57edf4a222856e4fcb64d101eb1ad454..d11c158fe1b26097970110155f252694dbc24699 100644 Binary files a/irlc/tests/unitgrade_data/Problem4InventoryTrain.pkl and b/irlc/tests/unitgrade_data/Problem4InventoryTrain.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem4LQRAgent.pkl b/irlc/tests/unitgrade_data/Problem4LQRAgent.pkl index 569bd52839bd666ad594a21ada6ef6f6bff3342a..eb2ddd4971a857b980fd75550b2cc7c96ed85318 100644 Binary files a/irlc/tests/unitgrade_data/Problem4LQRAgent.pkl and b/irlc/tests/unitgrade_data/Problem4LQRAgent.pkl 
differ diff --git a/irlc/tests/unitgrade_data/Problem4PIDAgent.pkl b/irlc/tests/unitgrade_data/Problem4PIDAgent.pkl index c3fd8909d810a5943171ac685a2e95326d9ae808..2dc14ad1e55bd9a85108809779f809f20ec255a3 100644 Binary files a/irlc/tests/unitgrade_data/Problem4PIDAgent.pkl and b/irlc/tests/unitgrade_data/Problem4PIDAgent.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem4PolicyEvaluation.pkl b/irlc/tests/unitgrade_data/Problem4PolicyEvaluation.pkl index e988ad6626a12e54cb6292ed0560835fcde9c488..9cb9f08bc370f4a30110aeaceff81fc5895be6ae 100644 Binary files a/irlc/tests/unitgrade_data/Problem4PolicyEvaluation.pkl and b/irlc/tests/unitgrade_data/Problem4PolicyEvaluation.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem5PacmanHardcoded.pkl b/irlc/tests/unitgrade_data/Problem5PacmanHardcoded.pkl index 60282c5cbf6353a285a6425a363e36a1c4658156..a41d0222d42045ede2d5eacd73513bb2a9100775 100644 Binary files a/irlc/tests/unitgrade_data/Problem5PacmanHardcoded.pkl and b/irlc/tests/unitgrade_data/Problem5PacmanHardcoded.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem5PolicyIteration.pkl b/irlc/tests/unitgrade_data/Problem5PolicyIteration.pkl index 08d0a1de9624ce2840d456c97c121036d2a6a41d..a5668f0cd26821ec7bd502c8cf6334dbae2f50ec 100644 Binary files a/irlc/tests/unitgrade_data/Problem5PolicyIteration.pkl and b/irlc/tests/unitgrade_data/Problem5PolicyIteration.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem5_6_Boeing.pkl b/irlc/tests/unitgrade_data/Problem5_6_Boeing.pkl index d313e26e5727bec7415f60a10638d703b76313a3..066b7ad5d643ab4af11fd1ff5482f06707b6e498 100644 Binary files a/irlc/tests/unitgrade_data/Problem5_6_Boeing.pkl and b/irlc/tests/unitgrade_data/Problem5_6_Boeing.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem6ChessTournament.pkl b/irlc/tests/unitgrade_data/Problem6ChessTournament.pkl index 7035e7095ef9ad98f42285f0164d461f0709159f..00e68ef4a2d4f3938898b5ba0813f27abf322dbc 100644 Binary files a/irlc/tests/unitgrade_data/Problem6ChessTournament.pkl and b/irlc/tests/unitgrade_data/Problem6ChessTournament.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem6ValueIteration.pkl b/irlc/tests/unitgrade_data/Problem6ValueIteration.pkl index 26117dae7081cd5dde5d04e16e133337034e99c2..f242501c0d8cfea0cfff3745b5c79a7e5c7a74a6 100644 Binary files a/irlc/tests/unitgrade_data/Problem6ValueIteration.pkl and b/irlc/tests/unitgrade_data/Problem6ValueIteration.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem7PIDCar.pkl b/irlc/tests/unitgrade_data/Problem7PIDCar.pkl index 3393d7eb47dbebe9239b902d13032a4eebcb48fa..515794b16a025761728494b5a33329e533e5c2d7 100644 Binary files a/irlc/tests/unitgrade_data/Problem7PIDCar.pkl and b/irlc/tests/unitgrade_data/Problem7PIDCar.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem7_8_PidLQR.pkl b/irlc/tests/unitgrade_data/Problem7_8_PidLQR.pkl index a8befec36a3e155f9f1fe055f8d0b2eb841f3795..b579ed7c05dd433dc7d008cdc6ade7223c149822 100644 Binary files a/irlc/tests/unitgrade_data/Problem7_8_PidLQR.pkl and b/irlc/tests/unitgrade_data/Problem7_8_PidLQR.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem8ValueIterationAgent.pkl b/irlc/tests/unitgrade_data/Problem8ValueIterationAgent.pkl index b5a78afcd03096134dd779734ebd7428b75101f6..5629afc31194b2fc36c1b212f28076015b775d3c 100644 Binary files a/irlc/tests/unitgrade_data/Problem8ValueIterationAgent.pkl and b/irlc/tests/unitgrade_data/Problem8ValueIterationAgent.pkl differ diff --git a/irlc/tests/unitgrade_data/Problem9Gambler.pkl 
b/irlc/tests/unitgrade_data/Problem9Gambler.pkl index 32659e41fc735fcb00b86d77c302cd4f8fefd1fd..ed58c48391a348bfe8dbfeff2205acd5418c086e 100644 Binary files a/irlc/tests/unitgrade_data/Problem9Gambler.pkl and b/irlc/tests/unitgrade_data/Problem9Gambler.pkl differ diff --git a/irlc/tests/unitgrade_data/QAgentQuestion.pkl b/irlc/tests/unitgrade_data/QAgentQuestion.pkl index b2d68c845f99cb476a059f553a9d1fc19471869d..1564ef4461bbd73cb56dcd6940b2c522e7c4bc1d 100644 Binary files a/irlc/tests/unitgrade_data/QAgentQuestion.pkl and b/irlc/tests/unitgrade_data/QAgentQuestion.pkl differ diff --git a/irlc/tests/unitgrade_data/RendevouzItem.pkl b/irlc/tests/unitgrade_data/RendevouzItem.pkl index 06cde769c462c7c27150f75bf8abc31dc7c97739..7016d99087fe4b4f4ad3bc18984ce75dc502ebf3 100644 Binary files a/irlc/tests/unitgrade_data/RendevouzItem.pkl and b/irlc/tests/unitgrade_data/RendevouzItem.pkl differ diff --git a/irlc/tests/unitgrade_data/SarsaQuestion.pkl b/irlc/tests/unitgrade_data/SarsaQuestion.pkl index bb6c4f0ca7790d883d7e3b3c3b033768ceba51e4..2074944241492ef40be122832f251c7058ae9948 100644 Binary files a/irlc/tests/unitgrade_data/SarsaQuestion.pkl and b/irlc/tests/unitgrade_data/SarsaQuestion.pkl differ diff --git a/irlc/tests/unitgrade_data/TD0Question.pkl b/irlc/tests/unitgrade_data/TD0Question.pkl index 775a2d90cca489ee99b02f158b2921df81b7a6b2..801506652caca503c3463bf3c4f0c0df23d086f9 100644 Binary files a/irlc/tests/unitgrade_data/TD0Question.pkl and b/irlc/tests/unitgrade_data/TD0Question.pkl differ diff --git a/irlc/tests/unitgrade_data/UCBAgentQuestion.pkl b/irlc/tests/unitgrade_data/UCBAgentQuestion.pkl index edd179f5233e5a15868c54b4fd9cc2965fadb20f..473236810511533d00ae28302696def58d013643 100644 Binary files a/irlc/tests/unitgrade_data/UCBAgentQuestion.pkl and b/irlc/tests/unitgrade_data/UCBAgentQuestion.pkl differ diff --git a/irlc/utils/async_wrappers.py b/irlc/utils/async_wrappers.py index 8dbebf533885664ea41f1a9d01e5012a20ec4490..e2df79a99a59d5017ca58c07bdd0b7d655143d2f 100644 --- a/irlc/utils/async_wrappers.py +++ b/irlc/utils/async_wrappers.py @@ -37,3 +37,61 @@ class AsyncTimeLimit(TimeLimit): truncated = True return observation, reward, terminated, truncated, info + + + + +def _fix_webassembly_packages(yes_really_do_it=False): + # Rewrites the installed sympy sources in place so sympy imports without its threading/timeit/sympy.testing machinery; only used when preparing the webassembly build. + import importlib.util + import os + assert yes_really_do_it, "This function is for internal use for deploying webassembly projects. Don't use it in your base dir." + + spec = importlib.util.find_spec("sympy", None) + base = os.path.dirname(spec.origin) + testf = f"{base}/testing/__init__.py" + if base.startswith("/data/data/"): + # with open(testf, 'w') as f: + # f.write("# Nothingatall") + # with open(f"{base}/testing/runtests.py", 'w') as f: + # f.write("# Nothingatall") + + fname = f"{base}/utilities/decorator.py" + assert os.path.isfile(fname) + code = open(fname, 'r').read() + with open(fname, 'w') as f: + # Drop all module-level imports from sympy.testing. + f.write("\n".join([l for l in code.splitlines() if not l.startswith("from sympy.testing")])) + + code = open(fname := f"{base}/utilities/__init__.py", 'r').read() + code = code.replace("from .timeutils import timed", "timed = lambda x: 3") + with open(fname, 'w') as f: + f.write(code) + + for fname in [f"{base}/core/parameters.py", f"{base}/matrices/utilities.py"]: + code = open(fname, 'r').read() + code = code.replace("from threading import local", "local = object") + with open(fname, 'w') as f: + f.write(code) + + # Fix timeit.
+ code = open(fname := f"{base}/utilities/timeutils.py", 'r').read() + code = code.replace("import timeit", "# REMOVED") + with open(fname, 'w') as f: + f.write(code) + + code = open(fname := f"{base}/testing/runtests.py", 'r').read() + code = code.replace("from timeit import default_timer as clock", "# REMOVED") + # DocTestFinder, DocTestRunner + # + # code = code.replace("import doctest as pdoctest", "# REMOVED") + + # code = code.replace("from doctest import DocTestFinder, DocTestRunner", "DocTestFinder, DocTestRunner = object, object") + # code = code.replace("pdoctest._indent", "#REMOVED") + # code = code.replace("import doctest", "# REMOVED") + + with open(fname, 'w') as f: + f.write(code) + print("Patched ok.") + # NB: remember to also patch Decimal by adding the extra attributes (e.g. the exceptions) that the webassembly build masks from the decimal module. diff --git a/irlc/utils/player_wrapper.py b/irlc/utils/player_wrapper.py index e84b48fec75e81c155a330565979af3a2cd6fe6d..be01959d7ffeda87bd2eebe6efc825981459e47d 100644 --- a/irlc/utils/player_wrapper.py +++ b/irlc/utils/player_wrapper.py @@ -88,11 +88,14 @@ async def _webassembly_interactive(env, agent, autoplay=False): def filled_circle(surface, x, y, r, color): pygame.draw.circle(surface, color, (x, y), r, width=0) + def hline(surface, x1, x2, y, color): + pygame.draw.line(surface, color, (x1, y), (x2, y)) gfxdraw.aapolygon = aapolygon gfxdraw.filled_polygon = filled_polygon gfxdraw.aacircle = aacircle gfxdraw.filled_circle = filled_circle + gfxdraw.hline = hline # from irlc.utils.player_wrapper import AsyncPlayerWrapperPygame
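The player_wrapper.py hunk above relies on a simple monkey-patching pattern: pure pygame.draw stand-ins are defined inside _webassembly_interactive and then assigned onto the gfxdraw module, so rendering code written against gfxdraw keeps working in the webassembly build. Below is a minimal, self-contained sketch of that pattern; it assumes only a standard pygame installation, and the Surface at the bottom is illustrative rather than taken from the repository.

# Minimal sketch of the gfxdraw monkey-patching pattern (assumption: plain pygame install).
import pygame
from pygame import gfxdraw

def hline(surface, x1, x2, y, color):
    # Mirrors gfxdraw.hline(surface, x1, x2, y, color): a horizontal line from x1 to x2 at height y.
    pygame.draw.line(surface, color, (x1, y), (x2, y))

def filled_circle(surface, x, y, r, color):
    # width=0 makes pygame.draw.circle fill the disc, matching gfxdraw.filled_circle.
    pygame.draw.circle(surface, color, (x, y), r, width=0)

# Re-route the gfxdraw primitives to the pure-pygame fallbacks.
gfxdraw.hline = hline
gfxdraw.filled_circle = filled_circle

if __name__ == "__main__":
    surf = pygame.Surface((100, 100))
    gfxdraw.hline(surf, 10, 90, 50, (255, 0, 0))  # now dispatches to pygame.draw.line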
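The _fix_webassembly_packages helper added to async_wrappers.py above is guarded by its yes_really_do_it flag and, per its own assertion message, is only intended for preparing webassembly deployments: it rewrites the installed sympy sources in place. A hypothetical invocation from a deployment script might look as follows; the call site is an assumption and is not part of the repository.

# Hypothetical deployment-time call (assumption, not in the repository).
# Never run this in a normal checkout: it edits the installed sympy package in place.
from irlc.utils.async_wrappers import _fix_webassembly_packages

_fix_webassembly_packages(yes_really_do_it=True)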