From 2b44091bc4045e71f11153c6b115cdeb7aa53483 Mon Sep 17 00:00:00 2001 From: Tue Herlau <tuhe@dtu.dk> Date: Thu, 24 Apr 2025 15:41:01 +0200 Subject: [PATCH] Week 11 --- .gitignore | 14 +- irlc/lectures/lec10/lecture_10_mc_control.py | 7 +- .../lec10/lecture_10_mc_onestate_first.py | 2 + .../lec10/lecture_10_mc_q_estimation.py | 15 - .../lec10/lecture_10_mc_value_every.py | 2 +- .../lecture_10_mc_value_first_one_state.py | 22 +- .../lecture_10_mc_value_first_one_state_b.py | 34 -- irlc/lectures/lec10/lecture_10_td_keyboard.py | 2 + irlc/lectures/lec10/utils.py | 20 ++ irlc/lectures/lec11/__init__.py | 1 + irlc/lectures/lec11/lecture_10_grid_lin_q.py | 10 + irlc/lectures/lec11/lecture_10_sarsa_open.py | 13 + .../lecture_11_mountaincar_feature_space.py | 25 ++ .../lec11/lecture_11_mountaincar_nolearn.py | 25 ++ .../lecture_11_mountaincar_random_weights.py | 25 ++ irlc/lectures/lec11/lecture_11_nstep_open.py | 11 + .../lectures/lec11/lecture_11_pacman_lin_q.py | 32 ++ irlc/lectures/lec11/lecture_11_pacman_q.py | 35 ++ irlc/lectures/lec11/lecture_11_q.py | 10 + irlc/lectures/lec11/lecture_11_q_cliff.py | 18 + irlc/lectures/lec11/lecture_11_q_open.py | 12 + irlc/lectures/lec11/lecture_11_sarsa.py | 9 + irlc/lectures/lec11/lecture_11_sarsa_cliff.py | 33 ++ irlc/lectures/lec11/mountain_car_env.py | 326 ++++++++++++++++++ irlc/lectures/lec11/sarsa_nstep.py | 11 + irlc/tests/tests_week10.py | 132 +++++++ irlc/tests/tests_week11.py | 200 +++++++++++ irlc/tests/unitgrade_data/BanditQuestion.pkl | Bin 96256 -> 96256 bytes .../unitgrade_data/CartpoleCostQuestion.pkl | Bin 5447 -> 5447 bytes .../unitgrade_data/CartpoleTimeQuestion.pkl | Bin 5447 -> 5447 bytes .../unitgrade_data/DirectAgentPendulum.pkl | Bin 231 -> 231 bytes irlc/tests/unitgrade_data/DirectMethods.pkl | Bin 1459 -> 1459 bytes .../unitgrade_data/DirectSolverQuestion.pkl | Bin 5447 -> 5447 bytes .../Exam5InventoryEvaluation.pkl | Bin 217 -> 217 bytes irlc/tests/unitgrade_data/Exam6Toy2d.pkl | Bin 283 -> 283 bytes .../ExamQuestion7FlowersStore.pkl | Bin 182 -> 182 bytes irlc/tests/unitgrade_data/ExamQuestionTD0.pkl | Bin 468 -> 468 bytes .../unitgrade_data/GradientBanditQuestion.pkl | Bin 96256 -> 96256 bytes .../unitgrade_data/ILQRAgentQuestion.pkl | Bin 326 -> 326 bytes .../unitgrade_data/ILQRPendulumQuestion.pkl | Bin 298 -> 298 bytes .../unitgrade_data/LinearQAgentQuestion.pkl | Bin 28764 -> 28764 bytes .../LinearSarsaAgentQuestion.pkl | Bin 28768 -> 28768 bytes irlc/tests/unitgrade_data/MCAgentQuestion.pkl | Bin 4714 -> 6025 bytes .../unitgrade_data/MCEvaluationQuestion.pkl | Bin 8592 -> 3707 bytes .../unitgrade_data/NStepSarsaQuestion.pkl | Bin 282 -> 282 bytes .../NonstatiotnaryAgentQuestion.pkl | Bin 96256 -> 96256 bytes .../tests/unitgrade_data/PendulumQuestion.pkl | Bin 5447 -> 5447 bytes .../unitgrade_data/Problem1BobsFriend.pkl | Bin 170 -> 170 bytes .../Problem1DiscreteKuromoto.pkl | Bin 570 -> 570 bytes .../tests/unitgrade_data/Problem1Kuramoto.pkl | Bin 3014 -> 3014 bytes .../unitgrade_data/Problem1_to_3_Warmup.pkl | Bin 497 -> 497 bytes .../unitgrade_data/Problem2BobsPolicy.pkl | Bin 368 -> 368 bytes .../Problem2DeterministicDP.pkl | Bin 161 -> 161 bytes .../Problem2DeterministicInventory.pkl | Bin 128 -> 128 bytes .../Problem3InventoryInventoryEnvironment.pkl | Bin 323 -> 323 bytes irlc/tests/unitgrade_data/Problem3LQR.pkl | Bin 2025 -> 2025 bytes irlc/tests/unitgrade_data/Problem3PID.pkl | Bin 334 -> 334 bytes .../unitgrade_data/Problem3StochasticDP.pkl | Bin 345 -> 345 bytes irlc/tests/unitgrade_data/Problem4DPAgent.pkl | Bin 121 -> 121 bytes .../unitgrade_data/Problem4InventoryTrain.pkl | Bin 242 -> 242 bytes .../tests/unitgrade_data/Problem4LQRAgent.pkl | Bin 443 -> 443 bytes .../tests/unitgrade_data/Problem4PIDAgent.pkl | Bin 4673 -> 4673 bytes .../Problem4PolicyEvaluation.pkl | Bin 621 -> 621 bytes .../Problem5PacmanHardcoded.pkl | Bin 125 -> 125 bytes .../Problem5PolicyIteration.pkl | Bin 402 -> 402 bytes .../unitgrade_data/Problem5_6_Boeing.pkl | Bin 4219 -> 4219 bytes .../Problem6ChessTournament.pkl | Bin 197 -> 197 bytes .../unitgrade_data/Problem6ValueIteration.pkl | Bin 400 -> 400 bytes irlc/tests/unitgrade_data/Problem7PIDCar.pkl | Bin 419 -> 419 bytes .../unitgrade_data/Problem7_8_PidLQR.pkl | Bin 415 -> 415 bytes .../Problem8ValueIterationAgent.pkl | Bin 324 -> 324 bytes irlc/tests/unitgrade_data/Problem9Gambler.pkl | Bin 1083 -> 1083 bytes irlc/tests/unitgrade_data/QAgentQuestion.pkl | Bin 6517 -> 9533 bytes irlc/tests/unitgrade_data/RendevouzItem.pkl | Bin 603 -> 603 bytes irlc/tests/unitgrade_data/SarsaQuestion.pkl | Bin 277 -> 277 bytes irlc/tests/unitgrade_data/TD0Question.pkl | Bin 1698 -> 5365 bytes .../tests/unitgrade_data/UCBAgentQuestion.pkl | Bin 96256 -> 96256 bytes irlc/utils/async_wrappers.py | 58 ++++ irlc/utils/player_wrapper.py | 3 + 79 files changed, 1033 insertions(+), 74 deletions(-) create mode 100644 irlc/lectures/lec10/utils.py create mode 100644 irlc/lectures/lec11/__init__.py create mode 100644 irlc/lectures/lec11/lecture_10_grid_lin_q.py create mode 100644 irlc/lectures/lec11/lecture_10_sarsa_open.py create mode 100644 irlc/lectures/lec11/lecture_11_mountaincar_feature_space.py create mode 100644 irlc/lectures/lec11/lecture_11_mountaincar_nolearn.py create mode 100644 irlc/lectures/lec11/lecture_11_mountaincar_random_weights.py create mode 100644 irlc/lectures/lec11/lecture_11_nstep_open.py create mode 100644 irlc/lectures/lec11/lecture_11_pacman_lin_q.py create mode 100644 irlc/lectures/lec11/lecture_11_pacman_q.py create mode 100644 irlc/lectures/lec11/lecture_11_q.py create mode 100644 irlc/lectures/lec11/lecture_11_q_cliff.py create mode 100644 irlc/lectures/lec11/lecture_11_q_open.py create mode 100644 irlc/lectures/lec11/lecture_11_sarsa.py create mode 100644 irlc/lectures/lec11/lecture_11_sarsa_cliff.py create mode 100644 irlc/lectures/lec11/mountain_car_env.py create mode 100644 irlc/lectures/lec11/sarsa_nstep.py create mode 100644 irlc/tests/tests_week10.py create mode 100644 irlc/tests/tests_week11.py diff --git a/.gitignore b/.gitignore index c014141..53552e8 100644 --- a/.gitignore +++ b/.gitignore @@ -10,10 +10,10 @@ exam_tabular_examples #solutions/ex07 #solutions/ex08 # solutions/ex09 -solutions/ex10 -solutions/ex11 -solutions/ex12 -solutions/ex13 +#solutions/ex10 +#solutions/ex11 +#solutions/ex12 +#solutions/ex13 #irlc/ex03 #irlc/ex04 @@ -36,8 +36,8 @@ solutions/ex13 #irlc/tests/tests_week07.py #irlc/tests/tests_week08.py # irlc/tests/tests_week09.py -irlc/tests/tests_week10.py -irlc/tests/tests_week11.py +#irlc/tests/tests_week10.py +#irlc/tests/tests_week11.py irlc/tests/tests_week12.py irlc/tests/tests_week13.py @@ -74,7 +74,7 @@ irlc/exam/exam20*/solution #irlc/lectures/lec08 # irlc/lectures/lec09 #irlc/lectures/lec10 -irlc/lectures/lec11 +#irlc/lectures/lec11 irlc/lectures/lec12 irlc/lectures/lec13 diff --git a/irlc/lectures/lec10/lecture_10_mc_control.py b/irlc/lectures/lec10/lecture_10_mc_control.py index e286478..b727d36 100644 --- a/irlc/lectures/lec10/lecture_10_mc_control.py +++ b/irlc/lectures/lec10/lecture_10_mc_control.py @@ -2,12 +2,17 @@ from irlc.lectures.lec10.lecture_10_mc_q_estimation import keyboard_play from irlc.gridworld.gridworld_environments import BookGridEnvironment from irlc.ex10.mc_agent import MCAgent +# from irlc.lectures.lec10.utils import MCAgentResettable + import numpy as np if __name__ == "__main__": np.random.seed(433) - env = BookGridEnvironment(render_mode='human',zoom=2) + env = BookGridEnvironment(render_mode='human',zoom=2, living_reward=-0.05) # agent = MCAgent(env, gamma=0.9, epsilon=0.15, alpha=0.1, first_visit=True) + from irlc.lectures.lec10.utils import agent_reset + MCAgent.reset = agent_reset agent = MCAgent(env, gamma=1.0, epsilon=0.15, alpha=None, first_visit=True) + # env, agent = interactive(env, agent) keyboard_play(env,agent,method_label='MC control') diff --git a/irlc/lectures/lec10/lecture_10_mc_onestate_first.py b/irlc/lectures/lec10/lecture_10_mc_onestate_first.py index c111aa6..32b7afa 100644 --- a/irlc/lectures/lec10/lecture_10_mc_onestate_first.py +++ b/irlc/lectures/lec10/lecture_10_mc_onestate_first.py @@ -12,6 +12,8 @@ class CaughtGrid(GridworldEnvironment): def __init__(self, **kwargs): super().__init__(map, living_reward=1, zoom=1.5, **kwargs) + + if __name__ == "__main__": env = CaughtGrid(view_mode=1, render_mode='human') agent = MCEvaluationAgent(env, gamma=1, alpha=None) diff --git a/irlc/lectures/lec10/lecture_10_mc_q_estimation.py b/irlc/lectures/lec10/lecture_10_mc_q_estimation.py index 4ba40a2..bdba3e1 100644 --- a/irlc/lectures/lec10/lecture_10_mc_q_estimation.py +++ b/irlc/lectures/lec10/lecture_10_mc_q_estimation.py @@ -11,21 +11,6 @@ def keyboard_play(env, agent, method_label='MC',autoplay=False, num_episodes=100 env.close() -def automatic_play(env, agent, method_label='MC'): - # agent = PlayWrapper(agent, env) - env = VideoMonitor(env, agent=agent, fps=40, continious_recording=True, agent_monitor_keys=('pi', 'Q'), render_kwargs={'method_label': method_label}) - train(env, agent, num_episodes=1000) - env.close() - -def automatic_play_value(env, agent, method_label='MC'): - agent.label = method_label - env, agent = interactive(env, agent) - - # env = VideoMonitor(env, agent=agent, fps=40, continious_recording=True, agent_monitor_keys=('v'), render_kwargs={'method_label': method_label}) - # agent = PlayWrapper(agent, env) - train(env, agent, num_episodes=1000) - env.close() - if __name__ == "__main__": env = BookGridEnvironment(render_mode='human', zoom=2, living_reward=-0.05) from irlc.ex10.mc_agent import MCAgent diff --git a/irlc/lectures/lec10/lecture_10_mc_value_every.py b/irlc/lectures/lec10/lecture_10_mc_value_every.py index 8598fa5..d42c5ac 100644 --- a/irlc/lectures/lec10/lecture_10_mc_value_every.py +++ b/irlc/lectures/lec10/lecture_10_mc_value_every.py @@ -6,6 +6,6 @@ from irlc.ex10.mc_evaluate import MCEvaluationAgent if __name__ == "__main__": env = BookGridEnvironment(view_mode=1, render_mode='human', living_reward=-0.05) - agent = MCEvaluationAgent(env, gamma=.9, alpha=None, first_visit=False) + agent = MCEvaluationAgent(env, gamma=1, alpha=None, first_visit=False) keyboard_play_value(env,agent,method_label='MC every') diff --git a/irlc/lectures/lec10/lecture_10_mc_value_first_one_state.py b/irlc/lectures/lec10/lecture_10_mc_value_first_one_state.py index c998543..1740661 100644 --- a/irlc/lectures/lec10/lecture_10_mc_value_first_one_state.py +++ b/irlc/lectures/lec10/lecture_10_mc_value_first_one_state.py @@ -6,6 +6,7 @@ from irlc.ex10.mc_evaluate import MCEvaluationAgent import numpy as np from irlc import interactive, train + class MCAgentOneState(MCEvaluationAgent): def __init__(self, *args, state=None, **kwargs): a = 34 @@ -17,18 +18,19 @@ class MCAgentOneState(MCEvaluationAgent): def _clear_states(self, val=None): for s in self.env.mdp.nonterminal_states: - # for a in self.env.mdp.A(s): - # self.Q[s,a] = 0 if s != self.state: self.returns_sum_S[s] = val self.returns_count_N[s] = val - if s in self.v: k = next(self.env.mdp.Psr(s, self.env.mdp.A(s)[0]).keys().__iter__() )[0] if not self.env.mdp.is_terminal(k): del self.v[s] + def reset(self): + from irlc.lectures.lec10.utils import agent_reset + agent_reset(self) + self._clear_states(None) def train(self, s, a, r, sp, done=False, info_s=None, info_sp=None): # self.episode = [e for e in self.episode if e[0] == self.state] @@ -39,6 +41,7 @@ class MCAgentOneState(MCEvaluationAgent): if __name__ == "__main__": env = BookGridEnvironment(render_mode='human', living_reward=-0.05, print_states=True, zoom=2) + agent = MCAgentOneState(env, gamma=1, alpha=None, first_visit=True) method_label = 'MC (gamma=1)' agent.label = method_label @@ -49,16 +52,3 @@ if __name__ == "__main__": num_episodes = 1000 train(env, agent, num_episodes=num_episodes) env.close() - - import matplotlib.pyplot as plt - import numpy as np - - import matplotlib.pyplot as plt - import numpy as np - - lt = np.linspace(np.log(1000), np.log(2000) + 0*5000) - plt.plot(lt, 5 + 2 * np.sqrt(lt / 500), 'k-') - plt.plot(lt, 10 + 2 * np.sqrt(lt / (np.exp(lt) - 500)), 'r-') - plt.xlabel('log(t)') - plt.show() - # keyboard_play(env,agent,method_label='MC (alpha=0.5)') diff --git a/irlc/lectures/lec10/lecture_10_mc_value_first_one_state_b.py b/irlc/lectures/lec10/lecture_10_mc_value_first_one_state_b.py index 6567221..4f7c8d2 100644 --- a/irlc/lectures/lec10/lecture_10_mc_value_first_one_state_b.py +++ b/irlc/lectures/lec10/lecture_10_mc_value_first_one_state_b.py @@ -7,40 +7,6 @@ from irlc.ex10.mc_evaluate import MCEvaluationAgent import numpy as np from irlc import interactive, train -# class MCAgentOneState(MCEvaluationAgent): -# def __init__(self, *args, state=None, **kwargs): -# a = 34 -# super().__init__(*args, **kwargs) -# if state is None: -# state = self.env.mdp.initial_state -# self.state = state -# self._clear_states() -# -# def _clear_states(self, val=None): -# for s in self.env.mdp.nonterminal_states: -# # for a in self.env.mdp.A(s): -# # self.Q[s,a] = 0 -# if s != self.state: -# self.returns_sum_S[s] = val -# self.returns_count_N[s] = val -# if s in self.v: -# k = next(self.env.mdp.Psr(s, self.env.mdp.A(s)[0]).keys().__iter__() )[0] -# if not self.env.mdp.is_terminal(k): -# -# del self.v[s] -# -# def train(self, s, a, r, sp, done=False, info_s=None, info_sp=None): -# # self.episode = [e for e in self.episode if e[0] == self.state] -# self._clear_states(0) -# super().train(s, a, r, sp, done) -# # Clear out many of the state, actions: -# self._clear_states(None) -# # for s in self.env.mdp.nonterminal_states: -# # if s != self.state: -# # self.v[s] = None -# -# pass - if __name__ == "__main__": env = BookGridEnvironment(render_mode='human', living_reward=-0.05) diff --git a/irlc/lectures/lec10/lecture_10_td_keyboard.py b/irlc/lectures/lec10/lecture_10_td_keyboard.py index 8787900..d1c9d9d 100644 --- a/irlc/lectures/lec10/lecture_10_td_keyboard.py +++ b/irlc/lectures/lec10/lecture_10_td_keyboard.py @@ -2,8 +2,10 @@ from irlc.lectures.lec10.lecture_10_mc_q_estimation import automatic_play_value from irlc.gridworld.gridworld_environments import BookGridEnvironment from irlc.ex10.td0_evaluate import TD0ValueAgent +from irlc.lectures.lec10.utils import agent_reset if __name__ == "__main__": env = BookGridEnvironment(render_mode='human', living_reward=-0.05) + TD0ValueAgent.reset = agent_reset agent = TD0ValueAgent(env, gamma=1.0, alpha=0.2) automatic_play_value(env,agent,method_label='TD(0)') diff --git a/irlc/lectures/lec10/utils.py b/irlc/lectures/lec10/utils.py new file mode 100644 index 0000000..fc89454 --- /dev/null +++ b/irlc/lectures/lec10/utils.py @@ -0,0 +1,20 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.ex10.mc_agent import MCAgent +from irlc.ex09.rl_agent import TabularQ + +class MCAgentResettable(MCAgent): + def reset(self): + return agent_reset(self) + +def agent_reset(self): + # General reset option. Wroks on many agents. + attrs = ['returns_sum_S', 'returns_count_N', 'Q', 'v'] + + for attr in attrs: + if hasattr(self, attr): + at = getattr(self, attr) + if isinstance(at, dict): + at.clear() + + if hasattr(self, 'Q') and isinstance(self.Q, TabularQ): + self.Q.q_.clear() diff --git a/irlc/lectures/lec11/__init__.py b/irlc/lectures/lec11/__init__.py new file mode 100644 index 0000000..a56057c --- /dev/null +++ b/irlc/lectures/lec11/__init__.py @@ -0,0 +1 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. diff --git a/irlc/lectures/lec11/lecture_10_grid_lin_q.py b/irlc/lectures/lec11/lecture_10_grid_lin_q.py new file mode 100644 index 0000000..659201d --- /dev/null +++ b/irlc/lectures/lec11/lecture_10_grid_lin_q.py @@ -0,0 +1,10 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.berkley.rl.semi_grad_q import LinearSemiGradQAgent +from irlc.ex11.feature_encoder import GridworldXYEncoder +from irlc.gridworld.gridworld_environments import BookGridEnvironment +from irlc.lectures.lec10.lecture_10_mc_q_estimation import keyboard_play + +if __name__ == "__main__": + env = BookGridEnvironment(render_mode='human') + agent = LinearSemiGradQAgent(env, gamma=0.95, epsilon=0.1, alpha=.01, q_encoder=GridworldXYEncoder(env)) + keyboard_play(env, agent, method_label="Q-lin-xy") diff --git a/irlc/lectures/lec11/lecture_10_sarsa_open.py b/irlc/lectures/lec11/lecture_10_sarsa_open.py new file mode 100644 index 0000000..5793603 --- /dev/null +++ b/irlc/lectures/lec11/lecture_10_sarsa_open.py @@ -0,0 +1,13 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.gridworld.gridworld_environments import OpenGridEnvironment +from irlc.lectures.lec10.lecture_10_mc_q_estimation import keyboard_play +from irlc.ex11.sarsa_agent import SarsaAgent + +def open_play(Agent, method_label, frames_per_second=30, **args): + env = OpenGridEnvironment(render_mode='human', frames_per_second=frames_per_second) + agent = Agent(env, gamma=0.99, epsilon=0.1, alpha=.5, **args) + method_label = f"{method_label} (gamma=0.99, epsilon=0.1, alpha=0.5)" + keyboard_play(env, agent, method_label=method_label) + +if __name__ == "__main__": + open_play(SarsaAgent, method_label="Sarsa") diff --git a/irlc/lectures/lec11/lecture_11_mountaincar_feature_space.py b/irlc/lectures/lec11/lecture_11_mountaincar_feature_space.py new file mode 100644 index 0000000..1c99f03 --- /dev/null +++ b/irlc/lectures/lec11/lecture_11_mountaincar_feature_space.py @@ -0,0 +1,25 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.lectures.lec11.mountain_car_env import FancyMountainCar +from irlc.pacman.pacman_resources import WHITE, BLACK +from irlc.utils.graphics_util_pygame import GraphicsUtilGym +from irlc.lectures.lec11.mountain_car_env import MountainCarVisualization +from irlc.ex11.semi_grad_sarsa import LinearSemiGradSarsa + +if __name__ == '__main__': + from irlc import Agent, interactive, train + env = FancyMountainCar(render_mode='human') + num_of_tilings = 8 + alpha = 0.3 + + # env = gym.make("MountainCar-v0") + agent = LinearSemiGradSarsa(env, gamma=1, alpha=alpha/num_of_tilings, epsilon=0) + # agent = Agent(env) + + env, agent = interactive(env, agent) + train(env, agent, num_episodes=10) + + env.close() + + + + pass diff --git a/irlc/lectures/lec11/lecture_11_mountaincar_nolearn.py b/irlc/lectures/lec11/lecture_11_mountaincar_nolearn.py new file mode 100644 index 0000000..bb94976 --- /dev/null +++ b/irlc/lectures/lec11/lecture_11_mountaincar_nolearn.py @@ -0,0 +1,25 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.lectures.lec11.mountain_car_env import FancyMountainCar +from irlc.ex11.semi_grad_sarsa import LinearSemiGradSarsa + +class RandomWeightAgent(LinearSemiGradSarsa): + def train(self, *args, **kwargs): + pass + pass + +if __name__ == '__main__': + from irlc import Agent, interactive, train + env = FancyMountainCar(render_mode='human') + num_of_tilings = 8 + alpha = 0 + agent = LinearSemiGradSarsa(env, gamma=1, alpha=alpha/num_of_tilings, epsilon=0) + # agent = Agent(env) + + env, agent = interactive(env, agent) + train(env, agent, num_episodes=10) + + env.close() + + + + pass diff --git a/irlc/lectures/lec11/lecture_11_mountaincar_random_weights.py b/irlc/lectures/lec11/lecture_11_mountaincar_random_weights.py new file mode 100644 index 0000000..e870943 --- /dev/null +++ b/irlc/lectures/lec11/lecture_11_mountaincar_random_weights.py @@ -0,0 +1,25 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +import numpy as np +from irlc.lectures.lec11.mountain_car_env import FancyMountainCar +from irlc.ex11.semi_grad_sarsa import LinearSemiGradSarsa + +class RandomWeightAgent(LinearSemiGradSarsa): + def train(self, *args, **kwargs): + super().train(*args, **kwargs) + self.Q.w = np.random.randn(self.Q.w.shape[0]) + +if __name__ == '__main__': + from irlc import Agent, interactive, train + env = FancyMountainCar(render_mode='human') + num_of_tilings = 8 + alpha = 0.3 + # env = gym.make("MountainCar-v0") + agent = RandomWeightAgent(env) #(env, gamma=1, alpha=alpha/num_of_tilings, epsilon=0) + env, agent = interactive(env, agent) + train(env, agent, num_episodes=10) + + env.close() + + + + pass diff --git a/irlc/lectures/lec11/lecture_11_nstep_open.py b/irlc/lectures/lec11/lecture_11_nstep_open.py new file mode 100644 index 0000000..ab672b2 --- /dev/null +++ b/irlc/lectures/lec11/lecture_11_nstep_open.py @@ -0,0 +1,11 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +# from irlc.berkley.rl.feature_encoder import SimplePacmanExtractor + +from irlc.ex11.nstep_sarsa_agent import SarsaNAgent +from irlc.exam_tabular_examples.sarsa_nstep_delay import SarsaDelayNAgent + +from irlc.lectures.lec11.lecture_10_sarsa_open import open_play +if __name__ == "__main__": + # env = OpenGridEnvironment() + # agent = (env, gamma=0.95, epsilon=0.1, alpha=.5) + open_play(SarsaDelayNAgent, method_label="N-step Sarsa n=8", n=8) diff --git a/irlc/lectures/lec11/lecture_11_pacman_lin_q.py b/irlc/lectures/lec11/lecture_11_pacman_lin_q.py new file mode 100644 index 0000000..3b7e121 --- /dev/null +++ b/irlc/lectures/lec11/lecture_11_pacman_lin_q.py @@ -0,0 +1,32 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.ex11.semi_grad_q import LinearSemiGradQAgent +from irlc.pacman.pacman_environment import PacmanEnvironment, PacmanWinWrapper +from irlc.ex11.feature_encoder import SimplePacmanExtractor +import matplotlib.pyplot as plt +# from irlc.utils.video_monitor import VideoMonitor +from irlc.ex01.agent import train +# from irlc import PlayWrapper +from irlc import interactive + +def play_pacman(env, agent, layout = 'smallGrid'): + train(env, agent, num_episodes=100) + + env2 = PacmanWinWrapper(env) + + # env2 = Monitor(env2, directory="experiments/randomdir", force=True) + # env2 = VideoMonitor(env2) + env2, agent = interactive(env, agent) + agent.epsilon = 0 + agent.alpha = 0 + # agent = PlayWrapper(agent, env2) + train(env2, agent, num_episodes=100) + plt.show() + env.close() + +if __name__ == "__main__": + layout = 'smallGrid' + env = PacmanEnvironment(animate_movement=True, layout=layout, render_mode='human', frames_per_second=100) + qex = SimplePacmanExtractor(env) + agent = LinearSemiGradQAgent(env, epsilon=0.05, alpha=0.1, gamma=0.8, q_encoder=qex) + play_pacman(env, agent, layout = 'smallGrid') + # main_plot('experiments/q_lin') diff --git a/irlc/lectures/lec11/lecture_11_pacman_q.py b/irlc/lectures/lec11/lecture_11_pacman_q.py new file mode 100644 index 0000000..7a51a06 --- /dev/null +++ b/irlc/lectures/lec11/lecture_11_pacman_q.py @@ -0,0 +1,35 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.pacman.pacman_environment import PacmanEnvironment, PacmanWinWrapper +# from irlc.berkley.rl.feature_encoder import SimplePacmanExtractor +# from irlc.utils.player_wrapper_pyglet import PlayWrapper +from irlc import main_plot +import matplotlib.pyplot as plt +# from irlc.utils.video_monitor import VideoMonitor +from irlc.ex01.agent import train +# from irlc.lectures.lecture_09_mc import keyboard_play +from irlc.ex11.q_agent import QAgent +from irlc import interactive + + +def play_pacman(env, agent, layout = 'smallGrid'): + + train(env, agent, num_episodes=100) + env2 = PacmanWinWrapper(env) + # env2 = Monitor(env2, directory="experiments/randomdir", force=True) + # env2 = VideoMonitor(env2) + env2, agent = interactive(env2, agent) + agent.epsilon = 0 + agent.alpha = 0 + # agent = PlayWrapper(agent, env2) + train(env2, agent, num_episodes=100) + plt.show() + env.close() + +if __name__ == "__main__": + layout = 'smallGrid' + env = PacmanEnvironment(animate_movement=False, layout=layout, render_mode='human') + agent = QAgent(env, epsilon=0.05, alpha=0.1, gamma=0.8) + # from irlc import PlayWrapper + # agent = PlayWrapper(agent, env) + play_pacman(env, agent, layout = 'smallGrid') + # main_plot('experiments/q_lin') diff --git a/irlc/lectures/lec11/lecture_11_q.py b/irlc/lectures/lec11/lecture_11_q.py new file mode 100644 index 0000000..d3df9db --- /dev/null +++ b/irlc/lectures/lec11/lecture_11_q.py @@ -0,0 +1,10 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +# from irlc.berkley.rl.feature_encoder import SimplePacmanExtractor +from irlc.gridworld.gridworld_environments import BookGridEnvironment +from irlc.lectures.lec10.lecture_10_mc_q_estimation import keyboard_play +from irlc.ex11.q_agent import QAgent + +if __name__ == "__main__": + env = BookGridEnvironment(render_mode='human') + agent = QAgent(env, gamma=0.95, epsilon=0.1, alpha=.2) + keyboard_play(env, agent, method_label="Q-learning") diff --git a/irlc/lectures/lec11/lecture_11_q_cliff.py b/irlc/lectures/lec11/lecture_11_q_cliff.py new file mode 100644 index 0000000..421db1f --- /dev/null +++ b/irlc/lectures/lec11/lecture_11_q_cliff.py @@ -0,0 +1,18 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.gridworld.gridworld_environments import CliffGridEnvironment, CliffGridEnvironment2 +from irlc.ex11.q_agent import QAgent + + +# def cliffwalk(env, agent, method_label="method"): +# agent = PlayWrapper(agent, env) + # env = VideoMonitor(env, agent=agent, fps=100, continious_recording=True, agent_monitor_keys=('pi', 'Q'), render_kwargs={'method_label': method_label}) + # train(env, agent, num_episodes=200) + # env.close() + +from irlc.lectures.lec11.lecture_11_sarsa_cliff import cliffwalk, gamma, alpha, epsi +if __name__ == "__main__": + import numpy as np + np.random.seed(1) + env = CliffGridEnvironment2(zoom=.8, render_mode='human') + agent = QAgent(env, gamma=gamma, epsilon=epsi, alpha=alpha) + cliffwalk(env, agent, method_label="Q-learning") diff --git a/irlc/lectures/lec11/lecture_11_q_open.py b/irlc/lectures/lec11/lecture_11_q_open.py new file mode 100644 index 0000000..f0a35a5 --- /dev/null +++ b/irlc/lectures/lec11/lecture_11_q_open.py @@ -0,0 +1,12 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.gridworld_pyglet.gridworld_environments import OpenGridEnvironment +from irlc.lectures.lec10.lecture_10_mc_q_estimation import keyboard_play +from irlc.ex11.q_agent import QAgent + +def open_play(Agent, method_label, **args): + env = OpenGridEnvironment() + agent = Agent(env, gamma=0.99, epsilon=0.1, alpha=.5, **args) + keyboard_play(env, agent, method_label=method_label) + +if __name__ == "__main__": + open_play(QAgent, method_label="Q-learning") diff --git a/irlc/lectures/lec11/lecture_11_sarsa.py b/irlc/lectures/lec11/lecture_11_sarsa.py new file mode 100644 index 0000000..791a1b4 --- /dev/null +++ b/irlc/lectures/lec11/lecture_11_sarsa.py @@ -0,0 +1,9 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.gridworld.gridworld_environments import BookGridEnvironment +from irlc.lectures.lec10.lecture_10_mc_q_estimation import keyboard_play +from irlc.exam_tabular_examples.sarsa_nstep_delay import SarsaDelayNAgent + +if __name__ == "__main__": + env = BookGridEnvironment(render_mode='human') + agent = SarsaDelayNAgent(env, gamma=0.95, epsilon=0.1, alpha=.96, n=1) + keyboard_play(env, agent, method_label="Sarsa") diff --git a/irlc/lectures/lec11/lecture_11_sarsa_cliff.py b/irlc/lectures/lec11/lecture_11_sarsa_cliff.py new file mode 100644 index 0000000..3d250fa --- /dev/null +++ b/irlc/lectures/lec11/lecture_11_sarsa_cliff.py @@ -0,0 +1,33 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +# from irlc.utils.player_wrapper_pyglet import PlayWrapper +from irlc.gridworld.gridworld_environments import CliffGridEnvironment, CliffGridEnvironment2 +# from irlc.utils.video_monitor import VideoMonitor +from irlc.ex01.agent import train +from irlc import interactive +from irlc.ex11.sarsa_agent import SarsaAgent + + +def cliffwalk(env, agent, method_label="method"): + # agent = PlayWrapper(agent, env) + env.label = method_label + agent.method_label = method_label + agent.label = method_label + agent.method = method_label + + + env, agent = interactive(env, agent) + # env = VideoMonitor(env, agent=agent, fps=200, continious_recording=True, agent_monitor_keys=('pi', 'Q'), render_kwargs={'method_label': method_label}) + train(env, agent, num_episodes=1000) + env.close() + +epsi = 0.5 +gamma = 1.0 +alpha = .3 + +if __name__ == "__main__": + import numpy as np + np.random.seed(1) + env = CliffGridEnvironment2(zoom=.8, render_mode='human') + agent = SarsaAgent(env, gamma=gamma, epsilon=epsi, alpha=alpha) + # agent = QAgent(env, gamma=0.95, epsilon=0.5, alpha=.2) + cliffwalk(env, agent, method_label="Sarsa") diff --git a/irlc/lectures/lec11/mountain_car_env.py b/irlc/lectures/lec11/mountain_car_env.py new file mode 100644 index 0000000..c105e3a --- /dev/null +++ b/irlc/lectures/lec11/mountain_car_env.py @@ -0,0 +1,326 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from gymnasium.envs.classic_control import MountainCarEnv +import math +from typing import Optional +import numpy as np +import gymnasium as gym +from gymnasium import spaces +from gymnasium.envs.classic_control import utils +from gymnasium.error import DependencyNotInstalled + +class FancyMountainCar(MountainCarEnv): # piggybag on the original env. + visualization = None + + def __init__(self, render_mode: Optional[str] = None, goal_velocity=0): + super().__init__(render_mode=render_mode, goal_velocity=goal_velocity) + + def render(self): + if self.visualization is None: + self.visualization = MountainCarVisualization(self, self.agent if hasattr(self, 'agent') else None) + return self.visualization.render() + + def close(self): + if self.visualization is not None: + self.visualization.close() + + +from irlc.pacman.pacman_resources import WHITE, BLACK +from irlc.utils.graphics_util_pygame import GraphicsUtilGym +class MountainCarVisualization: + def __init__(self, env, agent): + self.env = env + self.agent = agent + + # self.k = 0 + # self.states = [] + # self.actions = [] + # self.factories = [] + # self.inventory = inventory + # xmin = -0.2 + # xmax = inventory.N * 2 + 1.4 + # xmax = 4 + + # ymin = -0.4 + # ymax = 1.4 + 0.2 + # dx = xmax - xmin + # dy = ymax - ymin + self.ga = GraphicsUtilGym() + # screen_width = 1300 + screen_width = env.screen_width * 2 + # + # -env.min_position + # env.max_position + + xmin = env.min_position + xmax = env.max_position + 1.8 + # env._height + + screen_height = env.screen_height + ymin = 0 + ymax = 1.2 + # screen_height = dy * (screen_width / dx) + frames_per_second = 30 + self.ga.begin_graphics(screen_width, screen_height, + local_xmin_xmax_ymin_ymax=(xmin, xmax, ymax, ymin), frames_per_second=frames_per_second, + color=WHITE, title=f"MountainCar Environment") + + # self.last_action = None + # self.agent = None + # self.last_reward = None + # self.scale = screen_width / dx + + x_cache = [] + + + def render(self): + # if self.env.render_mode is None: + # assert self.env.spec is not None + # gym.logger.warn( + # "You are calling render method without specifying any render mode. " + # "You can specify the render_mode at initialization, " + # f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")' + # ) + # return + # try: + # import pygame + # from pygame import gfxdraw + # except ImportError as e: + # raise DependencyNotInstalled( + # 'pygame is not installed, run `pip install "gymnasium[classic_control]"`' + # ) from e + + # + # + # if self.screen is None: + # pygame.init() + # if self.render_mode == "human": + # pygame.display.init() + # self.screen = pygame.display.set_mode( + # (self.screen_width, self.screen_height) + # ) + # else: # mode in "rgb_array" + # self.screen = pygame.Surface((self.screen_width, self.screen_height)) + # if self.clock is None: + # self.clock = pygame.time.Clock() + self.ga.draw_background() + # self.ga.circle("sadf", pos=(0,0), r=100, fillColor=(100, 10, 50)) + + pos = self.env.state[0] + scale = 1 + + xs = np.linspace(self.env.min_position, self.env.max_position, 100) + ys = self.env._height(xs) + # xys = list(zip((xs - self.env.min_position) * scale, ys * scale)) + + self.ga.polyline("asdfasfd", xs=xs, ys=ys, width=1) + + + # pygame.draw.aalines(self.surf, points=xys, closed=False, color=(0, 0, 0)) + + + + world_width = self.env.max_position - self.env.min_position + # scale = self.screen_width / world_width + rscale = self.env.screen_width / world_width + + carwidth = 40 / rscale + carheight = 20 / rscale + + # self.surf = pygame.Surface((self.screen_width, self.screen_height)) + # self.surf.fill((255, 255, 255)) + + # pos = self.state[0] + + # xs = np.linspace(self.min_position, self.max_position, 100) + # ys = self._height(xs) + # xys = list(zip((xs - self.min_position) * scale, ys * scale)) + + # pygame.draw.aalines(self.surf, points=xys, closed=False, color=(0, 0, 0)) + import pygame + clearance = 10 / rscale + # clearance=0.01 + + l, r, t, b = -carwidth / 2, carwidth / 2, carheight, 0 + coords = [] + for c in [(l, b), (l, t), (r, t), (r, b)]: + c = pygame.math.Vector2(c).rotate_rad(math.cos(3 * pos)) + coords.append( + ( + c[0] + (pos - 0*self.env.min_position) * scale, + c[1] + clearance + self.env._height(pos) * scale, + ) + ) + self.ga.polygon("adsfasdf", coords=coords, outlineColor=BLACK, fillColor=BLACK, width=2) + # gfxdraw.aapolygon(self.surf, coords, (0, 0, 0)) + # gfxdraw.filled_polygon(self.surf, coords, (0, 0, 0)) + + + for c in [(carwidth / 4, 0), (-carwidth / 4, 0)]: + c = pygame.math.Vector2(c).rotate_rad(math.cos(3 * pos)) + wheel = ( + c[0] + (pos - 0*self.env.min_position) * scale, + c[1] + clearance + self.env._height(pos) * scale, + ) + + # gfxdraw.aacircle( + # self.surf, wheel[0], wheel[1], int(carheight / 2.5), (128, 128, 128) + # ) + + self.ga.circle("asdf", (wheel[0], wheel[1]), int(carheight / 2.5*rscale), fillColor=(128, 128, 128), outlineColor= (70, 70, 70)) + # + # gfxdraw.filled_circle( + # self.surf, wheel[0], wheel[1], int(carheight / 2.5 * rscale), (128, 128, 128) + # ) + + flagx = (self.env.goal_position - 0*self.env.min_position) * scale + flagy1 = self.env._height(self.env.goal_position) * scale + flagy2 = flagy1 + 50/rscale + self.ga.line("asdfasdf", (flagx, flagy1), (flagx, flagy2), color=(0, 0, 0)) + + self.ga.polygon( + "sdfasdf", + [(flagx, flagy2), (flagx, flagy2 - 10/rscale), (flagx + 25/rscale, flagy2 - 5/rscale)], + (204, 204, 0), + ) + # gfxdraw.aapolygon( + # self.surf, + # [(flagx, flagy2), (flagx, flagy2 - 10/rscale), (flagx + 25/rscale, flagy2 - 5/rscale)], + # (204, 204, 0), + # ) + # gfxdraw.filled_polygon( + # self.surf, + # [(flagx, flagy2), (flagx, flagy2 - 10/rscale), (flagx + 25/rscale, flagy2 - 5)], + # (204, 204, 0), + # ) + # Optionally draw the value functino. + # oxmin = 0.6 + # oxmax = 1.7 + # oymin = 0 + # oymax = 1 + + # self.env.observation_space + # dx = 1.5 + # dy = 0 + + # sX = 1 + # sY = 1 + + # Pscale = 1 + Vscale = 6 + + # def pos2s(pos):#, vel): + # return pos + 1.8 #, (vel + 0.2) * 3 + # def vel2s(vel): + # return (vel + 0.) * Vscale + + def x2s(pos, vel): + return pos + 1.75, (vel + 0.1) * Vscale + + xmin,ymin = x2s(self.env.observation_space.low[0], self.env.observation_space.low[1] ) + xmax,ymax = x2s(self.env.observation_space.high[0], self.env.observation_space.high[1] ) + + px, py = x2s( *np.asarray(self.env.state).tolist()) + + + + # self.env.observation_space.low + if self.agent is not None: + + def colfunc(val, minval, maxval, startcolor, stopcolor): + """ Convert value in the range minval...maxval to a color in the range + startcolor to stopcolor. The colors passed and the one returned are + composed of a sequence of N component values (e.g. RGB). + """ + f = float(val - minval) / (maxval - minval) + return tuple( float( f * (b - a) + a) for (a, b) in zip(startcolor, stopcolor)) + + RED, YELLOW, GREEN = (1, 0, 0), (1, 1, 0), (0, 1, 0) + CYAN, BLUE, MAGENTA = (0, 1, 1), (0, 0, 1), (1, 0, 1) + steps = 10 + minval, maxval = 0.0, 1.0 + # incr = (maxval - minval) / steps + # for i in range(steps + 1): + # val = minval + round(i * incr, 1) + # # print('{:.1f} -> ({:.3f}, {:.3f}, {:.3f})'.format( + # # val, *colfunc(val, minval, maxval, BLUE, RED))) + + value_function = lambda s: -max(self.agent.Q.get_Qs(s)[1]) + + grid_size = 40 + # grid_size = 30 + low = self.env.unwrapped.observation_space.low + high = self.env.unwrapped.observation_space.high + X, Y = np.meshgrid(np.linspace(low[0], high[0], grid_size), np.linspace(low[1], high[1], grid_size)) + Z = X * 0 + + if self.x_cache is None or len(self.x_cache) == 0: + for i, (x, y) in enumerate(zip(X.flat, Y.flat)): + s = (x, y) + xx = [self.agent.Q.x(s, a) for a in range(self.env.action_space.n) ] + self.x_cache.append(xx) + # Z.flat[i] = value_function((x, y)) + pass + # for i, (x, y) in enumerate(zip(X.flat, Y.flat)): + # # [max([float(self.agent.Q.w @ dx) for dx in xx]) for xx in self.x_cache] + # + # + # + # Z.flat[i] = value_function((x, y)) + # pass + for i in range(len(self.x_cache)): + Z.flat[i] = max([float(self.agent.Q.w @ dx) for dx in self.x_cache[i]]) + pass + + for i in range(len(Z.flat)): + ddx = (X.max() - X.min()) / (grid_size-1) + ddy = (Y.max() - Y.min()) / (grid_size-1) + + z = colfunc(Z.flat[i], Z.min(), Z.max()+0.01, BLUE, RED) + + z = tuple( int(x*255) for x in z) + + xmin, ymin = x2s(X.flat[i], Y.flat[i]) + xmax, ymax = x2s(X.flat[i]+ddx, Y.flat[i]+ddy) + + self.ga.rectangle(color=z, x=xmin, y=ymin, width=xmax-xmin, height=ymax-ymin) + pass + # colfunc(val, minval, maxval, startcolor, stopcolor): + + self.ga.rectangle(color=BLACK, x=xmin, y=ymin, width=xmax - xmin, height=ymax - ymin, border=1) + self.ga.circle("asdf", (px, py), r=5, fillColor=(200, 200, 200)) + + return self.ga.blit(render_mode=self.env.render_mode) + + # self.surf = pygame.transform.flip(self.surf, False, True) + # self.screen.blit(self.surf, (0, 0)) + # if self.render_mode == "human": + # pygame.event.pump() + # self.clock.tick(self.metadata["render_fps"]) + # pygame.display.flip() + # + # elif self.render_mode == "rgb_array": + # return np.transpose( + # np.array(pygame.surfarray.pixels3d(self.screen)), axes=(1, 0, 2) + # ) + + def close(self): + self.ga.close() + +if __name__ == '__main__': + from irlc import Agent, interactive, train + env = FancyMountainCar(render_mode='human') + num_of_tilings = 8 + alpha = 0.3 + from irlc.ex11.semi_grad_sarsa import LinearSemiGradSarsa + # env = gym.make("MountainCar-v0") + agent = LinearSemiGradSarsa(env, gamma=1, alpha=alpha/num_of_tilings, epsilon=0) + # agent = Agent(env) + + env, agent = interactive(env, agent) + train(env, agent, num_episodes=10) + + env.close() + + + + pass diff --git a/irlc/lectures/lec11/sarsa_nstep.py b/irlc/lectures/lec11/sarsa_nstep.py new file mode 100644 index 0000000..7687d17 --- /dev/null +++ b/irlc/lectures/lec11/sarsa_nstep.py @@ -0,0 +1,11 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.gridworld.gridworld_environments import BookGridEnvironment +from irlc.exam_tabular_examples.sarsa_nstep_delay import SarsaDelayNAgent +from irlc import interactive, train + +if __name__ == "__main__": + env = BookGridEnvironment(render_mode='human') + agent = SarsaDelayNAgent(env, gamma=1, epsilon=0.1, alpha=0.9, n=1) # Exam problem. + # agent = SarsaDelayNAgent(env, gamma=0.95, epsilon=0.1, alpha=.2, n=1) + env, agent = interactive(env, agent) + train(env, agent, num_episodes=10) diff --git a/irlc/tests/tests_week10.py b/irlc/tests/tests_week10.py new file mode 100644 index 0000000..b5dd4e6 --- /dev/null +++ b/irlc/tests/tests_week10.py @@ -0,0 +1,132 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from irlc.ex10.question_td0 import a_compute_deltas, b_perform_td0, c_perform_td0_batched +from unitgrade import Report, UTestCase, cache +from irlc import train +import irlc.ex10.envs +import gymnasium as gym +from gymnasium.wrappers import TimeLimit +from irlc.tests.tests_week08 import train_recording + + +class MCAgentQuestion(UTestCase): + """ Test of MC agent """ + def get_env_agent(self): + from irlc.ex10.mc_agent import MCAgent + env = gym.make("SmallGridworld-v0") + env = TimeLimit(env, max_episode_steps=1000) + gamma = .8 + agent = MCAgent(env, gamma=gamma, first_visit=True) + return env, agent + + @cache + def compute_trajectories(self): + env, agent = self.get_env_agent() + _, trajectories = train(env, agent, return_trajectory=True, num_episodes=1, max_steps=100) + return trajectories, agent.Q.to_dict() + + def test_Q_function(self): + trajectories, Q = self.compute_trajectories() + env, agent = self.get_env_agent() + train_recording(env, agent, trajectories) + Qc = [] + Qe = [] + for s, qa in Q.items(): + for a,q in qa.items(): + Qe.append(q) + Qc.append(agent.Q[s,a]) + + self.assertL2(Qe, Qc, tol=1e-5) + + +# class BlackjackQuestion(UTestCase): +# """ MC policy evaluation agent and Blacjack """ +# def test_blackjack_mc(self): +# env = gym.make("Blackjack-v1") +# episodes = 50000 +# from irlc.ex10.mc_evaluate import MCEvaluationAgent +# from irlc.ex10.mc_evaluate_blackjack import get_by_ace, to_matrix, policy20 +# agent = MCEvaluationAgent(env, policy=policy20, gamma=1) +# train(env, agent, num_episodes=episodes) +# w = get_by_ace(agent.v, ace=True) +# X, Y, Z = to_matrix(w) +# print(Z) +# print(Z.dtype) +# self.assertL2(Z, tol=2.5) + + +class TD0Question(UTestCase): + """ Test of TD(0) evaluation agent """ + gamma = 0.8 + + def get_env_agent(self): + from irlc.ex10.td0_evaluate import TD0ValueAgent + env = gym.make("SmallGridworld-v0") + # env = TimeLimit(env, max_episode_steps=1000) + agent = TD0ValueAgent(env, gamma=self.gamma) + return env, agent + + @cache + def compute_trajectories(self): + env, agent = self.get_env_agent() + _, trajectories = train(env, agent, return_trajectory=True, num_episodes=1, max_steps=100) + return trajectories, agent.v + + def test_value_function(self): + # for k in range(1000): + trajectories, v = self.compute_trajectories() + env, agent = self.get_env_agent() + train_recording(env, agent, trajectories) + Qc = [] + Qe = [] + for s, value in v.items(): + Qe.append(value) + Qc.append(agent.v[s]) + + self.assertL2(Qe, Qc, tol=1e-5) + +class MCEvaluationQuestion(TD0Question): + """ Test of MC evaluation agent """ + def get_env_agent(self): + from irlc.ex10.mc_evaluate import MCEvaluationAgent + env = gym.make("SmallGridworld-v0") + env = TimeLimit(env, max_episode_steps=1000) + gamma = .8 + agent = MCEvaluationAgent(env, gamma=gamma, first_visit=True) + return env, agent + + +class ExamQuestionTD0(UTestCase): + + def get_problem(self): + states = [1, 0, 2, -1, 2, 4, 5, 4, 3, 2, 1, -1] + rewards = [1, 1, -1, 0, 1, 2, 2, 0, 0, -1, 1] + v = {s: 0 for s in states} + gamma = 0.9 + alpha = 0.2 + return v, states, rewards, gamma, alpha + + def test_a(self): + v, states, rewards, gamma, alpha = self.get_problem() + self.assertEqualC(a_compute_deltas(v, states, rewards, gamma)) + + def test_b(self): + v, states, rewards, gamma, alpha = self.get_problem() + self.assertEqualC(b_perform_td0(v, states, rewards, gamma, alpha)) + + def test_c(self): + v, states, rewards, gamma, alpha = self.get_problem() + self.assertEqualC(c_perform_td0_batched(v, states, rewards, gamma, alpha)) +class Week10Tests(Report): + title = "Tests for week 10" + pack_imports = [irlc] + individual_imports = [] + questions = [(MCAgentQuestion, 10), + (MCEvaluationQuestion, 10), + # (BlackjackQuestion,5), + (TD0Question, 10), + (ExamQuestionTD0, 10), + ] + +if __name__ == '__main__': + from unitgrade import evaluate_report_student + evaluate_report_student(Week10Tests()) diff --git a/irlc/tests/tests_week11.py b/irlc/tests/tests_week11.py new file mode 100644 index 0000000..1f58dd1 --- /dev/null +++ b/irlc/tests/tests_week11.py @@ -0,0 +1,200 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. +from unitgrade import UTestCase, Report, cache +import numpy as np +from irlc import train +import irlc.ex10.envs +import gymnasium as gym +from irlc.tests.tests_week08 import train_recording +from irlc.tests.tests_week10 import TD0Question, MCAgentQuestion + + +# This problem no longer exists. +# class NStepSarseEvaluationQuestion(TD0Question): +# """ Test of TD-n evaluation agent """ +# # class EvaluateTabular(VExperienceItem): +# # title = "Value-function test" +# gamma = 0.8 +# def get_env_agent(self): +# envn = "SmallGridworld-v0" +# from irlc.ex11.nstep_td_evaluate import TDnValueAgent +# env = gym.make(envn) +# agent = TDnValueAgent(env, gamma=self.gamma, n=5) +# return env, agent + + + +class QAgentQuestion(MCAgentQuestion): + """ Test of Q Agent """ + # class EvaluateTabular(QExperienceItem): + # title = "Q-value test" + + def get_env_agent(self): + from irlc.ex11.q_agent import QAgent + env = gym.make("SmallGridworld-v0") + agent = QAgent(env, gamma=.8) + return env, agent + + +# class LinearWeightVectorTest(UTestCase): + + + +# class LinearValueFunctionTest(LinearWeightVectorTest): +# title = "Linear value-function test" +# def compute_answer_print(self): +# trajectories, Q = self.precomputed_payload() +# env, agent = self.get_env_agent() +# train_recording(env, agent, trajectories) +# self.Q = Q +# self.question.agent = agent +# vfun = [agent.Q[s,a] for s, a in zip(trajectories[0].state, trajectories[0].action)] +# return vfun + +# class TabularAgentStub(UTestCase): +# +# pass + +class TabularAgentStub(UTestCase): + """ Average return over many simulated episodes """ + gamma = 0.95 + epsilon = 0.2 + tol = 0.1 + tol_qs = 0.3 + episodes = 9000 + + def get_env(self): + return gym.make("SmallGridworld-v0") + + def get_env_agent(self): + raise NotImplementedError() + # from irlc.ex11.sarsa_agent import SarsaAgent + # agent = SarsaAgent(self.get_env(), gamma=self.gamma) + # return agent.env, agent + + def get_trained_agent(self): + env, agent = self.get_env_agent() + stats, _ = train(env, agent, num_episodes=self.episodes) + return agent, stats + + def chk_accumulated_reward(self): + agent, stats = self.get_trained_agent() + s0, _ = agent.env.reset() + actions, qs = agent.Q.get_Qs(s0) + print("Tolerance is", self.tol_qs) + self.assertL2(qs, tol=self.tol_qs) + self.assertL2(np.mean([s['Accumulated Reward'] for s in stats]), tol=self.tol) + + # def test_accumulated_reward(self): + # env, agent = self.get_env_agent() + # stats, _ = train(env, agent, num_episodes=5000) + # s = env.reset() + # actions, qs = agent.Q.get_Qs(s) + # self.assertL2(qs, tol=0.3) + # self.assertL2(np.mean([s['Accumulated Reward'] for s in stats]), tol=self.tol) + +class SarsaQuestion(TabularAgentStub): + + + def get_env_agent(self): + from irlc.ex11.sarsa_agent import SarsaAgent + agent = SarsaAgent(self.get_env(), gamma=self.gamma) + return agent.env, agent + + def test_accumulated_reward(self): + self.tol_qs = 2.7 # Got 2.65 in one run. + self.chk_accumulated_reward() + + +class NStepSarsaQuestion(TabularAgentStub): + title = "N-step Sarsa" + # class SarsaReturnItem(SarsaQuestion): + def get_env_agent(self): + from irlc.ex11.nstep_sarsa_agent import SarsaNAgent + agent = SarsaNAgent(self.get_env(), gamma=self.gamma, n=5) + return agent.env, agent + + def test_accumulated_reward(self): + self.tol_qs = 2.7 + self.chk_accumulated_reward() + + +class LinearAgentStub(UTestCase): + # class LinearExperienceItem(LinearWeightVectorTest): + tol = 1e-6 + # title = "Linear sarsa agent" + alpha = 0.08 + num_episodes = 300 + # title = "Weight-vector test" + # testfun = QPrintItem.assertL2 + gamma = 0.8 + tol_w = 1e-5 + + + def get_env_agent(self): + raise NotImplementedError() + + def get_env(self): + return gym.make("MountainCar500-v0") + + # def get_env_agent(self): + # return None, None + + @cache + def compute_trajectories(self): + env, agent = self.get_env_agent() + _, trajectories = train(env, agent, return_trajectory=True, num_episodes=1, max_steps=100) + return trajectories, agent.Q.w + + def chk_Q_weight_vector_w(self): + trajectories, w = self.compute_trajectories() + env, agent = self.get_env_agent() + train_recording(env, agent, trajectories) + print(w) + print(agent.Q.w) + self.assertL2(agent.Q.w, w, tol=self.tol_w) + + pass +class LinearSarsaAgentQuestion(LinearAgentStub): + """ Sarsa Agent with linear function approximators """ + + def get_env_agent(self): + env = self.get_env() + from irlc.ex11.semi_grad_sarsa import LinearSemiGradSarsa + agent = LinearSemiGradSarsa(env, gamma=1, alpha=self.alpha, epsilon=0) + return env, agent + + def test_Q_weight_vector_w(self): + self.tol_w = 1.4 + self.chk_Q_weight_vector_w() + +class LinearQAgentQuestion(LinearAgentStub): + """ Test of Linear Q Agent """ + + def get_env_agent(self): + env = self.get_env() + alpha = 0.1 + from irlc.ex11.semi_grad_q import LinearSemiGradQAgent + agent = LinearSemiGradQAgent(env, gamma=1, alpha=alpha, epsilon=0) + return env, agent + + def test_Q_weight_vector_w(self): + # self.tol_qs = 1.9 + self.tol_w = 7 + self.chk_Q_weight_vector_w() + + +class Week11Tests(Report): + title = "Tests for week 11" + pack_imports = [irlc] + individual_imports = [] + questions =[ + # (NStepSarseEvaluationQuestion, 10), + (QAgentQuestion, 10), + (LinearQAgentQuestion, 10), + (LinearSarsaAgentQuestion, 10), + (SarsaQuestion, 10), + (NStepSarsaQuestion, 5), + ] +if __name__ == '__main__': + from unitgrade import evaluate_report_student + evaluate_report_student(Week11Tests()) diff --git a/irlc/tests/unitgrade_data/BanditQuestion.pkl b/irlc/tests/unitgrade_data/BanditQuestion.pkl index edd179f5233e5a15868c54b4fd9cc2965fadb20f..473236810511533d00ae28302696def58d013643 100644 GIT binary patch delta 102 zcmZqpz}oPEb%G_+ABK(A+J=k=Hm4g#8!)#$c9?uLQJwMY=C_FsnvDIM^}0PYnVJnI zS8P#a`sT2C_7*-vrr?j8S3gxSWS)O!!t`@uj5>^8w*MAmTw}mEYx@d!Mt=j&w(kiH J3=9mVdH{3RC|m#l delta 104 zcmZqpz}oPEb%G_+e}|3M+J=nxH>Vp$8!)#^9GHAGQJwMI=C_FsnvCt6^}0PYnW|JK zS8P#a`rfd4_7*-vrjT2kS3gxSWL_ZoWBNHUMxDuT5;?Xri8C%WV7$3~g*&6a0cSh& L0tN;KhEhEMxyUEE diff --git a/irlc/tests/unitgrade_data/CartpoleCostQuestion.pkl b/irlc/tests/unitgrade_data/CartpoleCostQuestion.pkl index f9c111e0fde29667564c4bce12403bdbca2ede4b..61cfc2e5e8d74b9cedaed9bbabba199444880933 100644 GIT binary patch delta 142 zcmX@EbzEzLJ@bmW4<<U9Gai^&HH%45VX_$GPDb9z7Z{ZlgzmY0U|?X#Fv?(So07rN zHl=n7l+DsM#og}IWML)~PKgH$Adq1+`5>Fz<~*kJ%#05v+p(#!7;|OiOwM2nV3G^i oyo>EI2b1)J$@BTuna|Ejn0%5yfJvrcv$TK;E5`zmc7{?t0E-_jF#rGn delta 142 zcmX@EbzEzLJ@YD;6BC`x8Ba{Cn#Ck8Fj<UoCnNvl3yjJN!hWkJFfcG=7-g`vP08SB zn^HRk%4TVs;%;|rvM`egr`!St5Xdl^e2`6Ua~{)qX2ycac5G@aMxWS1CTFk(Fv%8d o-o^HqgGo7H@_c@E<`26YCZFUFV3Kv%EG?kI%JBfCouO0@0RCz$00000 diff --git a/irlc/tests/unitgrade_data/CartpoleTimeQuestion.pkl b/irlc/tests/unitgrade_data/CartpoleTimeQuestion.pkl index f9c111e0fde29667564c4bce12403bdbca2ede4b..61cfc2e5e8d74b9cedaed9bbabba199444880933 100644 GIT binary patch delta 142 zcmX@EbzEzLJ@bmW4<<U9Gai^&HH%45VX_$GPDb9z7Z{ZlgzmY0U|?X#Fv?(So07rN zHl=n7l+DsM#og}IWML)~PKgH$Adq1+`5>Fz<~*kJ%#05v+p(#!7;|OiOwM2nV3G^i oyo>EI2b1)J$@BTuna|Ejn0%5yfJvrcv$TK;E5`zmc7{?t0E-_jF#rGn delta 142 zcmX@EbzEzLJ@YD;6BC`x8Ba{Cn#Ck8Fj<UoCnNvl3yjJN!hWkJFfcG=7-g`vP08SB zn^HRk%4TVs;%;|rvM`egr`!St5Xdl^e2`6Ua~{)qX2ycac5G@aMxWS1CTFk(Fv%8d o-o^HqgGo7H@_c@E<`26YCZFUFV3Kv%EG?kI%JBfCouO0@0RCz$00000 diff --git a/irlc/tests/unitgrade_data/DirectAgentPendulum.pkl b/irlc/tests/unitgrade_data/DirectAgentPendulum.pkl index 30dd1062d1dd64f89fbe4a1d9559ec33ecfdec49..8d010a1467db4d221532f2ee0a7371c71c132147 100644 GIT binary patch delta 22 ecmaFP_?&Tq9n*J)i4OBPh1Tp2U|?V<)dK)xlLveN delta 22 ecmaFP_?&Tq9n&|5i4OBPh1C)dFfcHb>Hz>^2M0_5 diff --git a/irlc/tests/unitgrade_data/DirectMethods.pkl b/irlc/tests/unitgrade_data/DirectMethods.pkl index 5b7d595636172fded4067cf5f187d482614b79ba..023619080482d1b79f8bf25480a74ab9f4f9b6a9 100644 GIT binary patch delta 46 zcmdnYy_tK01@kVp2NSI~GI2cE`0fxRlPJUHO-$8{Ogsvk^;xPJIXM}?pi~b4dOZyA delta 46 zcmdnYy_tK01@rEWXC_*2Wa47j`0fxRlVrf=O-$8{OuP#=>$6lda`GrJfIz7p0EZF{ Aq5uE@ diff --git a/irlc/tests/unitgrade_data/DirectSolverQuestion.pkl b/irlc/tests/unitgrade_data/DirectSolverQuestion.pkl index f9c111e0fde29667564c4bce12403bdbca2ede4b..61cfc2e5e8d74b9cedaed9bbabba199444880933 100644 GIT binary patch delta 142 zcmX@EbzEzLJ@bmW4<<U9Gai^&HH%45VX_$GPDb9z7Z{ZlgzmY0U|?X#Fv?(So07rN zHl=n7l+DsM#og}IWML)~PKgH$Adq1+`5>Fz<~*kJ%#05v+p(#!7;|OiOwM2nV3G^i oyo>EI2b1)J$@BTuna|Ejn0%5yfJvrcv$TK;E5`zmc7{?t0E-_jF#rGn delta 142 zcmX@EbzEzLJ@YD;6BC`x8Ba{Cn#Ck8Fj<UoCnNvl3yjJN!hWkJFfcG=7-g`vP08SB zn^HRk%4TVs;%;|rvM`egr`!St5Xdl^e2`6Ua~{)qX2ycac5G@aMxWS1CTFk(Fv%8d o-o^HqgGo7H@_c@E<`26YCZFUFV3Kv%EG?kI%JBfCouO0@0RCz$00000 diff --git a/irlc/tests/unitgrade_data/Exam5InventoryEvaluation.pkl b/irlc/tests/unitgrade_data/Exam5InventoryEvaluation.pkl index af00f83a914f3ccd605d208edf577a680f4b4822..7668f6c5761fd79c62f158071fbd494d96a842b9 100644 GIT binary patch delta 27 gcmcb~c$0C0Gt>726J0%+Bnu|?J8>v5fIz7p0F5CC7XSbN delta 27 gcmcb~c$0C0Gt=*eiLM?@iVYL{oj49KfIz7p0FLnplK=n! diff --git a/irlc/tests/unitgrade_data/Exam6Toy2d.pkl b/irlc/tests/unitgrade_data/Exam6Toy2d.pkl index 35da329cde908ee0c76542e26e45a260eda7f19f..f297836116e242443741fc97c303a7586b38e0e2 100644 GIT binary patch delta 27 jcmbQuG@EIHDbtq&6V2x^wQrbs)sCa1fq{X6p;Qk5mU{`R delta 27 jcmbQuG@EIHDbrVmiRN>do+V7YYR56*0RsaAL#ZABj8O?< diff --git a/irlc/tests/unitgrade_data/ExamQuestion7FlowersStore.pkl b/irlc/tests/unitgrade_data/ExamQuestion7FlowersStore.pkl index d94ebb5d1eaceac887aa37880e80bc64f85537ae..857d1b44cebdb0612421c1ec990febce99722f8c 100644 GIT binary patch delta 27 gcmdnSxQ%gw3)2^eiEd_0CN&f53^)`RK%i6)0DJcbLI3~& delta 27 gcmdnSxQ%gw3)5$YiEd_0#(yT(8E`Z(fIz7p0DzST-~a#s diff --git a/irlc/tests/unitgrade_data/ExamQuestionTD0.pkl b/irlc/tests/unitgrade_data/ExamQuestionTD0.pkl index a00b2d148c0fc04b594b8b9551574d5f265a43b1..4f921dea73c279142e67fb44ce9b2c57aa0668ce 100644 GIT binary patch delta 25 hcmcb@e1&;}71MWyi8fQ2J~K?bT+Q^lVR8Ut3jl{U3Vi?o delta 25 hcmcb@e1&;}71NJ~i8fQ2zB)|2T+Q^BVR8Ut3jmNT3c>&Y diff --git a/irlc/tests/unitgrade_data/GradientBanditQuestion.pkl b/irlc/tests/unitgrade_data/GradientBanditQuestion.pkl index edd179f5233e5a15868c54b4fd9cc2965fadb20f..473236810511533d00ae28302696def58d013643 100644 GIT binary patch delta 102 zcmZqpz}oPEb%G_+ABK(A+J=k=Hm4g#8!)#$c9?uLQJwMY=C_FsnvDIM^}0PYnVJnI zS8P#a`sT2C_7*-vrr?j8S3gxSWS)O!!t`@uj5>^8w*MAmTw}mEYx@d!Mt=j&w(kiH J3=9mVdH{3RC|m#l delta 104 zcmZqpz}oPEb%G_+e}|3M+J=nxH>Vp$8!)#^9GHAGQJwMI=C_FsnvCt6^}0PYnW|JK zS8P#a`rfd4_7*-vrjT2kS3gxSWL_ZoWBNHUMxDuT5;?Xri8C%WV7$3~g*&6a0cSh& L0tN;KhEhEMxyUEE diff --git a/irlc/tests/unitgrade_data/ILQRAgentQuestion.pkl b/irlc/tests/unitgrade_data/ILQRAgentQuestion.pkl index 9428ff4694b4dc2cdbf360f286f3efccaa252b72..547d7a9f7b6ad4938087db86e35a9b36cee09e65 100644 GIT binary patch delta 22 ecmX@cbc|_&4b$(2iFW@vPwnSnU|?V<)dK)%<p=2i delta 22 ecmX@cbc|_&4bv}%iFW@vPsVjIFfcHb>Hz>^HwT0O diff --git a/irlc/tests/unitgrade_data/ILQRPendulumQuestion.pkl b/irlc/tests/unitgrade_data/ILQRPendulumQuestion.pkl index e365fec395fbccdf16de93f115cac629916b4b03..f94cda42c98f0cc556ada3f739585ba233fdd584 100644 GIT binary patch delta 20 ccmZ3*w2EnhJ>!pw4v#nvEMQ<@U?|lC08tMH-v9sr delta 20 ccmZ3*w2EnhJ>%br4v#n<7BDa{FqG;608t1A)c^nh diff --git a/irlc/tests/unitgrade_data/LinearQAgentQuestion.pkl b/irlc/tests/unitgrade_data/LinearQAgentQuestion.pkl index 69c70ecba4954fbaf7505af3246803e1403042e6..0af1b2cbfdd1441804be919a7071995cd7bd6acb 100644 GIT binary patch delta 2668 zcmccffbq@)#tHUJ-x@YLn((vOvy1MZ+{h?7SwM!DT}O0(-Xh^$lNALNITn5r+5c8L z%?iv}Fh^v+1>+hB$Nr`8{*wvD)*u<SXldd7GEXK?6jkP6DHGa%DO?pS&JkZOxSxAT z9fXs%L}34hpFt3g{9gY3H{8-8oZXFl`_(tig>Yodc=wy1Ukc&4%kk`A{;C_oaoEYV zzq)x7gk$T?xqmU^b_l0vD*OJ-r?(&+joGaGMO6PnIJS?O_cwpN4dImNGwt_!audSo zbY$2+{p&Xfr_tf>KKCUTAe;oLU;FesPC__~CV$)4yZ#}BBV_h@-_zvl5Dr`G`+aiG zZy=n7y>IsM-P{Y|<X?KZZ}*vX5KhC@XZz+^?}u=vFM6_1V^<G^bIJDczNtw@5Y7&@ zNBbChxgngO_=o$JrX7cbv`WsyeeEh|cT83kP-1s`xR2%76iZO3bBGo{+~?D|VJC>g z?p^$7pS?80WJM7rjw$w!_Z{~*1CB!W*2pLOLVxgpB-j<5p6v_uD*>}_e0{NR)!Q{- z_VoNW`!?p^1GCpIdcSY#(KBGSN&n}4a}<Ar*~?aa-xrd{Jb9v^61!W$pM4BlB*5&N z*9`j=ujzo<D^@V?7xLBzv+rrJ?XTrk0JD={aqM?UG=Q*|aqm}PfT+=$$+v&^MjNoW z@Ls|F{=YoHY=KUZ{VM;B!0c#C@%<}=Am(RrN$o%3YyuX~-72$x;{h!&`&*g(e%B<3 z`5!!$_V?|Bs4>=5-GB3fIap0ykNW<@mmqFl_(^mBiVjJz_;pd8{jFy?!0dY)boZw< zp9TAu*+qZ<lu0MS>}F4c{W=Glz-(Pc!~LBr&B5&8)rR|{)wDotj$?-n_phFL7#tfM z&$|ux&#a4wMBr>!!~Mn6dmtS5JcIo~A>ojCzUQI8fA*G15YF+by8D&8p{euVE$#hV zQtpCf*uUJ=+#lz}FnObx68nM>_5J@s6v6CLf0g~Oy&S>p8|M}F>pqVMv)9PV?&s^y z0kd5jCHL=^Ob4?)>qPgr#W#T2r(*^8Z|83Tvo$h#_pi~Y1+#CfaPHsQ*#TzHxW%&H z+oKcAR=>iq-|!qnZ=}x8eIEPU!Qu<9eAqX+x)sb8HhH;knqUc-&3^6SzWRgNV7BXt zTl;)=B!bx|rd;0Fou39~@AWvduc9#%%+C3KbRV~t7nuD^<G?<TbCzIsMar&y_7g0? z><gQ>?E58R24){o+pw==p&*FOalvuTzCSiU!GXo$)UjgUq~v#CPQ4@BvVBGk3#_M< zOi4<Oouc8*;LYUC2*w~50}>k~%H$0Zfsh~(2#WzM&Ir{F(g|XNL?JprESPE#8)OGa zXC#ONQV!yPltLsBBuEUTkqPc3kiiIfkUoe!hy_vwp&2#@DtzFtx7ZqUdY<y<{YT<7 z609XY?YD7_s?nGEu)li3Jf-%Im-}1NohBcd^kToWU)sM-C!X%VmiyJw+UMi`iRXTO zzM%hce|yreI6s>&`|Yf<RL?B`u-}rQXUpb<cl%2}+p+ULez~8&_r`){HIMffzbo5o z()w)wnRh1-JL!Jfe|z(DBU8<<_50cR99_RfeBRHq`qkXm#vk{K-7qqVmH4#3lmE@d zRX<+rPgD8+HsRp2{lWfQo?7pDwf|Xdc^k{}kNYp|-?2XK!H4~KR^8LQAoXOwisuF6 zrR-1kdzTn+R+>NCKljXqn6nk1_PeSGwJvo0ykBh3*R9%{KkP3}s+RV9_-6mL=ULj{ zrq{pQ|6fnZ!KU%u{+0|5rDt!S>=#|<{;*2q+5Q)ax=)r?KH6XY-|-r^)4TmD`=_g} zTliwX!rz|^rp9mgd!Ij@$h+;){<W(Arl+2Lzu$7=r5k*kp6<UC<9GPr(Wm?WY^tke zeg1I&<;3~<tdpMYj}0$d$=UpP|D<p>10|39`x!u>p5ej%t~>WG-73Ai|MiEOFP8fs zz!jXd`NYob|6u<P#ZcdPwX6FNebtERKXi5f@5=pUS6A%WKWiok?A$*CM9n<8-=u$0 zzSQB1`=z><ZJvDa`2JmTt*--joZJ6>((jyQk+=8HzAvN}vEjk~4W&FbTbu5H9Z(N) z1k9kffji_{Umx7>|1mdRcH)EmA4TUf6kU0+-y(9J%t@iU`^6P4t0F2dz%793^D2-w z`hN4l{)smwtBSoI?yun%l~;0pxPL;=9s7ou<NJ5WUAdd{=j47Le|{csn@jsQKaYQR z>fQPMA<xq4)um2cg-f(gHVpNd`f&d=yDvV+uHN6TJ4r15)1sUEH8eGR6PzyZclUXn zo3U%xey_(u$uB;f-9H=b^PBtcn(*HE9eQ>Dqe6Y#>ZdoLA#-wn5Xe7)C-(b#JmEjQ z`S|{P)7L%kyRYvzI`Kp41n<@T@)q?MrYXI?wx8pCi(04Z#r;yc5*Ok>FW#ROqAA8= zeh@DHaLN4M!fEIBPkW!mTH1DTztcsV2G_cyP>Xi%@B8;#Y4W~R`$M1>LPF-`ep%J_ z>#YvQ_HXdaoe`dLVgI%hW&7q!UElxY^Uss&i?8ix0Ea&V0|P|EPH>!nqC`hV_|who zd;2@*<k+xGzP6uhlSi86;amH8uP-jJ<=VbK6vd*Q`yWY&I|MAbwSPn8@AJ*uZ|#@u z*uD8^&+Yv@KW4JE^Pk;6S%M{uPj?gC!7<vFS?@Bg?XP!oY@M3abz=Xbvb$$L)}P&< z(BLod)aT-U3HMi@Pm3Pi|DY;;o93NM`;S)`p6F0M4Hy5c!L*?5^$EE6^e<L^|0ZsL zM#;(j0)3)>i3hj9J%sK-kS7@!7&ad;X|$evz=T5@RKB)N$zaT2Ynzh6(l*82zUv`a HQK=pP93qTp delta 2638 zcmccffbq@)#tHUJ-yJqOn((ujKe@JVawDVU<O_m491D(K+h@G9#B#EtfFj2p?(6&b zEqHc;IkJ{F_8r<Fwj0c;aJ{+DTbB*O`LyxYzW+965DvrtJNxRKCqp=aOYiTCp0X0c z5mbJ>?_U1>-IEigl-RSM?Q2)M1eRd`Sodn**(|2X8^x5^qb|MMC!EU#X3x0wY2RUX zMlf4v+V_1`MdDyKQ{|t1yP{;k?8s_{{Tkc&!R$0P=Kb=Al|XE^V>ej$|C>HpQ9_xm zZ8gXKr|UsX_6_FT`xkI2gH*7W{^s59{Lui+X51jK|B;d|n7x2cc>m$$5St!P72W^f zf+ASF^1t~0$W$pX`>LDNe$nfKAU21#n#}$`tKNgX!XYRtxBpAgZwTko68Zg&4-P{( zb9X81KlQW=!eN-Fxc_ESGKBNAPig<6Cj}5rGl%m23Y*W6An)*2-XF=?2H`NzRNk-g zZ5D(RmaDw~;buK6P-L)QIH9y(rcGt?L_sBX<#&qvebz*Q+35<3`-ArQg4zG?%J1hg zSqNrZYRc``PQC?ZA8?b|Z(;Kh%<lXlx&LOb@Z^iqO6(twitqPa#sg+E7>Mq_Uabmd zE7uC`ci652X1^`q-@k#^8O*ks!n0pe#~jSA$mHDrYl9b<ZDYZ<U%=l7%r^93-oKW~ z2F!jb&9MKspf{M!_U-4sIhGz^_QT-M`{ucXfZ5t9Z})BfW(#Hut$)5xXR#TW?X>6N zKE5bBF#GSEJNx=xD1+JaL$2>T$E^uwzstY0Pjs3Ln0>J1?7n%a@?f@k(aC)Qfec`F zS^lwoCL0cb{eIxx;e8#t3&8C3!h`$7!sdh7uNV*P+rG~n#OBy6zkeU6`6F<kbC`SX z+qb&<8id2wwr?MQ^yi(R=-}8Bw13~twPzp^byn-ZzW)L@z#O&@LWlO%3QnFVs?2ey z<;cE?C37H=F7W^OzUImIz#R5n)o1n@O@*ZB`EM@ln<v8uN@wiX&RpI1nGKTa+l6oM zlf9+^7MHMkxKHzv3z)58`eNU^#CR}!<@fjd3<D~_?5jE7_f;>=1G6V_{@*ucdNG)7 zypVanZ*Mc0eS(=||E!=MFq=z}XTRwRi1<7sf&FzxO<-}8_rm*Uspf;(M=y!*cYFdd z|JPON{Y%msz~ZOZ%kMWRDFd^mBbE0{hh%}-r+C!&PkfyNX0L40+JC1!7R=WCrn{fz zK@6B3m}t2F>B%TC+tJ!&zkH1un0@?!+5U_~a6aU)S!1!kY}Z$CU~$Z=wc2mJ@dcPu z-}b?J|C-{RyQh>)NlJ~KqT$Wt&FIYlB_S*ZZ$@t>Zx{)qVPX)P391988<_^_1@U1t z#B>mk(OVou!6cE4p8QZ=SRZ6P1KcqPZ6I|ZIS@vOGHwo3_`qL3&*#789J{;w54rx! zIe+!Ve*H}=-ZH;?xj*9M;=)ylulBc|;aOu;@OJ;_Kf5%lrheK#-T1cXuLm#oYfCd5 zRf;{>zi5m3k3%W9_OC0i-xI#}#s2EvlkKh>-|avB?&M*owJ-Odd;i;9-|yA_=`3+F z-hH3;pZgyCc<JmH`%TyQ6;IE6TEAcI%9F)*Z}03s(@}2wQSa&ghN;foInQ73Z|&WG z?M=Xo{an`<7ua$=-yf~u_%Zp+r~P@SQVZlhzuNE8o%8(n%7^=TUa=J|+4f++_U5(R zPwF1-&zShO#@_Va{u>{<rXR3)x!-YK-I;qOAND`FX!PCG{KbB6^G?Tvr*HO0IoZ9q zxu*Wr{^jp?+GKxxyZ^GIoQO*LyZ!ef-5#8N_<sKcNon?8=1==gIaqC3{I2Y0nDpz9 zW8S6x|D1oleSP5W{;-@6j?bJv?!WfV>-z59ANT)waz*-y#K-;WT&2@@Zg{?5<e2MP z#=1NE1zrA#+`Id7|Be#AX){_L?SJG`@vkfB`F>f8beGKf{0I9lgnZFioBd+{j`hsX zdo-T!f59U7KVaUo{hTLCv>PLC?0>Oh#T*sioBNl2kDOWTd~bh&N6_N8x1R4$Fz?O? zVtT&+Y`?N|{DSBEJ-&0Tb(Fcb|HAqCKi#bF?w__Z_Ntb`qy2ME9Ti%k`DFi=$gewZ zbv@qyZ{9VBG=_)!85kHC>KPaw>~{!{-njqGt^K!sC0m|)-`TGf(|_oz#-06LzN=PQ zFTDU(2vTR(u-&2Q(dGR*ZAD(1=kM=dC6u@*;m3vjCdMFec|V9WHom++AwhTLg2QL_ zYe3E6)7>=p{qlSJ&loBwce>u*@A0roWS#rN{kuJR>Y23LAMM|xx={C-$>aSy<XT?` z?l`x9_Dm2sx&Lgkys4}G(fx<Mf{ePl-znkg1*O{;_s5#oD1Ixrw*Lt~+q`Q2NBae! z7Bp9^4V=mTaR2K-kVyyk&jJ}UbLakB{)r0>=C0WvR#;K}KKSYW!mhq2pX8tIm$j+S zuZrCA7^?8(exXOFZdxzCvwz#?zgH?=9^1d-7$~{i*e`df^86vS3;S<8_pVx&cX$5- zQ57wr+jpV*Ztgd$IsLxl!j1iWA;<Qg-*E;izH`6&GUkg1^VaMSP^vI`x90#{9OkeE z^&4t9B^7S$&pfv1<(rfn@SrU1>Ns#z^dL0oK`{lv0#1KVx6RxHH>g4FP}}nGUGTVv z8QkRLWVY|wrTq%)#H~+stlzIve^CD8ysP`?vpWYWyI+KxTi^bFvcO@tg>ZpWURx{s zqPOo40)=4UiT%R&ep;HxZ-iQSa{q#}tIl5y+q1td;%oUA<)cvXopAAAmp*LW_+kfK zJnEKoO`_4%{V;KeH{j&v1162ulMk41NP|k$wka8m8EkD+GFaNCxZ8J6bO5O<)dK)J C%do=$ diff --git a/irlc/tests/unitgrade_data/LinearSarsaAgentQuestion.pkl b/irlc/tests/unitgrade_data/LinearSarsaAgentQuestion.pkl index d0e913ff8885e29f4287fa1e21d720d5ee6fe0ed..f1d8ea1c4051989e2d07c4d440e9ea4f2aa3b24f 100644 GIT binary patch delta 2349 zcmaFxfbqct#toJHEaK0__fHmNlALTH$ip#}O=5rl``<e!H}WfTgt|)XXJM}11?F6o zm)Ji?NX!bvVfURTzJFOW=VU=KCHAn_V*8c8Yk}Fi3S#?NHNC;?Kr_+(5_dAe>`!ln z_ZR9f0ki8oh4yP%T>`UJ&I{~6tg{!)-rC5&zi9h1Fnet>-+m94elUBnDewM8EYrd4 z+dsMYTRoWzW~&!+?bnSu0%n)b<Jf;G{3V#}-O9HAl^5IOg%V2aCfO|eFSkP2Rv}FL z%l5H?#b32D?04084pv{t{%>EY#6d7yY~k;HFQ&}|vyHC**mtL56PPWv{_8$gwo_pC zlZ~JDP4V3iX4^e|zb{X7GnifB_jX_V#-(6(SLy40w&gis_WRgZ`z+6LgV~drU+#0$ z(*?5`GN143@$?0;IpU^2+jn{4MR4G9m`!-LZ`X$u3s6XNv|M<$uW>d5Bo5AnKi}8l z)ehlEsJ_@Y%kwOl!~SXK%YE;2ye5AXP-0IOezPwiW*V4%OYPmhjdxqX?CcNk_XV6t z0kdxuecG2edjXi8u=49Z{zYfO>=(U1_gz)|31)wo`gb2cBje<QvP$fa?=b9NTg3uq zyIC>se;pwXX3x6Ey8oGoCYXKt2*-Z^9!oIWJcWDzt6S<|_SNmY`{z6t0kd801oq#V ztq5js{v@=&Ls}QiF6<NCKmENLm_30<V*iwOc`*C3n$-Tg6WG9PpPSPAvu$pI-S&8~ z?EXXEPr+=DX8HZ$o8E)jbw-N&8E0PyvkTWL?YH^47tGH6qP(B$-y$%3owMrx0}Gae z*|%n@?dK6&4Q5vts_$R2ARNTz5Im{AzesyMIB+?Hl{NO?VBP}ZJgn2$KVj~62q$os z#{RO#+mP&HT&l4@wORxmiR`xL)b~&Pss@T(_8H66_SamR3}y#1sP6Aqz6@qRkyqYd zWcC5f-prx6-|!41b$0KQ+t1P^1Wu{y?K1lrZt{ZJ^V6jE|5fAwv*YC@_Df$71hZFd z72O|^D+OkA=?U+@_goRoKIkp5-(`{onC&IPyMMu5UoiW0FX#T_^Ao{r*8Qyer)?|- zvoCyL+#lQvVSftwvv1MVLa=yu=GT3{a$>>k**fp{J)i3iW?x_YVqd>RAecSj_QQQo zI3mI9vIn>Jc_hb!*`@n0?~_nS0kc<eoZXjxGYrf&IB{&BF~0|xE!1{k-%e);+u3*5 zzI&1o^JnXA-N$oP8LZ}j(Z+oXzWoD-m0{G{eMgnQf!GQ5t5@!8-1FLcO39R@)YvH+ z-VELh-b~(1-i%-fVu3^$;5>*7L<}SgQ3v9Kcpw@@WU{t`?BoUVJj{&V43j6y3)@2U zGlK1bs08T)VGs|bAH)V>5FbQC#Xzo`Y{)FUIY;3pfBl3@@9j^0{;;1-sfz90_t*PP z`_{SLV0yIwp~|7aHCLbPpZ_5BU6c32{gW@2Rn0nheg7Sm37mG`ul9#>2unV^_hJ99 zbDuI?{=M73)b8k;y*J+Me<6^_cYFKu{hz8@V%1MS+uvO;s%)+FbpNJXg$jHvxA)71 ztEc@s_-6mVq^_V^^H25r9eu?a{enO2zgSs6%~}5KesM3i19|VB?Z43c=FDf?=lh#I zjdvZ>f3p9=><P7z2XF5$xT4mrcH!0jm}_oV)=IqJUuPBeLdNas{yh(i9zS3EVt<WI zlZEm8r~5l4tvFxZf3|<74!d~R#^?JFEcMf0zx(C>XQf-3uZrB;e_&p=cQ;r4gZ*ro zt6Zv9J=nk5+o0!1{geG}vrkSgw|u@o*lkX{e&n<L$(o$%`aUoB-`<~Qv&-k<{%id^ z-lctdvR`RW^Y-lLclL8yGmAM0KHgt%5G5wbcWwX0)+AZhH4pdillXh)#e|Fd+gbUp z&YN{@zsGy_wvBsk@3(u@%b2cldB4lqY$dk(jW_n+G+)Iuao^?rnzPr0@B4dY|0z~Z zwxu7h?BCbI<iAVg&VHqbd>dad-Q9oOQ$0J=_|AU0zSSlHswek9`t~PwZsXzor7Jjg z)plRn-?vtC!kxo6_GcYT;P`p;;(oKV3+lDmclP^be9&62ae2Su)T(1k=iS+#A9qsv z!-~uM_c?v+tN*m<;C==uh`slsdt=E_I3Fs^5E0-}Sa#^Ze#4?EPKTl{>^EyVHKF7B zmHnyS0;vX5F7H3=og4jZ%If_eyN_%N;X1JY<(eZ~;tY1~4=dQx9eiow{_2Vgd3%<e z++QuS&i!H4mi_TIOP*csIk10?kxjkcd)34HR~X&7_LTMdexn)xUz{txynnB7bn<!I zv-`W&erVBsesTYX>_0DxyH4&;_>;bIVepy#e?B%asjD8}&+;Ye^~tDX`z1d;+48ac z;C`-6{Zc7G+xC|^hcA7&^w@r>%@6O`*KgmiC25yk{N>Po&m9K>)@xkbFKwHUd(HIZ z{`XVs<?|hs&cQW2+4b9`VZ*uov)<e;I<0wh|MJ8|-_I%U*)Mba-rt-zyY_o^dv)v$ zJGx(5SL;vB>Z5RRop#;(?^4cSh_gI%h+>G_0~d#dC|q0|Y7k7~thLx<j^AtdZ!R>s VVm-ORWX@#Q0$~pA4IueaJpk>O5tIM` delta 2499 zcmaFxfbqct#toJHEEAoc?3*meBstkYkcT6z@5#O=9pbwtH}WfT1gbvW7xH@=n8RM( z`D~w|sL^CWF(r1<P0#ne_7MTGIX=yKu}}W)D~O_czgPQIk41na*t-wE*~j5L7tBs7 zdB5+K=^8Nmee|b&c8_<0+4DYr**7_RDVW`2@O@w3&VDevLF(5&iTYh&Hp{BN`yM}d z3}#PX#jwBS)GIK%ri*F+v`_EB><RiT`+s#_0<%|7W8EL#yBW;Zbzt9bcJK<A{Y-;% z|LmAAVD@5e?)`~zAHnQnPk8n_O?(4lbNGJZ+kYvMc@Nn8J_dsOuh;W~Iqccu!u#F( zKZ8`Ub1xCu|Iz;#n0-t{Y=2Sg4lw(1n)v=zd@I51{F@T{CEXyt+?*-3|ES^#u=x6k z();=TO$M`V{AKnR_S%BkEstdOe|HxKu{p}TWcLd^J`WC3j=(nA{XE(E5RPn!?Ecd8 z9#)eL#g*8$$n4+zRDSYD0VVe1%cb|9F>eF2FKw3E|KdwAm@Ripa=-kRNHBZHEs6ck zY4u=s-FET)u5%}X*&K#q`_Ee)2D8n)ME2jz_zY%ic?j*-Zf2N#P*#a;qn^Ni<+#a- zLdxu+&V2ht*L?*mS<A+=Ur37|q=cQJmUF*ewi1}Fvzl%HhY}Mod#xnP{+;nEVD^h{ z#{D*h%3yXo@4tOIo{C_$$m}2cO8xY}?0cs^?c2g;1!jL={$}6Z7#A>G$>sUJIaNAf zcFW2~`_2aogV{@C@9n!;%?V~VX58BM&EqfFZNYxm_HA)x28pvR5WBQ5;Tzcb7U$3H z)46pKtc0cR%svj^DPXpa`sscDvbDkN_s34|^N=$Dvk%6f*e5gH7{ul<EI+=_d%_DV zaO$|lcYNQEwE+-L(voBQHogyma9S;n?K`wH9Kun%bZj5@!kN1!FBDN?D>$)Fgu@>c z$Lz;@PVKW%X$P|<RnG1!y8_7t^R`^rXQ=oDB+l{H{OZ2@nF8Ph$gyeu&3!9>@IW~C z9^c(Jxs?OL*~RpDU-oU~J(B}vl-NJL*!Psd4kB@F#ru7W4~0QE0TSQ$8NSbjaDK1< zw=d^w5rnhGfO-F9?J@`_CV+jv=(lDFM|C~-ex94%5YA#3f&B;9Pl9k5TSfLC<?euR zbTTCNuRc`);XKNe*?-!s0m50`q_AI95^7purpo@Fjg=4?1#yl2&$6>2oK45H_oqIA zayAs|@2}mT1(Dg^XSDyJlRJbnL&9{wZn+YKqtR!+|8KG^gp>c;a{rD%aWF?j@wN5- z^EdwQo>DR;DK&PAhBu=(<K)-!(vyG5^Q1C(Gk7z3Gl9gsk=cx3Q6{h$qc?;Hq9Lk5 z;vgPKCy31e)(_%=%z&tY$uoE}Oui^DJo%Blgdj*3riu~dmdS?9!kcpxZt~Y}UnJ6& z#dvYQ=ftR*)%=h48(+G2YEI6R{d;&4cCU+lw*U8=#49D9Z}w-M>P*yK@@jwKi@IsP z2VU=gJHJB1d)4dxwm00B7aY5`-<HXiYuEFq`|rQ>5Yg>@vVZrd%O*RNAM8&*x1mr{ z{MG&$8WTibe0;P2jM?E?7A7zEzn@X}tHJ9{{r>BZ8IA_Lyu82J_PFO#&WHQ|&k6EY ze)DAiy?Kw%J-l&$|KcXK$IPo<?Khh8O82VVoBd6;f0dd1UhjX+*K$qj^Q--O{_Yl6 z>Q3)3c-H#pKKt?gTV!}owxyrg&$-O~n{MNq{f1RO^DCVm?$_N{uUC`yc7L6GPtQb# zhx<RbH|s_l)<4_7{lU+$mo|_0tDiS-{?Gezf2`^sIsQeD_a~XW{<3@5tNrbEvwe@h zd%RyF<x<K*$H)8i1eg8tDt@wGMP}juPm3<^&wAZ#%MyQof8B|j87rRN+rKHs+v}>= zqy6SmCtvn#dbIz<>1&&<L?7&TO7OPKy>)N@wg`7G`Lc)mtyhJYDc1Ac-+!k^WZK@{ z7xxQD_k><8J-(l<CT(4L)z$r>ay`>*k6z!uSEG_y%lZEPujknIxr$%ke~`~|&0p1< z`?n;p?9#n@asLk9H5%Nqm-d^r#B|Twer`YOn+q%_9$wgg%i?Z~Wb@ViX49++ODym0 z_b!lFWi;!`{<5Tvd3R>r-*2<ld{KRG@}d2%>lP|Lmf5+#DtqEiSH*q%Ip%&%ddYic z|6YarHl>co_cLyc+*!kZazF3Hv;DOP9_-(GQi%J;-@E%wRKF<++`qSf#(ud4YuT^v zKPFHp+rH!a{;Rit$ZssZzTa$xM7Y=Pv->S3SEbY)KC}Pzx91XasVDZ!^ep_mp?Lp( z1_p+D24pZTSRs$C>hk_`49x##=%3jC(P3LCyZ7b&yV;j5IyHF*NCQ~;EF0J7-Bz3T zSBtE3e^|8zF23%i`ja`!*6lxbb%tuunG5@y&#Yvd+<I~Uv{3G@((s%6m#Z0{3$x$1 zpX1N1zw+xZ>_73k{^56V&4c@Qiy!B`z4ZXxAXp-LvG#Bp8`sJGDPGMR)C3Ri4}HH| z(QNX`{akxb|6w|HcK^dbQC7RX=l7>bC6;WkKLXbeQxA460|Ud-gkx8tdC%_u!KboP zFJkZhq=>NjxdIpWZ>~SH_KeD=RS1n}2Ehz4f5lvSNbKPLYA@~Z<4^ndU(fnKRW}I{ zB3s^Vm}NEP1UwDhTBg}K=lzcTAJ56BTyfY8Hy<Wm#^-c#!SlWQ4K&ohFxE`oxF1~t arT``nqc=Y^zF|GtwLq9-!UB-vO7#E(*-q8~ diff --git a/irlc/tests/unitgrade_data/MCAgentQuestion.pkl b/irlc/tests/unitgrade_data/MCAgentQuestion.pkl index c552c3c66dbd63e44581067c53e74783681ffc2a..3e631e5d95d7b0257a00cdadcb8a84bf962be8fa 100644 GIT binary patch literal 6025 zcmZo*nOY~#00y;FG<x`bogLFt^GX6sQ;SP7^Yf<ka22PPgcdmGBo-G>X`9l+Qj(dQ zI;CxjyWMXGu&xZo9{!Tl;*$8l__WfzWU!{TDLrh7#l@*bB~#j_)K1A@%wPivIV2}0 zXQWQ)5lPO^EhsHXjV~!m%t}oz$uG)GEuPZD3D%#VpHn=ghb!JAu{a|&B{;Qk%H%1U zjZ^$;r}S`UBo=3sCFYc-PU+&Pmf58fcWtGAaod!n_9<;sVyE=*XBOoo>!ns080sa0 zJTaw*D+Fdk<&?=&G-9V{^oZq^<`z`y#V6+%rRwFD=9FY678NB{PU+!^FG@|$&nqq| zDork#GI>f5D_G@}9`?Kxh?2=uyct@jI5Q?qX`d1_MZ=p>Gef?IHKnAoAQhyZDbr#~ zX9q-zxrfnail3jK*Z=?j|APr{hLR~soz5KKKx1HFm{Kw&DMKzp5oBbBGDH`nW`<U$ zGY2D74IU*-gq1K8R>DG92`gbGY=o7t6IKEX7(AZlBy0*7VI|yzmGBT&!b?~QA7Lf@ zgp~*oRw77Pi4b8W!i1HG5LO~eScw>6CD0NHUnY?tY>FgdB~pZyNE22fLs*F{VI^{e zmB<rTqCi-QB4H&;gq0{0R-!^!i7H_wYJ`=j6IP-@ScxW*lGIpG0q@P&HpQFKo1txr zHv@>w_@D9Ln*l5e=4UZ7GB6-3$YN$>U_c|F8W_RakPL(9%K{bm5Ytf<GC`GtTmWG+ zc{8<5@n!~*Sppz?Ap(pb0dEK!Ss{}*bK4ZCI9QW6GuTL&8jz=$L7wX23rQ`?&CE;8 zQ7A4+EJ>XL_B)d|vo~{wOb=Uea$-(m(Ui$kGPImI;FU_-lps);fK)MpR6;R>H)ECp zBLf3SExe8bt7Z0P^aiO01s6yMBgnUKweTtrsurS}!JEOG391i7Gk{!%qtXObv>=S9 z{s2jX@Td~h_68_SKzKA<M#F^zln_98G%<`O24WHeyyPyKk_0NAYp3+E<R%tpPr)ds zy;)kPWT^KrWf~%PlWMSblUNuT85u!Iu()<g)-O<ng5FgEnU=-K#K6$$JgS&+G&C4m zr@*_>qoFYx8l&aLXu+uAJ@gAk=uly?Uuwye#wm%lQ#3OE)=tp?^%%Vw-46&)3_L3; zl;zFjen9xY-a9k3EN^CShSH3Ga5bPll=}hUORM~!-}&Ut?0!J_->;R2H?Wju{0FN6 z2{2BcWPB8C+#hKX%U&68CT|9a(w!kDb685V7#JBCz^XyLHunR<Z}a?fYF>bC{oDAX z?7%j#tyw%UW$p)r)h{3ERsR9%<a#r?@Bh>vqw)1nKiJUHWe!o{tfg7}2-UprSy_MD zxq_S^^zp>j4TWDpW(ypTJyNm#2uPFQ2OFk4^Cp#od%j365n5;USf;NCq?#q{>iJb? z=YdoUMV_4fcoSb~mJrMeh`R-IyqPB-GlO`~o1rvI3|Sr$Qrua`+{=5fK|CahE)Ncu z{r_%?wQDo+lx9gI%e(JCITHkCAk3Em%X>4p+kb!zE)<uxLC3>0SlXtz+b7R!07;bU F0RR~->~;VE literal 4714 zcmZo*nHn#|00y;FG<x`bogLFt^GX6sQ;SP7^Yf<ka22PPgcdmGBo-G>X`9l+Qj(dQ zI;CxjyWQUgu&xZo9{!Tl;*$8l__WfzWU!{TDLrh7#l@*bB~#j_)K1A@%wPivIV2}0 zXQWQ)5lPO^EhsHXjV~!m%t}oz$uG)GEuPZD3D%#VpHn=ghb!JAu{a|&B{;Qk%H%1U zjZ^$;r}S`UBo=3sCFYc-PU+&Pmf58fcWtGAaod!n_9<;sVyE=*XBOoo>!ns080sa0 zJTaw*D+Fdk<&?=&G-9V{^oZq^<`z`y#V6+%rRwFD=9FY678NB{PU+!^FG@|$&nqq| zDork#GI>f5D_G@}9`?Kxh?2=uyct@jI5Q?qX`d1_MZ=p>Gef?IHKnAoAQhyZDbr#~ zX9q-zxrfnail3jK*Z=?j|APr{hLR~soz5KKKx1HFm{Kw&DMKzp5oBbBGDH`nW`<U$ zGY2D74IU*-gq1K8R>DG92`gbGY=o7t6IKEX7(AZlBy0*7VI|yzmGBT&!b?~QA7Lf@ zgp~*oRw77P2{eo2OLfA8O%WliM3k@+F~Um32`iBxtVEKq5-Gw;qzNmLA*@7}uo5}K zO5~B0q{f1ZAaBODDc+3U3~f`q89-#lStySYT?Q%+QFS>3480kksvxE?flUFCjNT9u zBo0x-02Kj=GhwiqyqVjkcr$}Z2%8biW&n{rd?BetxtV#1ISR!ki6yC1z)oiZDbJAU zVJl8f%t<VoGI>gd7E;;QHYEt;C6FpckV+_K@Mg?-0a6GJ35F?Ph0NZJ-XNt6-i+QL z<%}SoK@`HvL8wAzZw8S4ILcm7c@4sNiXo6R2#+d3t+qg60>Y!=G8!%%po9Rz!zVEy zx5_{TNbQs!mfXbR>?s%}i8o8@lnnJArc6WR_FxUx_8<!*BO@aykcw-kWKCdXV8Cix z)^tV&hEC^E#f+e^9Ssdw0W%sJ46Re(y{FO87`dSV?NJx|rIt)-oRU~OMI+;M?Gz1f z2KNKP>X(o7s{a7Bf4!O94+#HAdEAgb!<*UtfZzukraSW{m1dlQDSeygpHuS!qSTuK zq(=B#olHVwU1`Qyn3~dM4pHH(-i+=Cgg>hCgq;fUW`ZjHBD=U^;oZ`VD^R6^Io`~Z zkC}m$2J>GicykP*)SIC+;~H2EsF~`1fd5lgw(jX{kn4mZPtJb4i4SD6(8m*7Hxzy? z&A5)FRA`;mW0}4pggJM?lI{mMT+45U7J4H)o%cN}>rXq^(u@aSH4wX4!mgfQb#@-a z0Qdc$`eQV{9_la6cnnwL%>a`;ITHkClx94so#M^lZvO$&=q}E9h(+<gn_}(SOgyEf VZO{Rq43@Sj?)Hffz~e-vdH@MOQlkI> diff --git a/irlc/tests/unitgrade_data/MCEvaluationQuestion.pkl b/irlc/tests/unitgrade_data/MCEvaluationQuestion.pkl index d9cfe1215b3b0ede2b9c0e0ff35452f93eb10249..6a132b537b8762024112ca92a5dad5b3c6682bc6 100644 GIT binary patch literal 3707 zcmZo*nOeZd00y;FG<rmQon6Zkb4n9SGV}8SOH+$M^pqa1;?$DR0_U8>;^HZ7Q+ilR zGILX>v`ulh`wrI0kipm^T#{N`5)aaw8lP60mkid}Hl>FxvA8(3sANjpl-em7j2Ubo zA&2C|<c!oQJtE2Zxdo*qsqrO6iCL-1CHX~}sl`)zIKle!^K*))^l-&{Bo=3+rUa)J zPMJJKvvG=F?UWwQjKty$uvJsKII3lK>BL=I>0jJ7C8>Q%+mzTTJ^YzPImvpd6$XZS ziRr0%B~yC1LSQyjPMJJKBX){Lk62!5Zb7A9d~$wKs$OnsPDy5BQBh*$lpdb=qSWO4 zyyB9g(&UmUlc)5sf>loGVb4o}D49IPo1t}zGh@<}_9;PAG`tx#Gvs?%Q%WieQbFpO zGA*Wbc0i<<dl+q|`1$#H{r~^}KbY`lD4CMf>C6EMIR=I)B~y|z<T4aNMrJ5ObTMjX zXmvVsFhbSfQNl!62{U0OEQFP?5>~=SSP45}C9r_O<7rO9rf?Bf!cABS4`C&|gq1+^ z7rvP0Cv1uUVI_iul?V}5B1~9`2w^3ngq4UPDM^h5<s)y#wkh6>-b`&%ycxhGGMfP^ zi%`P|6=49YXK0&}u>uCYA*va?L2`^>k_kj+tnp?B(LH=2sYSV&d5Jj+#U+U)sZ&4> z$dKt_D^5<#Ni3Q&c}j*BQXXxa5;P@aBS;9In-D_qY>E)Vq6(f<5vs6gga!%26oe`) z8nFmrv5{;cEXHFIBF?E;{D?&ei}6@gq1XtHFzn?fsH_EHJVgjd8iYrcpjJ7cFahDw za2dSe0?!#GQ<6Zrpms_ROKxIu_7sd}gf~m;lnnJArc6WR21yOp1_=uzBO@ay+KX$a zWTi4PFkm$;D~FMRq0@O(F(W8!M?(XaOGZP3p>+zp`A(<MV1V=<iv3bcrZi4T>=6X_ zdrC_(bBgtn^K*0a^QQFhq@<=LmgbbCWG0swfqEw#Nu`-NC7F4}Q+im_a`F>FU7S`> zHzs4PyZr}96`QffegCKa7>%!o`ZHFz9}xUt!*plfq>Sb62ZSO|&VIa!FJrO$0pVc& z3k7eEWh`(%AiTU}iNNiSjQQ>dgm-Gm-14-_nCE^#_;|?UhgbAU+n^o443@Sj?)DK` IU^`0n0J=xv6951J literal 8592 zcmZo*ncAwz00y;FG<rmQon6Zkb4n9SGV}8SOH+$M^pqa1;?$DR0_U8>;^HZ7Q+ilR zGILX>v`ulh`+fkdHG{E7xFof>Bp#$UH9oC0FBz<{ZAuSYVsUY5QOT6HDYa8F7&F*F zLJrA^$r-6rdPI`*a|=pKQsYaC60=g1OY(~{Q;VncaDw&c=jRkp>EVj^NG#4sO$kmd zoHBWeX5$pU+9^Gp8HvRiV5_Edaa7Cf(uupa(!aQEN>cljwkfewdiXPoa+38@D+~<v z64O)jN~ZL1g}`j6oHBWeM(h-g9<jXA+=5EI_~iVeRK48NoRZAMqN2pgDLp*#MXAa8 zdBr6~rO72zCQs>M1*@FW!=9G{Q8IaoH$&?bXU3!{?NfrLXm~SfX2|!jrj%3`q=M8l zWm-(>?0`ry_b}Q_@$>WZ`v3p`e=y<AP%<T{)0qPta10C#Q%a^JWyobHf{e^ihUj9{ z%+Ts|=3s=X!J~wUuo7m%N>~UhVI{1Djj$4S!b)HPgU8dHgiYZhtc07e5+1@zcnK@v zBdmm<uo3~nN(2cj5hAQan6MHN!b(I5D-k2CM4Ye^3BpPw2`iBztVEiy5*fluWC<&g zBdkQ8uo4BrN}vTlz9LA8uqn!fm8cL_qDoka8et{sgq3IzR-#E*i56ic+Ju$p5LTi~ zScx8CCHjPw7!X!sNLYyxVI{_dm6#A#VoF$v8DS;nNJ>&;K~=doW7`yOCNRkeC7If$ zcr$u4gGmsZ$s0m~RKnCSv`z75@MdV6lI6?DzyKz^89{uAs5b-DERY^BJ1YpJ4n#mz zKvXk<^+4EQ^N>x+iUjEa5l~S~+haj0Km^3btT;vn28ab&@gOM>0hLOCNHMid$w~w< z8KK@nc#awDXlAfD20JSgWFCm{W(GOBhc6_xC^s`NF-M`eB(WrQ3Mf1?WO~?&lM{0i zi>6GTlA(pvq-mQHG$kvKk%0ju3U8vIh(hCrVG0qV@OBW2T6hBrMHIVQ>~0`MEp`*I zi(+>fcD2~UffN(4n~Pl(yN|KkhFui9x!6TXF#)^Fu!~|h7kfxzHvzkQv8%-{irqF+ z%*C!2yC^BPk)jrRNMaYo9uC;mVs`^}6G%~u-8Sr^MA!x{#jtnJKwUZz#?w;)NrUjH z64ah8C`>?jG+ai*g#(ljKzKAUj3$QB#DHTg5R^ecctm9qc&%MBB?(j!*G}nS$xSTI zo`Mlb-Yl(CGSqvRG7XW3UTd%py|OSeGBSd~vAA|hmKHMu16I?r44D}iI-N%qGlIf) zG&Epk^Jr)=v`&FfRE&nkXlRU<qNCM>hWBVSfii<TS}@`)7@?Ei#eS(JQyQlv_6UNf z>q|>AbBgtn^K*0a^QQFhq@<=LmgbbCWG0swfhN^Cl1ejkN;30`r}VI<<>V)VX69Q# zv+r41j0_C!_8%Z)L|JJd-U05cWA5d>*RoQ<Jl^-LtUv8s!Nc6{2ZSO|&VIa!FDn!* zC;VISru&DatN;k_->;R2H?V-m@7)gw|6dhbxveP62O{@>ckQnqF<G8a-kmoc8v?T2 zp}d#9D(;W6+@QR7T+KOKvs|FOFO9QLZOU?n^1huDvp!VX23-x2!O}Lx-9C8}*!iV; E0M{9#1ONa4 diff --git a/irlc/tests/unitgrade_data/NStepSarsaQuestion.pkl b/irlc/tests/unitgrade_data/NStepSarsaQuestion.pkl index 10b68255bebadaa13730e897e7a8cd2064666d88..d2afb2c21357b5e6d0a93407654b733d4398d13e 100644 GIT binary patch delta 88 zcmbQmG>d714YNJNf{FH~%KQJn-ClO%cbxkH_BF}31KMNU5AdXyD%VHNa6iEMbehnQ vMEi-;6O?t*E*HD|F&&uFHYI3EX$E5kTicWjmbNMG_HPf)Y+ztuDAfZ1tsW&o delta 88 zcmbQmG>d714YS>Uhl%#4$_JQ@s*m<Z?{GiB&Nulk?}7>L2bi}q$s9O;$Nd09{VlGx v^mh}dCn&G|E%tx2Dbs-|ZBv4#lx8qyu(eIeU}>A;ZvV<+X#)cTL#ZABbNM9P diff --git a/irlc/tests/unitgrade_data/NonstatiotnaryAgentQuestion.pkl b/irlc/tests/unitgrade_data/NonstatiotnaryAgentQuestion.pkl index edd179f5233e5a15868c54b4fd9cc2965fadb20f..473236810511533d00ae28302696def58d013643 100644 GIT binary patch delta 102 zcmZqpz}oPEb%G_+ABK(A+J=k=Hm4g#8!)#$c9?uLQJwMY=C_FsnvDIM^}0PYnVJnI zS8P#a`sT2C_7*-vrr?j8S3gxSWS)O!!t`@uj5>^8w*MAmTw}mEYx@d!Mt=j&w(kiH J3=9mVdH{3RC|m#l delta 104 zcmZqpz}oPEb%G_+e}|3M+J=nxH>Vp$8!)#^9GHAGQJwMI=C_FsnvCt6^}0PYnW|JK zS8P#a`rfd4_7*-vrjT2kS3gxSWL_ZoWBNHUMxDuT5;?Xri8C%WV7$3~g*&6a0cSh& L0tN;KhEhEMxyUEE diff --git a/irlc/tests/unitgrade_data/PendulumQuestion.pkl b/irlc/tests/unitgrade_data/PendulumQuestion.pkl index f9c111e0fde29667564c4bce12403bdbca2ede4b..61cfc2e5e8d74b9cedaed9bbabba199444880933 100644 GIT binary patch delta 142 zcmX@EbzEzLJ@bmW4<<U9Gai^&HH%45VX_$GPDb9z7Z{ZlgzmY0U|?X#Fv?(So07rN zHl=n7l+DsM#og}IWML)~PKgH$Adq1+`5>Fz<~*kJ%#05v+p(#!7;|OiOwM2nV3G^i oyo>EI2b1)J$@BTuna|Ejn0%5yfJvrcv$TK;E5`zmc7{?t0E-_jF#rGn delta 142 zcmX@EbzEzLJ@YD;6BC`x8Ba{Cn#Ck8Fj<UoCnNvl3yjJN!hWkJFfcG=7-g`vP08SB zn^HRk%4TVs;%;|rvM`egr`!St5Xdl^e2`6Ua~{)qX2ycac5G@aMxWS1CTFk(Fv%8d o-o^HqgGo7H@_c@E<`26YCZFUFV3Kv%EG?kI%JBfCouO0@0RCz$00000 diff --git a/irlc/tests/unitgrade_data/Problem1BobsFriend.pkl b/irlc/tests/unitgrade_data/Problem1BobsFriend.pkl index c367454afea824f4784577e595e85f92c7535338..402ac40b12769d8211ed434cc664b87f02e0be51 100644 GIT binary patch delta 28 gcmZ3*xQcOtEt9~3iT0*UzZ@o(X>&ee0E1FJ0EEE@_W%F@ delta 28 gcmZ3*xQcOtEt9arM0-=F-wYGWv^gI#fI+Dq0C$E5IsgCw diff --git a/irlc/tests/unitgrade_data/Problem1DiscreteKuromoto.pkl b/irlc/tests/unitgrade_data/Problem1DiscreteKuromoto.pkl index 8a46ae488da7c6c88812a86d3b9f5deb404329fc..e9261fc8c65ffea540aca333c5e8fc9e31b3b779 100644 GIT binary patch delta 40 ucmdnRvWsPcGt>WuiLSj&HaQco>|kt}+{@_CbeLiCJ;o4@M+_iPss{i?uMP+R delta 40 ucmdnRvWsPcGt=J#6J2|mY$GOK*}*6<xtGzO=~2VvdyF9*dl*2VR1W}2oenqv diff --git a/irlc/tests/unitgrade_data/Problem1Kuramoto.pkl b/irlc/tests/unitgrade_data/Problem1Kuramoto.pkl index aa1f77c7396deed36b343f489727774ebcebc3df..402b6e64ed470d789707284b32dc7c0e4940adaa 100644 GIT binary patch delta 32 lcmX>meoTCVHRG>|HvNpr6VL2mtl8YkXvxK~fdK?c^#IZV3aS7A delta 32 ocmX>meoTCVHRJDzHvNoOC!X2Ccw%!aqa_zdUl#)d14F4E0OBkQ(EtDd diff --git a/irlc/tests/unitgrade_data/Problem1_to_3_Warmup.pkl b/irlc/tests/unitgrade_data/Problem1_to_3_Warmup.pkl index 6428b65c09cb5b72e9f945066f6343f4d8a24009..4dca1ffc8ec8944f3d0619b092dbfc24d221f77d 100644 GIT binary patch delta 36 qcmey!{E>NrJ=5<46CHCIB_?lR6q$JV4daQ)8yGD)zA%75sU83seGN(g delta 36 qcmey!{E>NrJ=0%?iH^C9=O%Ap6q$JV4Wq*34UCo?R~SH`R1W|O;S72J diff --git a/irlc/tests/unitgrade_data/Problem2BobsPolicy.pkl b/irlc/tests/unitgrade_data/Problem2BobsPolicy.pkl index 6e3d0a43209e6cc03cc992ef8fc4be5b219a2fa2..0133173f9abe7ce08622467ef6ce34a34987e782 100644 GIT binary patch delta 57 zcmeys^nq!DEz_5ViS|a!pBoq^u9llPNsPJduEWIDCd>!yOD3x^%1vTaW<I2<Hd&3) OiNjBWfq{XcR1W}p91>~( delta 57 zcmeys^nq!DEz@6ziS|a!KN=V&u9llPNsM`SLde9`Cd^AX0w$|5%1vTaW<I=~XR;cj O6US2y1_lO(Qau1`0}_n@ diff --git a/irlc/tests/unitgrade_data/Problem2DeterministicDP.pkl b/irlc/tests/unitgrade_data/Problem2DeterministicDP.pkl index c623107a7d3b7ae5b8b3ae136dbe3b0a806f400a..674e8d3f54aeed4eeb13056dc384ceaf5831f9b3 100644 GIT binary patch delta 21 acmZ3;xR7yz6VsQ5i7ufWB@7@?ss{i~69v`) delta 21 acmZ3;xR7yz6VtZ?6J0_%JQzTrR1W}84+ZQ1 diff --git a/irlc/tests/unitgrade_data/Problem2DeterministicInventory.pkl b/irlc/tests/unitgrade_data/Problem2DeterministicInventory.pkl index 6c26fffacf4abd79d8016b28dacc3b460d9db347..547769c9bb40f7e2f9e061a3d24943b7bf016ea1 100644 GIT binary patch delta 23 bcmZo*Y+#(=!EDcvFwt9w^DP4yl<ENhMeqe4 delta 23 ccmZo*Y+#(=!EE=xVWPJV=j#Rr5Gd6H09t<rx&QzG diff --git a/irlc/tests/unitgrade_data/Problem3InventoryInventoryEnvironment.pkl b/irlc/tests/unitgrade_data/Problem3InventoryInventoryEnvironment.pkl index d79217522553caef898abe605f8a24a40636a08b..dd7c5af1163a3610ddac292eac48d2bb7792bb9a 100644 GIT binary patch delta 36 scmX@ibeL&E0ORk8fgNmnN+wINgiX9J%EY;0;x|{0KNbuO3=E}u00|@v_W%F@ delta 36 scmX@ibeL&E0OOyDfgNnU)ANLy87AHrW#aFc_|280cLxIl14F4E00uA&*#H0l diff --git a/irlc/tests/unitgrade_data/Problem3LQR.pkl b/irlc/tests/unitgrade_data/Problem3LQR.pkl index e1981fe372bc707bca0260655dc4fc4a44466b19..0ffab948d9210a04dcf3aac3fb2ed0b93dba7ca2 100644 GIT binary patch delta 51 zcmaFK|B`=#8IzUBMhg}uMvl$4Ow5do9h)7QQ<#{v9&BFDn!w1U)iIfq{RYPs1`sIK F0|0PB4OIXD delta 51 zcmaFK|B`=#8I$#njTS6Sj5jvhGBGnU9@y;2oWjJUxnT2h)&xc-jR})E*>7-6U;u$q FJpidS4ub#y diff --git a/irlc/tests/unitgrade_data/Problem3PID.pkl b/irlc/tests/unitgrade_data/Problem3PID.pkl index 839dd814eff6e41f6fb597adb852de2fbb0f8e19..d17327151a7cf7abcbd9144d790d5816c40e8376 100644 GIT binary patch delta 34 ncmX@dbdG641>=K>m5xl}4<_z$WfHkCnVHdvQ#gPD1WNS)+<^)% delta 34 ncmX@dbdG641>=Q@m5xjj3={XbGKqee%*^P-DH6Z{0;PHY%76)B diff --git a/irlc/tests/unitgrade_data/Problem3StochasticDP.pkl b/irlc/tests/unitgrade_data/Problem3StochasticDP.pkl index 8b59f1de92e3fd5734568a88df1fea70ae262fe3..42b78bdb0fc666d7de65b7898084a8b2ebfc9357 100644 GIT binary patch delta 27 gcmcb~bdza<J=2#16CHDygbXIGU&P_W00O0Y0Fg-ujQ{`u delta 27 gcmcb~bdza<J=52QiH<o;f)6IHU&Jwi0R&3*0GICxO8@`> diff --git a/irlc/tests/unitgrade_data/Problem4DPAgent.pkl b/irlc/tests/unitgrade_data/Problem4DPAgent.pkl index b28d450ca88a391a8ccfa480734fde8141d1b6f4..b3afcdcbdf741bc3db13e7fe30128763d31475ea 100644 GIT binary patch delta 20 Zcmb=doM6TDm0_Zd38&Bo1`sIK0{}wx1tS0e delta 20 Zcmb=doM6TD#bKh238&x&1`sIK0{}){1z`XH diff --git a/irlc/tests/unitgrade_data/Problem4InventoryTrain.pkl b/irlc/tests/unitgrade_data/Problem4InventoryTrain.pkl index ee99756d57edf4a222856e4fcb64d101eb1ad454..d11c158fe1b26097970110155f252694dbc24699 100644 GIT binary patch delta 47 zcmeyw_=$0X6QjdK=N@I*s1)n_=LHT-X`2!>r8t8zgRN~!220x%cl#bj1qKENhEhEM Dx*ZT@ delta 47 zcmeyw_=$0X6C=Y!=N@H|b?y(VL<A2^X`2!>r8t8zgRN~!220x%cl(~$2Mi1h45fMi Du_X}p diff --git a/irlc/tests/unitgrade_data/Problem4LQRAgent.pkl b/irlc/tests/unitgrade_data/Problem4LQRAgent.pkl index 569bd52839bd666ad594a21ada6ef6f6bff3342a..eb2ddd4971a857b980fd75550b2cc7c96ed85318 100644 GIT binary patch delta 32 ocmdnZyqkG~HPbhTiMF4ZYJ4Z!N>27-%-}4(et?02fuU3n0LSAB?f?J) delta 37 tcmdnZyqkG~HPhDvlMNXqCx+c*uH|o-XeTk*i!p<<MDhRw0|P^;9smIA3(^1p diff --git a/irlc/tests/unitgrade_data/Problem4PIDAgent.pkl b/irlc/tests/unitgrade_data/Problem4PIDAgent.pkl index c3fd8909d810a5943171ac685a2e95326d9ae808..2dc14ad1e55bd9a85108809779f809f20ec255a3 100644 GIT binary patch delta 50 zcmX@8a!_T04P(Pb+g&V7Gb1MFa7u0#VvS&7GUM2s!<os#WEZjdI&UHi=aO>;3=9km GrFsCN3l8-F delta 50 zcmX@8a!_T04I{%w+g&V7@;@f$a7u0#VvS&7Dp{~OhclCfX)4F&>%569oJ$=Q7#J8B GO7#Gq?+%;* diff --git a/irlc/tests/unitgrade_data/Problem4PolicyEvaluation.pkl b/irlc/tests/unitgrade_data/Problem4PolicyEvaluation.pkl index e988ad6626a12e54cb6292ed0560835fcde9c488..9cb9f08bc370f4a30110aeaceff81fc5895be6ae 100644 GIT binary patch delta 28 kcmaFM@|I<SGt;*N8(rNQ8LKAuGTz{jKfu7iz)-3O0HtmTssI20 delta 28 kcmaFM@|I<SGt<|GjjryDjQNv$8E<e%J1{UXFqG;60GuBP+5i9m diff --git a/irlc/tests/unitgrade_data/Problem5PacmanHardcoded.pkl b/irlc/tests/unitgrade_data/Problem5PacmanHardcoded.pkl index 60282c5cbf6353a285a6425a363e36a1c4658156..a41d0222d42045ede2d5eacd73513bb2a9100775 100644 GIT binary patch delta 20 Zcmb=eo#4du&0(U80cRE$0|=Dr0RTnB1u6gl delta 20 bcmb=eo#4duonfMj0cWO80|NsCL#ZABM%o38 diff --git a/irlc/tests/unitgrade_data/Problem5PolicyIteration.pkl b/irlc/tests/unitgrade_data/Problem5PolicyIteration.pkl index 08d0a1de9624ce2840d456c97c121036d2a6a41d..a5668f0cd26821ec7bd502c8cf6334dbae2f50ec 100644 GIT binary patch delta 21 ccmbQlJc)UNGvk4cE^dq*4jK#$3=E}u07F*=4gdfE delta 21 ccmbQlJc)UNGo!;s7dJ)@ksk~U3=E}u06?b&G5`Po diff --git a/irlc/tests/unitgrade_data/Problem5_6_Boeing.pkl b/irlc/tests/unitgrade_data/Problem5_6_Boeing.pkl index d313e26e5727bec7415f60a10638d703b76313a3..066b7ad5d643ab4af11fd1ff5482f06707b6e498 100644 GIT binary patch delta 43 zcmeyZ@LOSm4b%4n8||JmGEWXkm}oD$*_O$Hk-2_4!{!F&&paGKEes3{45fMifF%yZ delta 43 zcmeyZ@LOSm4bu;Yjdsr&nJ1l{FwtIgvn`VYBXh(02b&w1Kl5<-PGMkRU?|lC0FUtx A@c;k- diff --git a/irlc/tests/unitgrade_data/Problem6ChessTournament.pkl b/irlc/tests/unitgrade_data/Problem6ChessTournament.pkl index 7035e7095ef9ad98f42285f0164d461f0709159f..00e68ef4a2d4f3938898b5ba0813f27abf322dbc 100644 GIT binary patch delta 21 ccmX@gc$9I16Vtbbi7u5Kye13`3=E}u08(oP5dZ)H delta 21 ccmX@gc$9I16VrExi7u5K5ycD)3=E}u08>f^TmS$7 diff --git a/irlc/tests/unitgrade_data/Problem6ValueIteration.pkl b/irlc/tests/unitgrade_data/Problem6ValueIteration.pkl index 26117dae7081cd5dde5d04e16e133337034e99c2..f242501c0d8cfea0cfff3745b5c79a7e5c7a74a6 100644 GIT binary patch delta 22 bcmbQhJb`(FBhxpBjn1x&92pECP^t$2Olt*w delta 22 bcmbQhJb`(FBh#+~8=YMlITkR0K&c)8Rc8h{ diff --git a/irlc/tests/unitgrade_data/Problem7PIDCar.pkl b/irlc/tests/unitgrade_data/Problem7PIDCar.pkl index 3393d7eb47dbebe9239b902d13032a4eebcb48fa..515794b16a025761728494b5a33329e533e5c2d7 100644 GIT binary patch delta 46 zcmZ3?yqI}{B@6F~)`W?+af}xyeiWU!(Vp?p#E&72dnSGqogB?*&v}yp3`+F?tJ)BX delta 46 zcmZ3?yqI}{B@3VD8i$Ftag3KHeiWU!(Vp?h#E&722PS?LogB?*&$)vE3`+F?nYIu4 diff --git a/irlc/tests/unitgrade_data/Problem7_8_PidLQR.pkl b/irlc/tests/unitgrade_data/Problem7_8_PidLQR.pkl index a8befec36a3e155f9f1fe055f8d0b2eb841f3795..b579ed7c05dd433dc7d008cdc6ade7223c149822 100644 GIT binary patch delta 27 gcmbQwJfC@j4deHTw!fJA7fiNfjN|BI0D)3H0E-9+W&i*H delta 27 gcmbQwJfC@j4dbtgw!fJ88zx&a#&N7;0D)3H0D}Yv=>Px# diff --git a/irlc/tests/unitgrade_data/Problem8ValueIterationAgent.pkl b/irlc/tests/unitgrade_data/Problem8ValueIterationAgent.pkl index b5a78afcd03096134dd779734ebd7428b75101f6..5629afc31194b2fc36c1b212f28076015b775d3c 100644 GIT binary patch delta 28 kcmX@YbcAVw8`GBq6FrtOua93b@uN3K$O;Aq28L2S0I-J&#Q*>R delta 28 kcmX@YbcAVw8`D>Yi5|<C*KsVE_|cogD20K6fuU3n0GW3QKmY&$ diff --git a/irlc/tests/unitgrade_data/Problem9Gambler.pkl b/irlc/tests/unitgrade_data/Problem9Gambler.pkl index 32659e41fc735fcb00b86d77c302cd4f8fefd1fd..ed58c48391a348bfe8dbfeff2205acd5418c086e 100644 GIT binary patch delta 22 dcmdnZv72Lp71Pg#jW*wzIm#Uv7#J8z^#EZN2NeJS delta 22 ecmdnZv72Lp71OT+8*RQbbBHA{FfcHb>Hz>^d<PW( diff --git a/irlc/tests/unitgrade_data/QAgentQuestion.pkl b/irlc/tests/unitgrade_data/QAgentQuestion.pkl index b2d68c845f99cb476a059f553a9d1fc19471869d..1564ef4461bbd73cb56dcd6940b2c522e7c4bc1d 100644 GIT binary patch literal 9533 zcmZo*nQElU00y;FG<x`bogLFt^GX6sQ;SP7^Yf<ka22PPgcdmGBo-G>X`9l+Qj(dQ zI;CxjyWMXGu&xZo9{!Tl;*$8l__WfzWU!{TDLrh7#l@*bB~#j_)K1A@%wPivIV2}0 zXQWQ)5lPO^EhsHXjV~!m%t}oz$uG)GEuPZD3D%#VpHn=ghb!JAu{a|&B{;Qk%H%1U zjZ^$;r}S`UBo=3sCFYc-PU+&Pmf58fcWtGAaod!n_9<;sVyE=*XBOoo>!ns080sa0 zJTaw*D+Fdk<&?=&G-9V{^oZq^<`z`y#V6+%rRwFD=9FY678NB{PU+!^FG@|$&nqq| zDork#GI>f5D_G@}9`?Kxh?2=uyct@jI5Q?qX`d1_MZ=p>Gef?IHKnAoAQhyZDbr#~ zX9q-zxrfnail3jK*Z=?j|APr{hLR~soz5KKKx1HFm{Kw&DMKzp5oBbBGDH`nW`<U$ zGY2D74IU*-gq1K8R>DG92`gbGY=o7t6IKEX7(AZlBy0*7VI|yzmGBT&!b?~QA7Lf@ zgp~*oRw77Pi4b8W!i1HG5LO~eScw>6CE|pYNDx*cNmz*#VI|UpmB<iQB1>3_9APE$ zgq0`|R-#B)i4tKY%7m4u5LN=Md+-&9YJ^QuC#*z+uo6uqC8@EX0^XakZHhOeH$&SL zZw3&V@jv6gHv?D{%+F$EWMDv6kj2c%z<@?TH86s;AsGhImjx>BA*Q1!WP&OOxd6gu z@@8tA;>`>qvjjl)LIfB=0^SfdvO*?r=C&zNaj+(DX0VYkH6TwhgFMy47m`|(o0*rG zqflIuSduyg>~|(_W^d*UnI5*{<iwoBqA8Q7WN0~az$=xuDM6qx0jXjHsf1z%Z^kSI zMg|6uT6i4=R?F<o=nYa03NDZiMv!meYT;ELR4qg`gExaW6I36FW&pViN2Lj>Xh9fH z{Q;5&;ZY^1?F~?vfbeLzjD`ybC?SCGXkr*m48$Y`c*$KdB?(kK*G}nS$xSTIo`O+M zd$Y7o$x!cM$}~joCe>i=Cb2LwGBSdaU~%n~tY4rC1-+{TGA)adiGiWhc~mjuXlO9B zPJwr$M?+&YG)Bvf(SlLKd*~O8(4oR&ztoZ`jZ+e9r)XsSt(~F)>M?pVx*rgp7<g7x zD9fA4{ebX)y?176S>DXv45b<W;A%j9DE9-xmsa^dzw^nP+5Ld<zh5g4Z(u3S_zzYC z5@4J>$@nPPxIfY&mc26GOx_F-r8`4R=CG7zF)%VPfK`KfZSDtz-{$$})Vu)O`nT~% z*@10fTeEmz%G?hKt6x6StNsJj$@OM(-~XvUM&s+Dez2jX%N(M@Sxd9{5vqCLv$FoQ za|Jm;=;Mj48w$UI%oaEvd!%Cf5s)Uq4>nA9=1nRE_k59DBDBuxu}ohPNHt5?)$^;) z&I73yiaa^{@g}~~EFqW`5O)jacr#BvW(M(|H$!Qb7_vMhq`0$=xtI4|gLp_1T^<}R z`~Te(Yu9GtDb12bmUrKOawZ7OK$tHBmiK0GxBmbcTqrJWgN}!1u(VBaw@;qe0Fvn8 z3q&1v&tPhs0#;%73nb6LkY&KkzyKOY&)@)!sUx^N8N3++kP-3>VQk~#8B(BuZ;a*` zWULut_~>vnqVWI;m(k&9^aeL1Tt@tGG^mf`4I9e?4amVp-aw;m%-}IM<k2<=8>R*_ zm<Q4Y8MT9$gFK*kl$n7c3q*hlthOmx$C$wbj3AAWp+hFnkR7OSYMYWJ%g6v85(B9P zVfb(w>d+gwFo%{&pdmO2W&n)`f```_y}_p7C?P<D0U!+Ojg2Y+jS53@0w_#Ccr;vw z`#=fi5Xb0{#ORpI(C=D|4zaPcP9bj$5wlm8wVfH-MRMQ&0Me?4^?hJmX1Gjg)()6z zP`wQo2lbTTszLpeoiJqxC7_8jCIlDR&|NUqXoiA1Eg;i&Bg=z(J8-9hI*nO-@W{h$ zEX~>rQwA4<_!MT%0Rr+zVDf0rL>PzcDNthw+~Gq=;|YQjILwB*?IZ#DQ($>ehYcDJ Tpbi_Nvjy(6#an^nw^R=R9Z@){ literal 6517 zcmZo*nVKcZ00y;FG<x`bogLFt^GX6sQ;SP7^Yf<ka22PPgcdmGBo-G>X`9l+Qj(dQ zI;CxjyWQUgu&xZo9{!Tl;*$8l__WfzWU!{TDLrh7#l@*bB~#j_)K1A@%wPivIV2}0 zXQWQ)5lPO^EhsHXjV~!m%t}oz$uG)GEuPZD3D%#VpHn=ghb!JAu{a|&B{;Qk%H%1U zjZ^$;r}S`UBo=3sCFYc-PU+&Pmf58fcWtGAaod!n_9<;sVyE=*XBOoo>!ns080sa0 zJTaw*D+Fdk<&?=&G-9V{^oZq^<`z`y#V6+%rRwFD=9FY678NB{PU+!^FG@|$&nqq| zDork#GI>f5D_G@}9`?Kxh?2=uyct@jI5Q?qX`d1_MZ=p>Gef?IHKnAoAQhyZDbr#~ zX9q-zxrfnail3jK*Z=?j|APr{hLR~soz5KKKx1HFm{Kw&DMKzp5oBbBGDH`nW`<U$ zGY2D74IU*-gq1K8R>DG92`gbGY=o7t6IKEX7(AZlBy0*7VI|yzmGBT&!b?~QA7Lf@ zgp~*oRw77Pi4b8W!i1HG5LO~eScw>6CE|pYNDx*6E!Xg6H7UZTNE22fLs*F{VI^{e zmB=G0NsR>+LEemQQ@k0y8QP|JGl0m9vrrx*x(rktqUv%67<w~6RY6Q)0-FLN8NDGS zNF1Vu0V)C#XTo4Jc{8_7@n!~-5H=&2%>W{M_(D>Pax?Q1a}<h85=&C2fSt|+Ql25x z!&aP}n3GsEW%85^Eu^xqZAuWxOCVK@AeB(e;LVuv0;CWc5)4zo3Yonby+KMDycxYg z${9gEgD8ZTgHVOc-V7l7ag@EF@*0Hk6hk0s5FS;6T5W;C1cXP!Wi(tkKnVeahfiWa zZk2%wklHCdEV+rr*;6n|5^t8)DH-ZLOqqtr?ZFzX?LihsMn*<ZAQjh6$(q2(z<|}X ztm%vl44uxSiWxy+I~p3W0%kNc7+R;mdrzaGF>*r#+M_P^OD&nwI3=-miblri+9?{| z4DJVn)h{3ERsR8M|9UgI9}xbL^0*;=hBveO0l^P8On2r@D$O_pQ~EZ~Kd0seM5#9e zNR9BfI+=vVy3&lZFg2yi9HPQmy&2sP2!B-N2|E?y%>-5YMRsw;!n>szSD;D-bG(@+ zA2S0h4d%a4@a7mqsW(Gu#x<}SP&3v20RN|~Y~9n@AlC^+o}B%76CcQCp^qoFZYca( znsFUTsn9yB$1;6I2y^a&CEX8jxR&1xE%ZiqI`4Z{)}MB+r5O*vY9My8gk3$q>g+s- z0q*-h^~Y#@J=9;C@ffbgn*k<yawZ7OD9w0MJH?y9-Tnil(OsPJ5R2k}H^thunRrS| z+n@tJ87ys6-0c$|fX9h?_ySP}mNJ;yrhrx0{f3yDRnNr002(OD-~bI8A-Fslycq(J zA)pLlYy&?TQlOC?jEWO7m;y0;bi4&oOhUqCK*n1@?IutU4cuo1^&}y^L`ZKFxrYhq zy@Gq5Sr?fY7(i(f+&N`#o09PqWDKNp2ajD)=MJY3e6$Rt${W^KWCrz^M>~V)2?Nw> z2Vqd76i1!{NrNzDN-!JLqYdB@(89;8sk2TqF))B@6!-lPAUPURjUl+qaAs-NDIBW7 zl`zbJ(yTKuWuSTwRDQsX@MiXAD9t(tlSha%BADPR9i$9Y`e1V#nr)ZCrhzI+s1J%u S+omAuFmP2EehTclQau31=!vla diff --git a/irlc/tests/unitgrade_data/RendevouzItem.pkl b/irlc/tests/unitgrade_data/RendevouzItem.pkl index 06cde769c462c7c27150f75bf8abc31dc7c97739..7016d99087fe4b4f4ad3bc18984ce75dc502ebf3 100644 GIT binary patch delta 30 mcmcc3a+_s>C8NVeD|bfbeb;A9ZecvfxsK@r0|Ns?sU85YJPKX_ delta 30 mcmcc3a+_s>C1b-zD|bfbJ$wfyw=kaLTs~_B0|Ns?sU85WD+)va diff --git a/irlc/tests/unitgrade_data/SarsaQuestion.pkl b/irlc/tests/unitgrade_data/SarsaQuestion.pkl index bb6c4f0ca7790d883d7e3b3c3b033768ceba51e4..2074944241492ef40be122832f251c7058ae9948 100644 GIT binary patch delta 86 zcmbQrG?i(B1=DwjiB@JxT$A|R7th+@et@gM*W}C0CierJZ{<H{UyX3z|NraS^Zb)I tCQeFF&iMB$Ry~~Qz?8NrK~qXI7&F+~rev_RO>wt>=_ta$z`#(d2LMNTBa#3B delta 86 zcmbQrG?i(B1=F_!6RpgYxY@3XUuk~get<>jzCuBqp8Elwkjl61XZYO@Fsq7eop1VL s;-my+bwiFdGEqzirnF57no^p<n8DUIC4;4Hio5;WHs=Ng28L2S0J~%$&Hw-a diff --git a/irlc/tests/unitgrade_data/TD0Question.pkl b/irlc/tests/unitgrade_data/TD0Question.pkl index 775a2d90cca489ee99b02f158b2921df81b7a6b2..801506652caca503c3463bf3c4f0c0df23d086f9 100644 GIT binary patch literal 5365 zcmZo*nfgkE0Ss!VX!LN0xEKVMrWTiE=I2f6;VMoo2`zBWNh~g&(l(`sr6e;qbxPY5 zce`(34GbBKJ;Eia#U=4&i8-aI@oA-b$zY9bQ+n7Ei;Gi>N~W|;shyI+n85}Ta!5{0 z&PbioBa)n-TToh(8edYBn3bAbl3$dWT0EtP6RbZ!Kc{#~4_CZLVsS=lN^ol7l*v;x z8>jfyPU+#yNG#3(TQ#MNqgrN{PTaMX{>5!mlG>-VO^Kb-!=G7{ldP9oVPL42n4X$f zGNp$r1ZG3!l*v;xVy9^Ih~<^$7F6oRC+8QX>gAT^lw>9r6(v?q>EVelN=?qsD=sN2 zO)i--c}fo}Sml%+_Pi8`lF3uN8Cs_}GbT-GpAs}h!<$hvL%xSKrKGYT6{Mai(_%_z z2SkdwhtXz=pP!%C|NsC0g9&ejk|{}@&K!`CV_=w4G9@WPE<+JyWQH<C7o%o|R;M!u zBUB9@B}{~sFcVh7LRbkaVI^#Ym9P_50t*;Cp5`QM3KwA|+=P|z5LUuVSP36tCH#by z2oP2xNLYywVI{(Zl|Ty$d}&vduqk4Mm538oB0*S*Bw-~|gq27WRw6@Ki7a6ya)g!0 z6IP-?ScxKGB}zz2Qe#17oi}6K6mJG7$pB|Ff<+j;K_W=(jECOHyvN=QAQc%;5YQW< z2P_UZmkDkzSUlquR236g_nVA2-b`Tom_cO5J8x!?cn@DlYEf=xUSf_yaY<rH>J*TJ zGh}+$ijxy_5{srxo|2*E%mFVB+ol9f$@mNsf)}C)A*i<*rXYmir7=Q?IE`3TVG+V& zBNkOygs^ysI3eQ9B2E>Sz`^1OEPliygvCbUgs?aQix6>UVR080v#<zZF&@P%aH7Os z--Fr+AdIK#14)DMs1nqs5-3bScr;u_!-WHs5I}e|F_4@X;00RAlq66|RXe4JB{#7+ zdkRJu#haydN``t5Q>GzuXQu{hXNQH6k&zJ;8^yI#ve=mz7_gd_#mB_J(CIv?m=P4V zqoDyyyrZGP&^iS^G%y+(qoFatr6>br)T-DowPZ@;l*ArE@OWEkNoG#5UUGhJE@;S* zCnYs4u{5V7B{R9i2sA3jkyM(QQ<9liJf(*<Ehj$_G_=+V8a~Uo<-Y&KX_Fk4*bN!C z-S?l8TcGt^S|Q_(`~C+qvy%nwwKDFz@4xERl_(XoH{*r-{x$u<M=L*>X1sCVf2jWS zBrDAm8E@V9ubc@2vohYf@89wDP=AcZw~Y7h_E6E%Ht5)6220x%cl(%XaErWD4*=kt B_elT% delta 456 zcmeyWxrmpwfo1A+wu!7pj13ztVwsFmW2b0%GkP<&P4Q*|lgwZe!iI=5fJGR*ncAj! zGk7ykKFFp~&&U7)Q`)8kP02{k&~oO$BE$?)#DJuVI3cJ$WQ{07Oc}|WBRCZqC$sY< zF=Z^DEGQ@{l(DYUnT3&&k#S1Nl%(R?DFY_Vl(BBIhDdN^g!}#-Uk~-iXnf0vbl<<T z;>zF8+uSmu-1o1Vzpw1X7ORX{_x&qpg21edI0Q4^-5x4h+BPMFF@vpbN(M{Y6nFce K0B{(T>Hz@SLXgh@ diff --git a/irlc/tests/unitgrade_data/UCBAgentQuestion.pkl b/irlc/tests/unitgrade_data/UCBAgentQuestion.pkl index edd179f5233e5a15868c54b4fd9cc2965fadb20f..473236810511533d00ae28302696def58d013643 100644 GIT binary patch delta 102 zcmZqpz}oPEb%G_+ABK(A+J=k=Hm4g#8!)#$c9?uLQJwMY=C_FsnvDIM^}0PYnVJnI zS8P#a`sT2C_7*-vrr?j8S3gxSWS)O!!t`@uj5>^8w*MAmTw}mEYx@d!Mt=j&w(kiH J3=9mVdH{3RC|m#l delta 104 zcmZqpz}oPEb%G_+e}|3M+J=nxH>Vp$8!)#^9GHAGQJwMI=C_FsnvCt6^}0PYnW|JK zS8P#a`rfd4_7*-vrjT2kS3gxSWL_ZoWBNHUMxDuT5;?Xri8C%WV7$3~g*&6a0cSh& L0tN;KhEhEMxyUEE diff --git a/irlc/utils/async_wrappers.py b/irlc/utils/async_wrappers.py index 8dbebf5..e2df79a 100644 --- a/irlc/utils/async_wrappers.py +++ b/irlc/utils/async_wrappers.py @@ -37,3 +37,61 @@ class AsyncTimeLimit(TimeLimit): truncated = True return observation, reward, terminated, truncated, info + + + + +def _fix_webassembly_packages(yes_really_do_it=False): + import importlib + import os + assert yes_really_do_it, "This function is for internal use for deploying webassembly projects. Don't use it in your base dir." + + spec = importlib.util.find_spec("sympy", None) + base = os.path.dirname(spec.origin) + testf = f"{base}/testing/__init__.py" + if base.startswith("/data/data/"): + # with open(testf, 'w') as f: + # f.write("# Nothingatall") + # with open(f"{base}/testing/runtests.py", 'w') as f: + # f.write("# Nothingatall") + + fname = f"{base}/utilities/decorator.py" + assert os.path.isfile(fname) + code = open(fname, 'r').read() + with open(fname, 'w') as f: + # print(f"{fname=}") + f.write(ncode := "\n".join([l for l in code.splitlines() if not l.startswith("from sympy.testing")])) + + code = open(fname := f"{base}/utilities/__init__.py", 'r').read() + code = code.replace("from .timeutils import timed", "timed = lambda x: 3") + with open(fname, 'w') as f: + f.write(code) + + for fname in [f"{base}/core/parameters.py", f"{base}/matrices/utilities.py"]: + code = open(fname, 'r').read() + code = code.replace("from threading import local", "local = object") + with open(fname, 'w') as f: + f.write(code) + + # Fix timeit. + code = open(fname := f"{base}/utilities/timeutils.py", 'r').read() + code = code.replace("import timeit", "# REMOVED") + with open(fname, 'w') as f: + f.write(code) + + code = open(fname := f"{base}/testing/runtests.py", 'r').read() + code = code.replace("from timeit import default_timer as clock", "# REMOVED") + # DocTestFinder, DocTestRunner + # + # code = code.replace("import doctest as pdoctest", "# REMOVED") + + # code = code.replace("from doctest import DocTestFinder, DocTestRunner", "DocTestFinder, DocTestRunner = object, object") + # code = code.replace("pdoctest._indent", "#REMOVED") + # code = code.replace("import doctest", "# REMOVED") + + with open(fname, 'w') as f: + f.write(code) + print("Patched ok.") + """NB. Remember to also patch Decimal by adding extra stuff like exceptions to the decimal-module which is masked by webassembly.""" + + pass diff --git a/irlc/utils/player_wrapper.py b/irlc/utils/player_wrapper.py index e84b48f..be01959 100644 --- a/irlc/utils/player_wrapper.py +++ b/irlc/utils/player_wrapper.py @@ -88,11 +88,14 @@ async def _webassembly_interactive(env, agent, autoplay=False): def filled_circle(surface, x, y, r, color): pygame.draw.circle(surface, color, (x, y), r, width=0) + def hline(surface, x1, x2, y, color): + pygame.draw.line(surface, color, (x1, y), (x2, y) ) gfxdraw.aapolygon = aapolygon gfxdraw.filled_polygon = filled_polygon gfxdraw.aacircle = aacircle gfxdraw.filled_circle = filled_circle + gfxdraw.hline = hline # from irlc.utils.player_wrapper import AsyncPlayerWrapperPygame -- GitLab