From 9ab4a43b46a059e8f342f6e181eb3c2dafe1d3d9 Mon Sep 17 00:00:00 2001 From: Tue Herlau <tuhe@dtu.dk> Date: Thu, 3 Apr 2025 16:40:01 +0200 Subject: [PATCH] Lecture 9 examples --- irlc/lectures/lec09/__init__.py | 1 + irlc/lectures/lec09/unf_frozenlake.py | 11 ++++++++++ irlc/lectures/lec09/unf_gridworld.py | 12 +++++++++++ .../lec09/unf_policy_evaluation_frozen.py | 20 +++++++++++++++++++ .../lec09/unf_policy_evaluation_gridworld.py | 20 +++++++++++++++++++ ...nf_policy_evaluation_stepwise_gridworld.py | 20 +++++++++++++++++++ .../unf_policy_improvement_frozenlake.py | 7 +++++++ .../lec09/unf_policy_improvement_gridworld.py | 7 +++++++ irlc/lectures/lec09/unf_vi_frozenlake.py | 17 ++++++++++++++++ irlc/lectures/lec09/unf_vi_gridworld.py | 16 +++++++++++++++ .../lec09/unf_vi_gridworld_stepwise.py | 16 +++++++++++++++ 11 files changed, 147 insertions(+) create mode 100644 irlc/lectures/lec09/__init__.py create mode 100644 irlc/lectures/lec09/unf_frozenlake.py create mode 100644 irlc/lectures/lec09/unf_gridworld.py create mode 100644 irlc/lectures/lec09/unf_policy_evaluation_frozen.py create mode 100644 irlc/lectures/lec09/unf_policy_evaluation_gridworld.py create mode 100644 irlc/lectures/lec09/unf_policy_evaluation_stepwise_gridworld.py create mode 100644 irlc/lectures/lec09/unf_policy_improvement_frozenlake.py create mode 100644 irlc/lectures/lec09/unf_policy_improvement_gridworld.py create mode 100644 irlc/lectures/lec09/unf_vi_frozenlake.py create mode 100644 irlc/lectures/lec09/unf_vi_gridworld.py create mode 100644 irlc/lectures/lec09/unf_vi_gridworld_stepwise.py diff --git a/irlc/lectures/lec09/__init__.py b/irlc/lectures/lec09/__init__.py new file mode 100644 index 0000000..a56057c --- /dev/null +++ b/irlc/lectures/lec09/__init__.py @@ -0,0 +1 @@ +# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text. 
diff --git a/irlc/lectures/lec09/unf_frozenlake.py b/irlc/lectures/lec09/unf_frozenlake.py
new file mode 100644
index 0000000..3e0a920
--- /dev/null
+++ b/irlc/lectures/lec09/unf_frozenlake.py
@@ -0,0 +1,11 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.ex01.agent import Agent
+from irlc.gridworld.gridworld_environments import FrozenLake
+from irlc import interactive, train
+
+if __name__ == "__main__":
+    env = FrozenLake(render_mode='human', print_states=True)
+    env, agent = interactive(env, Agent(env))  # rebind both names to the (env, agent) pair returned by interactive()
+    agent.label = "Random agent"  # base Agent from ex01; presumably acts randomly and the label is displayed -- confirm
+    train(env, agent, num_episodes=100, verbose=False)  # run the demo for 100 episodes
+    env.close()
diff --git a/irlc/lectures/lec09/unf_gridworld.py b/irlc/lectures/lec09/unf_gridworld.py
new file mode 100644
index 0000000..f666511
--- /dev/null
+++ b/irlc/lectures/lec09/unf_gridworld.py
@@ -0,0 +1,12 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.ex01.agent import Agent
+from irlc.gridworld.gridworld_environments import BookGridEnvironment
+from irlc import interactive, train
+
+
+if __name__ == "__main__":
+    env = BookGridEnvironment(render_mode='human', print_states=True, living_reward=-0.05)
+    env, agent = interactive(env, Agent(env))  # rebind both names to the (env, agent) pair returned by interactive()
+    agent.label = "Random agent"  # base Agent from ex01; presumably acts randomly and the label is displayed -- confirm
+    train(env, agent, num_episodes=100, verbose=False)  # run the demo for 100 episodes
+    env.close()
diff --git a/irlc/lectures/lec09/unf_policy_evaluation_frozen.py b/irlc/lectures/lec09/unf_policy_evaluation_frozen.py
new file mode 100644
index 0000000..9adda9f
--- /dev/null
+++ b/irlc/lectures/lec09/unf_policy_evaluation_frozen.py
@@ -0,0 +1,20 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.gridworld.gridworld_environments import FrozenLake
+from irlc import interactive, train
+from irlc.gridworld.demo_agents.hidden_agents import PolicyEvaluationAgent2
+
+def policy_evaluation(env=None):  # NOTE(review): env defaults to None but is used unconditionally; callers presumably must pass one
+    agent = PolicyEvaluationAgent2(env, gamma=1., steps_between_policy_improvement=None)  # presumably None means no policy-improvement steps -- confirm
+    env, agent = interactive(env, agent)  # rebind both names to the (env, agent) pair returned by interactive()
+    train(env, agent, num_episodes=100)
+    env.close()
+
+def policy_improvement(env=None, q_mode=True):  # NOTE(review): q_mode is never read in this function body
+    agent = PolicyEvaluationAgent2(env, gamma=1.,steps_between_policy_improvement=20)  # presumably improves the policy every 20 steps -- confirm
+    env, agent = interactive(env, agent)  # rebind both names to the (env, agent) pair returned by interactive()
+    train(env, agent, num_episodes=1000, verbose=False)
+    env.close()
+
+if __name__ == "__main__":
+    env = FrozenLake(render_mode='human', living_reward=-0.0)  # -0.0 == 0.0, so the value passed is zero
+    policy_evaluation(env)  # only the evaluation demo runs when this file is executed as a script
diff --git a/irlc/lectures/lec09/unf_policy_evaluation_gridworld.py b/irlc/lectures/lec09/unf_policy_evaluation_gridworld.py
new file mode 100644
index 0000000..ccfd39c
--- /dev/null
+++ b/irlc/lectures/lec09/unf_policy_evaluation_gridworld.py
@@ -0,0 +1,20 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.gridworld.gridworld_environments import BookGridEnvironment
+from irlc import interactive, train
+from irlc.gridworld.demo_agents.hidden_agents import PolicyEvaluationAgent2
+
+def policy_evaluation(env=None):  # NOTE(review): env defaults to None but is used unconditionally; callers presumably must pass one
+    agent = PolicyEvaluationAgent2(env, gamma=1., steps_between_policy_improvement=None, only_update_current=False)  # presumably: no improvement steps, update all states -- confirm
+    env, agent = interactive(env, agent)  # rebind both names to the (env, agent) pair returned by interactive()
+    train(env, agent, num_episodes=100)
+    env.close()
+
+def policy_improvement(env=None, q_mode=True):  # NOTE(review): q_mode is never read in this function body
+    agent = PolicyEvaluationAgent2(env, gamma=1.,steps_between_policy_improvement=20)  # presumably improves the policy every 20 steps -- confirm
+    env, agent = interactive(env, agent)  # rebind both names to the (env, agent) pair returned by interactive()
+    train(env, agent, num_episodes=1000)
+    env.close()
+
+if __name__ == "__main__":
+    env = BookGridEnvironment(render_mode='human', living_reward=-0.05)
+    policy_evaluation(env)  # only the evaluation demo runs when this file is executed as a script
diff --git a/irlc/lectures/lec09/unf_policy_evaluation_stepwise_gridworld.py b/irlc/lectures/lec09/unf_policy_evaluation_stepwise_gridworld.py
new file mode 100644
index 0000000..a438af8
--- /dev/null
+++ b/irlc/lectures/lec09/unf_policy_evaluation_stepwise_gridworld.py
@@ -0,0 +1,20 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.gridworld.gridworld_environments import BookGridEnvironment
+from irlc import interactive, train
+from irlc.gridworld.demo_agents.hidden_agents import PolicyEvaluationAgent2
+
+def policy_evaluation_stepwise(env=None):  # NOTE(review): env defaults to None but is used unconditionally; callers presumably must pass one
+    agent = PolicyEvaluationAgent2(env, gamma=1., steps_between_policy_improvement=None, only_update_current=True)  # presumably: no improvement steps, update only the current state -- confirm
+    env, agent = interactive(env, agent)  # rebind both names to the (env, agent) pair returned by interactive()
+    train(env, agent, num_episodes=100)
+    env.close()
+
+def policy_improvement(env=None, q_mode=True):  # NOTE(review): q_mode is never read in this function body
+    agent = PolicyEvaluationAgent2(env, gamma=1.,steps_between_policy_improvement=20)  # presumably improves the policy every 20 steps -- confirm
+    env, agent = interactive(env, agent)  # rebind both names to the (env, agent) pair returned by interactive()
+    train(env, agent, num_episodes=1000)
+    env.close()
+
+if __name__ == "__main__":
+    env = BookGridEnvironment(render_mode='human', living_reward=-0.05)
+    policy_evaluation_stepwise(env)  # only the stepwise evaluation demo runs when this file is executed as a script
diff --git a/irlc/lectures/lec09/unf_policy_improvement_frozenlake.py b/irlc/lectures/lec09/unf_policy_improvement_frozenlake.py
new file mode 100644
index 0000000..7242b00
--- /dev/null
+++ b/irlc/lectures/lec09/unf_policy_improvement_frozenlake.py
@@ -0,0 +1,7 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.gridworld.gridworld_environments import BookGridEnvironment, FrozenLake
+from irlc.lectures.lec09.unf_policy_evaluation_gridworld import policy_improvement  # module is added under lec09 by this patch
+
+if __name__ == "__main__":
+    env = FrozenLake(render_mode='human', living_reward=-0)  # -0 == 0, so the value passed is zero
+    policy_improvement(env)  # NOTE(review): reuses the gridworld module's policy_improvement on a FrozenLake env -- confirm intended
diff --git a/irlc/lectures/lec09/unf_policy_improvement_gridworld.py b/irlc/lectures/lec09/unf_policy_improvement_gridworld.py
new file mode 100644
index 0000000..eb6d762
--- /dev/null
+++ b/irlc/lectures/lec09/unf_policy_improvement_gridworld.py
@@ -0,0 +1,7 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.gridworld.gridworld_environments import BookGridEnvironment
+from irlc.lectures.lec09.unf_policy_evaluation_gridworld import policy_improvement  # module is added under lec09 by this patch
+
+if __name__ == "__main__":
+    env = BookGridEnvironment(render_mode='human', living_reward=-0.05)
+    policy_improvement(env)
diff --git a/irlc/lectures/lec09/unf_vi_frozenlake.py b/irlc/lectures/lec09/unf_vi_frozenlake.py
new file mode 100644
index 0000000..4ece4f2
--- /dev/null
+++ b/irlc/lectures/lec09/unf_vi_frozenlake.py
@@ -0,0 +1,17 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.gridworld.gridworld_environments import FrozenLake
+from irlc.ex01.agent import train
+from irlc.gridworld.demo_agents.hidden_agents import ValueIterationAgent3
+from irlc import interactive
+
+def q1_vi(env):  # run the value-iteration demo agent on env for 100 episodes, then close it
+    agent = ValueIterationAgent3(env, epsilon=0, gamma=1, only_update_current=False)  # presumably: greedy, undiscounted, update all states -- confirm
+    env, agent = interactive(env, agent)  # rebind both names to the (env, agent) pair returned by interactive()
+    env.reset()  # explicit reset before the env is handed to train()
+    train(env, agent, num_episodes=100)
+    env.close()
+
+
+if __name__ == "__main__":
+    env = FrozenLake(render_mode='human', living_reward=-0)  # -0 == 0, so the value passed is zero
+    q1_vi(env)
diff --git a/irlc/lectures/lec09/unf_vi_gridworld.py b/irlc/lectures/lec09/unf_vi_gridworld.py
new file mode 100644
index 0000000..766303f
--- /dev/null
+++ b/irlc/lectures/lec09/unf_vi_gridworld.py
@@ -0,0 +1,16 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.gridworld.gridworld_environments import BookGridEnvironment
+from irlc.ex01.agent import train
+from irlc.gridworld.demo_agents.hidden_agents import ValueIterationAgent3
+from irlc import interactive
+
+def q1_vi(env):  # run the value-iteration demo agent on env for 100 episodes, then close it
+    agent = ValueIterationAgent3(env, epsilon=0, gamma=1, only_update_current=False)  # presumably: greedy, undiscounted, update all states -- confirm
+    env, agent = interactive(env, agent)  # rebind both names to the (env, agent) pair returned by interactive()
+    env.reset()  # explicit reset before the env is handed to train()
+    train(env, agent, num_episodes=100)
+    env.close()
+
+if __name__ == "__main__":
+    env = BookGridEnvironment(render_mode='human', living_reward=-0.05)
+    q1_vi(env)
diff --git a/irlc/lectures/lec09/unf_vi_gridworld_stepwise.py b/irlc/lectures/lec09/unf_vi_gridworld_stepwise.py
new file mode 100644
index 0000000..152a91b
--- /dev/null
+++ b/irlc/lectures/lec09/unf_vi_gridworld_stepwise.py
@@ -0,0 +1,16 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.gridworld.gridworld_environments import BookGridEnvironment
+from irlc.ex01.agent import train
+from irlc.gridworld.demo_agents.hidden_agents import ValueIterationAgent3
+from irlc import interactive
+
+def q1_vi(env):  # stepwise variant: same flow as unf_vi_gridworld.q1_vi but with only_update_current=True
+    agent = ValueIterationAgent3(env, epsilon=0, gamma=1, only_update_current=True)  # presumably updates only the state currently occupied -- confirm
+    env, agent = interactive(env, agent)  # rebind both names to the (env, agent) pair returned by interactive()
+    env.reset()  # explicit reset before the env is handed to train()
+    train(env, agent, num_episodes=100)
+    env.close()
+
+if __name__ == "__main__":
+    env = BookGridEnvironment(render_mode='human', living_reward=-0.05, print_states=False)
+    q1_vi(env)
-- 
GitLab