From 9ab4a43b46a059e8f342f6e181eb3c2dafe1d3d9 Mon Sep 17 00:00:00 2001
From: Tue Herlau <tuhe@dtu.dk>
Date: Thu, 3 Apr 2025 16:40:01 +0200
Subject: [PATCH] Lecture 9 examples

---
 irlc/lectures/lec09/__init__.py               |  1 +
 irlc/lectures/lec09/unf_frozenlake.py         | 11 ++++++++++
 irlc/lectures/lec09/unf_gridworld.py          | 12 +++++++++++
 .../lec09/unf_policy_evaluation_frozen.py     | 20 +++++++++++++++++++
 .../lec09/unf_policy_evaluation_gridworld.py  | 20 +++++++++++++++++++
 ...nf_policy_evaluation_stepwise_gridworld.py | 20 +++++++++++++++++++
 .../unf_policy_improvement_frozenlake.py      |  7 +++++++
 .../lec09/unf_policy_improvement_gridworld.py |  7 +++++++
 irlc/lectures/lec09/unf_vi_frozenlake.py      | 17 ++++++++++++++++
 irlc/lectures/lec09/unf_vi_gridworld.py       | 16 +++++++++++++++
 .../lec09/unf_vi_gridworld_stepwise.py        | 16 +++++++++++++++
 11 files changed, 147 insertions(+)
 create mode 100644 irlc/lectures/lec09/__init__.py
 create mode 100644 irlc/lectures/lec09/unf_frozenlake.py
 create mode 100644 irlc/lectures/lec09/unf_gridworld.py
 create mode 100644 irlc/lectures/lec09/unf_policy_evaluation_frozen.py
 create mode 100644 irlc/lectures/lec09/unf_policy_evaluation_gridworld.py
 create mode 100644 irlc/lectures/lec09/unf_policy_evaluation_stepwise_gridworld.py
 create mode 100644 irlc/lectures/lec09/unf_policy_improvement_frozenlake.py
 create mode 100644 irlc/lectures/lec09/unf_policy_improvement_gridworld.py
 create mode 100644 irlc/lectures/lec09/unf_vi_frozenlake.py
 create mode 100644 irlc/lectures/lec09/unf_vi_gridworld.py
 create mode 100644 irlc/lectures/lec09/unf_vi_gridworld_stepwise.py

diff --git a/irlc/lectures/lec09/__init__.py b/irlc/lectures/lec09/__init__.py
new file mode 100644
index 0000000..a56057c
--- /dev/null
+++ b/irlc/lectures/lec09/__init__.py
@@ -0,0 +1 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
diff --git a/irlc/lectures/lec09/unf_frozenlake.py b/irlc/lectures/lec09/unf_frozenlake.py
new file mode 100644
index 0000000..3e0a920
--- /dev/null
+++ b/irlc/lectures/lec09/unf_frozenlake.py
@@ -0,0 +1,11 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.ex01.agent import Agent
+from irlc.gridworld.gridworld_environments import FrozenLake
+from irlc import interactive, train
+
+if __name__ == "__main__":  # Demo script: the base Agent (labelled "Random agent") on FrozenLake.
+    world = FrozenLake(print_states=True, render_mode='human')
+    world, random_agent = interactive(world, Agent(world))  # interactive() returns the (env, agent) pair used from here on
+    random_agent.label = "Random agent"  # presumably the caption shown by the visualization -- TODO confirm
+    train(world, random_agent, verbose=False, num_episodes=100)
+    world.close()
diff --git a/irlc/lectures/lec09/unf_gridworld.py b/irlc/lectures/lec09/unf_gridworld.py
new file mode 100644
index 0000000..f666511
--- /dev/null
+++ b/irlc/lectures/lec09/unf_gridworld.py
@@ -0,0 +1,12 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.ex01.agent import Agent
+from irlc.gridworld.gridworld_environments import BookGridEnvironment
+from irlc import interactive, train
+
+
+if __name__ == "__main__":  # Demo script: the base Agent (labelled "Random agent") on the book gridworld.
+    world = BookGridEnvironment(living_reward=-0.05, print_states=True, render_mode='human')
+    world, random_agent = interactive(world, Agent(world))  # interactive() returns the (env, agent) pair used from here on
+    random_agent.label = "Random agent"  # presumably the caption shown by the visualization -- TODO confirm
+    train(world, random_agent, verbose=False, num_episodes=100)
+    world.close()
diff --git a/irlc/lectures/lec09/unf_policy_evaluation_frozen.py b/irlc/lectures/lec09/unf_policy_evaluation_frozen.py
new file mode 100644
index 0000000..9adda9f
--- /dev/null
+++ b/irlc/lectures/lec09/unf_policy_evaluation_frozen.py
@@ -0,0 +1,20 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.gridworld.gridworld_environments import FrozenLake
+from irlc import interactive, train
+from irlc.gridworld.demo_agents.hidden_agents import PolicyEvaluationAgent2
+
+def policy_evaluation(env=None):
+    """Demo: pair env with a PolicyEvaluationAgent2 (gamma=1, steps_between_policy_improvement=None) via interactive(), train for 100 episodes, then close env."""
+    env, evaluator = interactive(env, PolicyEvaluationAgent2(env, steps_between_policy_improvement=None, gamma=1.))
+    train(env, evaluator, num_episodes=100)
+    env.close()  # closes the environment returned by interactive()
+
+def policy_improvement(env=None, q_mode=True):
+    """Demo: PolicyEvaluationAgent2 with steps_between_policy_improvement=20, trained for 1000 episodes (verbose=False); q_mode is accepted but not used here."""
+    env, improver = interactive(env, PolicyEvaluationAgent2(env, steps_between_policy_improvement=20, gamma=1.))
+    train(env, improver, verbose=False, num_episodes=1000)
+    env.close()  # closes the environment returned by interactive()
+
+if __name__ == "__main__":
+    # Script entry point: run the policy-evaluation demo on FrozenLake with render_mode='human'.
+    policy_evaluation(FrozenLake(living_reward=-0.0, render_mode='human'))
diff --git a/irlc/lectures/lec09/unf_policy_evaluation_gridworld.py b/irlc/lectures/lec09/unf_policy_evaluation_gridworld.py
new file mode 100644
index 0000000..ccfd39c
--- /dev/null
+++ b/irlc/lectures/lec09/unf_policy_evaluation_gridworld.py
@@ -0,0 +1,20 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.gridworld.gridworld_environments import BookGridEnvironment
+from irlc import interactive, train
+from irlc.gridworld.demo_agents.hidden_agents import PolicyEvaluationAgent2
+
+def policy_evaluation(env=None):
+    """Demo: PolicyEvaluationAgent2 (gamma=1, no improvement interval, only_update_current=False) paired with env via interactive(); trains 100 episodes, then closes env."""
+    env, evaluator = interactive(env, PolicyEvaluationAgent2(env, only_update_current=False, steps_between_policy_improvement=None, gamma=1.))
+    train(env, evaluator, num_episodes=100)
+    env.close()  # closes the environment returned by interactive()
+
+def policy_improvement(env=None, q_mode=True):
+    """Demo: PolicyEvaluationAgent2 with steps_between_policy_improvement=20, trained for 1000 episodes; q_mode is accepted but not used here."""
+    env, improver = interactive(env, PolicyEvaluationAgent2(env, steps_between_policy_improvement=20, gamma=1.))
+    train(env, improver, num_episodes=1000)
+    env.close()  # closes the environment returned by interactive()
+
+if __name__ == "__main__":
+    # Script entry point: run the policy-evaluation demo on the book gridworld (living_reward=-0.05).
+    policy_evaluation(BookGridEnvironment(living_reward=-0.05, render_mode='human'))
diff --git a/irlc/lectures/lec09/unf_policy_evaluation_stepwise_gridworld.py b/irlc/lectures/lec09/unf_policy_evaluation_stepwise_gridworld.py
new file mode 100644
index 0000000..a438af8
--- /dev/null
+++ b/irlc/lectures/lec09/unf_policy_evaluation_stepwise_gridworld.py
@@ -0,0 +1,20 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.gridworld.gridworld_environments import BookGridEnvironment
+from irlc import interactive, train
+from irlc.gridworld.demo_agents.hidden_agents import PolicyEvaluationAgent2
+
+def policy_evaluation_stepwise(env=None):
+    """Demo: PolicyEvaluationAgent2 with only_update_current=True (presumably one state updated per step -- TODO confirm); trains 100 episodes, then closes env."""
+    env, evaluator = interactive(env, PolicyEvaluationAgent2(env, only_update_current=True, steps_between_policy_improvement=None, gamma=1.))
+    train(env, evaluator, num_episodes=100)
+    env.close()  # closes the environment returned by interactive()
+
+def policy_improvement(env=None, q_mode=True):
+    """Demo: PolicyEvaluationAgent2 with steps_between_policy_improvement=20, trained for 1000 episodes; q_mode is accepted but not used here."""
+    env, improver = interactive(env, PolicyEvaluationAgent2(env, steps_between_policy_improvement=20, gamma=1.))
+    train(env, improver, num_episodes=1000)
+    env.close()  # closes the environment returned by interactive()
+
+if __name__ == "__main__":
+    # Script entry point: run the stepwise policy-evaluation demo on the book gridworld (living_reward=-0.05).
+    policy_evaluation_stepwise(BookGridEnvironment(living_reward=-0.05, render_mode='human'))
diff --git a/irlc/lectures/lec09/unf_policy_improvement_frozenlake.py b/irlc/lectures/lec09/unf_policy_improvement_frozenlake.py
new file mode 100644
index 0000000..7242b00
--- /dev/null
+++ b/irlc/lectures/lec09/unf_policy_improvement_frozenlake.py
@@ -0,0 +1,7 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.gridworld.gridworld_environments import BookGridEnvironment, FrozenLake
+from irlc.lectures.lec09.unf_policy_evaluation_gridworld import policy_improvement  # module is added under lec09 by this same patch
+
+if __name__ == "__main__":  # Script entry point: policy-improvement demo on FrozenLake.
+    env = FrozenLake(render_mode='human', living_reward=-0)
+    policy_improvement(env)  # trains the agent on env and closes the environment when done
diff --git a/irlc/lectures/lec09/unf_policy_improvement_gridworld.py b/irlc/lectures/lec09/unf_policy_improvement_gridworld.py
new file mode 100644
index 0000000..eb6d762
--- /dev/null
+++ b/irlc/lectures/lec09/unf_policy_improvement_gridworld.py
@@ -0,0 +1,7 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.gridworld.gridworld_environments import BookGridEnvironment
+from irlc.lectures.lec09.unf_policy_evaluation_gridworld import policy_improvement  # module is added under lec09 by this same patch
+
+if __name__ == "__main__":  # Script entry point: policy-improvement demo on the book gridworld.
+    env = BookGridEnvironment(render_mode='human', living_reward=-0.05)
+    policy_improvement(env)  # trains the agent on env and closes the environment when done
diff --git a/irlc/lectures/lec09/unf_vi_frozenlake.py b/irlc/lectures/lec09/unf_vi_frozenlake.py
new file mode 100644
index 0000000..4ece4f2
--- /dev/null
+++ b/irlc/lectures/lec09/unf_vi_frozenlake.py
@@ -0,0 +1,17 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.gridworld.gridworld_environments import FrozenLake
+from irlc.ex01.agent import train
+from irlc.gridworld.demo_agents.hidden_agents import ValueIterationAgent3
+from irlc import interactive
+
+def q1_vi(env):
+    """Demo: ValueIterationAgent3 (epsilon=0, gamma=1, only_update_current=False) paired with env via interactive(); env is reset, trained for 100 episodes, then closed."""
+    env, planner = interactive(env, ValueIterationAgent3(env, gamma=1, epsilon=0, only_update_current=False))
+    env.reset()  # reset is issued on the environment returned by interactive(), before training starts
+    train(env, planner, num_episodes=100)
+    env.close()
+
+
+if __name__ == "__main__":
+    # Script entry point: run the value-iteration demo on FrozenLake with render_mode='human'.
+    q1_vi(FrozenLake(living_reward=-0, render_mode='human'))
diff --git a/irlc/lectures/lec09/unf_vi_gridworld.py b/irlc/lectures/lec09/unf_vi_gridworld.py
new file mode 100644
index 0000000..766303f
--- /dev/null
+++ b/irlc/lectures/lec09/unf_vi_gridworld.py
@@ -0,0 +1,16 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.gridworld.gridworld_environments import BookGridEnvironment
+from irlc.ex01.agent import train
+from irlc.gridworld.demo_agents.hidden_agents import ValueIterationAgent3
+from irlc import interactive
+
+def q1_vi(env):
+    """Demo: ValueIterationAgent3 (epsilon=0, gamma=1, only_update_current=False) paired with env via interactive(); env is reset, trained for 100 episodes, then closed."""
+    env, planner = interactive(env, ValueIterationAgent3(env, gamma=1, epsilon=0, only_update_current=False))
+    env.reset()  # reset is issued on the environment returned by interactive(), before training starts
+    train(env, planner, num_episodes=100)
+    env.close()
+
+if __name__ == "__main__":
+    # Script entry point: run the value-iteration demo on the book gridworld (living_reward=-0.05).
+    q1_vi(BookGridEnvironment(living_reward=-0.05, render_mode='human'))
diff --git a/irlc/lectures/lec09/unf_vi_gridworld_stepwise.py b/irlc/lectures/lec09/unf_vi_gridworld_stepwise.py
new file mode 100644
index 0000000..152a91b
--- /dev/null
+++ b/irlc/lectures/lec09/unf_vi_gridworld_stepwise.py
@@ -0,0 +1,16 @@
+# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
+from irlc.gridworld.gridworld_environments import BookGridEnvironment
+from irlc.ex01.agent import train
+from irlc.gridworld.demo_agents.hidden_agents import ValueIterationAgent3
+from irlc import interactive
+
+def q1_vi(env):
+    """Demo: ValueIterationAgent3 with only_update_current=True (presumably one state updated per step -- TODO confirm); env is reset, trained for 100 episodes, then closed."""
+    env, planner = interactive(env, ValueIterationAgent3(env, gamma=1, epsilon=0, only_update_current=True))
+    env.reset()  # reset is issued on the environment returned by interactive(), before training starts
+    train(env, planner, num_episodes=100)
+    env.close()
+
+if __name__ == "__main__":
+    # Script entry point: run the stepwise value-iteration demo on the book gridworld with state printing off.
+    q1_vi(BookGridEnvironment(print_states=False, living_reward=-0.05, render_mode='human'))
-- 
GitLab