Commit 2b44091b authored by tuhe

Week 11

parent c16ac062
Showing changed files with 249 additions and 74 deletions
@@ -10,10 +10,10 @@ exam_tabular_examples
#solutions/ex07
#solutions/ex08
# solutions/ex09
solutions/ex10
solutions/ex11
solutions/ex12
solutions/ex13
#solutions/ex10
#solutions/ex11
#solutions/ex12
#solutions/ex13
#irlc/ex03
#irlc/ex04
@@ -36,8 +36,8 @@ solutions/ex13
#irlc/tests/tests_week07.py
#irlc/tests/tests_week08.py
# irlc/tests/tests_week09.py
irlc/tests/tests_week10.py
irlc/tests/tests_week11.py
#irlc/tests/tests_week10.py
#irlc/tests/tests_week11.py
irlc/tests/tests_week12.py
irlc/tests/tests_week13.py
@@ -74,7 +74,7 @@ irlc/exam/exam20*/solution
#irlc/lectures/lec08
# irlc/lectures/lec09
#irlc/lectures/lec10
irlc/lectures/lec11
#irlc/lectures/lec11
irlc/lectures/lec12
irlc/lectures/lec13
......
@@ -2,12 +2,17 @@
from irlc.lectures.lec10.lecture_10_mc_q_estimation import keyboard_play
from irlc.gridworld.gridworld_environments import BookGridEnvironment
from irlc.ex10.mc_agent import MCAgent
# from irlc.lectures.lec10.utils import MCAgentResettable
import numpy as np
if __name__ == "__main__":
np.random.seed(433)
env = BookGridEnvironment(render_mode='human',zoom=2)
env = BookGridEnvironment(render_mode='human',zoom=2, living_reward=-0.05)
# agent = MCAgent(env, gamma=0.9, epsilon=0.15, alpha=0.1, first_visit=True)
from irlc.lectures.lec10.utils import agent_reset
MCAgent.reset = agent_reset
agent = MCAgent(env, gamma=1.0, epsilon=0.15, alpha=None, first_visit=True)
# env, agent = interactive(env, agent)
keyboard_play(env,agent,method_label='MC control')
@@ -12,6 +12,8 @@ class CaughtGrid(GridworldEnvironment):
def __init__(self, **kwargs):
super().__init__(map, living_reward=1, zoom=1.5, **kwargs)
if __name__ == "__main__":
env = CaughtGrid(view_mode=1, render_mode='human')
agent = MCEvaluationAgent(env, gamma=1, alpha=None)
......
@@ -11,21 +11,6 @@ def keyboard_play(env, agent, method_label='MC',autoplay=False, num_episodes=100
env.close()
def automatic_play(env, agent, method_label='MC'):
# agent = PlayWrapper(agent, env)
env = VideoMonitor(env, agent=agent, fps=40, continious_recording=True, agent_monitor_keys=('pi', 'Q'), render_kwargs={'method_label': method_label})
train(env, agent, num_episodes=1000)
env.close()
def automatic_play_value(env, agent, method_label='MC'):
agent.label = method_label
env, agent = interactive(env, agent)
# env = VideoMonitor(env, agent=agent, fps=40, continious_recording=True, agent_monitor_keys=('v'), render_kwargs={'method_label': method_label})
# agent = PlayWrapper(agent, env)
train(env, agent, num_episodes=1000)
env.close()
if __name__ == "__main__":
env = BookGridEnvironment(render_mode='human', zoom=2, living_reward=-0.05)
from irlc.ex10.mc_agent import MCAgent
......
@@ -6,6 +6,6 @@ from irlc.ex10.mc_evaluate import MCEvaluationAgent
if __name__ == "__main__":
env = BookGridEnvironment(view_mode=1, render_mode='human', living_reward=-0.05)
agent = MCEvaluationAgent(env, gamma=.9, alpha=None, first_visit=False)
agent = MCEvaluationAgent(env, gamma=1, alpha=None, first_visit=False)
keyboard_play_value(env,agent,method_label='MC every')
@@ -6,6 +6,7 @@ from irlc.ex10.mc_evaluate import MCEvaluationAgent
import numpy as np
from irlc import interactive, train
class MCAgentOneState(MCEvaluationAgent):
def __init__(self, *args, state=None, **kwargs):
a = 34
@@ -17,18 +18,19 @@ class MCAgentOneState(MCEvaluationAgent):
def _clear_states(self, val=None):
for s in self.env.mdp.nonterminal_states:
# for a in self.env.mdp.A(s):
# self.Q[s,a] = 0
if s != self.state:
self.returns_sum_S[s] = val
self.returns_count_N[s] = val
if s in self.v:
# grab one successor state of s from the transition model Psr
k = next(iter(self.env.mdp.Psr(s, self.env.mdp.A(s)[0])))[0]
if not self.env.mdp.is_terminal(k):
del self.v[s]
def reset(self):
from irlc.lectures.lec10.utils import agent_reset
agent_reset(self)
self._clear_states(None)
def train(self, s, a, r, sp, done=False, info_s=None, info_sp=None):
# self.episode = [e for e in self.episode if e[0] == self.state]
@@ -39,6 +41,7 @@ class MCAgentOneState(MCEvaluationAgent):
if __name__ == "__main__":
env = BookGridEnvironment(render_mode='human', living_reward=-0.05, print_states=True, zoom=2)
agent = MCAgentOneState(env, gamma=1, alpha=None, first_visit=True)
method_label = 'MC (gamma=1)'
agent.label = method_label
@@ -49,16 +52,3 @@ if __name__ == "__main__":
num_episodes = 1000
train(env, agent, num_episodes=num_episodes)
env.close()
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
lt = np.linspace(np.log(1000), np.log(2000) + 0*5000)
plt.plot(lt, 5 + 2 * np.sqrt(lt / 500), 'k-')
plt.plot(lt, 10 + 2 * np.sqrt(lt / (np.exp(lt) - 500)), 'r-')
plt.xlabel('log(t)')
plt.show()
# keyboard_play(env,agent,method_label='MC (alpha=0.5)')
@@ -7,40 +7,6 @@ from irlc.ex10.mc_evaluate import MCEvaluationAgent
import numpy as np
from irlc import interactive, train
# class MCAgentOneState(MCEvaluationAgent):
# def __init__(self, *args, state=None, **kwargs):
# a = 34
# super().__init__(*args, **kwargs)
# if state is None:
# state = self.env.mdp.initial_state
# self.state = state
# self._clear_states()
#
# def _clear_states(self, val=None):
# for s in self.env.mdp.nonterminal_states:
# # for a in self.env.mdp.A(s):
# # self.Q[s,a] = 0
# if s != self.state:
# self.returns_sum_S[s] = val
# self.returns_count_N[s] = val
# if s in self.v:
# k = next(self.env.mdp.Psr(s, self.env.mdp.A(s)[0]).keys().__iter__() )[0]
# if not self.env.mdp.is_terminal(k):
#
# del self.v[s]
#
# def train(self, s, a, r, sp, done=False, info_s=None, info_sp=None):
# # self.episode = [e for e in self.episode if e[0] == self.state]
# self._clear_states(0)
# super().train(s, a, r, sp, done)
# # Clear out many of the state, actions:
# self._clear_states(None)
# # for s in self.env.mdp.nonterminal_states:
# # if s != self.state:
# # self.v[s] = None
#
# pass
if __name__ == "__main__":
env = BookGridEnvironment(render_mode='human', living_reward=-0.05)
......
@@ -2,8 +2,10 @@
from irlc.lectures.lec10.lecture_10_mc_q_estimation import automatic_play_value
from irlc.gridworld.gridworld_environments import BookGridEnvironment
from irlc.ex10.td0_evaluate import TD0ValueAgent
from irlc.lectures.lec10.utils import agent_reset
if __name__ == "__main__":
env = BookGridEnvironment(render_mode='human', living_reward=-0.05)
TD0ValueAgent.reset = agent_reset
agent = TD0ValueAgent(env, gamma=1.0, alpha=0.2)
automatic_play_value(env,agent,method_label='TD(0)')
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.ex10.mc_agent import MCAgent
from irlc.ex09.rl_agent import TabularQ
class MCAgentResettable(MCAgent):
def reset(self):
return agent_reset(self)
def agent_reset(self):
# General reset option. Works on many agents.
attrs = ['returns_sum_S', 'returns_count_N', 'Q', 'v']
for attr in attrs:
if hasattr(self, attr):
at = getattr(self, attr)
if isinstance(at, dict):
at.clear()
if hasattr(self, 'Q') and isinstance(self.Q, TabularQ):
self.Q.q_.clear()
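Below is a minimal usage sketch of the reset helper, assuming the irlc API used elsewhere in this commit (interactive, train, BookGridEnvironment, MCAgent); the hyperparameters are illustrative and mirror the lec10/lec11 lecture scripts above, not a prescribed configuration.
from irlc import interactive, train
from irlc.gridworld.gridworld_environments import BookGridEnvironment
from irlc.ex10.mc_agent import MCAgent
from irlc.lectures.lec10.utils import agent_reset
# Attach the generic reset so the agent's tables can be cleared between interactive runs.
MCAgent.reset = agent_reset
env = BookGridEnvironment(render_mode='human', living_reward=-0.05, zoom=2)
agent = MCAgent(env, gamma=1.0, epsilon=0.15, alpha=None, first_visit=True)
env, agent = interactive(env, agent)
train(env, agent, num_episodes=1000)
env.close()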
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.berkley.rl.semi_grad_q import LinearSemiGradQAgent
from irlc.ex11.feature_encoder import GridworldXYEncoder
from irlc.gridworld.gridworld_environments import BookGridEnvironment
from irlc.lectures.lec10.lecture_10_mc_q_estimation import keyboard_play
if __name__ == "__main__":
env = BookGridEnvironment(render_mode='human')
agent = LinearSemiGradQAgent(env, gamma=0.95, epsilon=0.1, alpha=.01, q_encoder=GridworldXYEncoder(env))
keyboard_play(env, agent, method_label="Q-lin-xy")
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.gridworld.gridworld_environments import OpenGridEnvironment
from irlc.lectures.lec10.lecture_10_mc_q_estimation import keyboard_play
from irlc.ex11.sarsa_agent import SarsaAgent
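# Helper: run a given agent class on the open gridworld with fixed hyperparameters (gamma=0.99, epsilon=0.1, alpha=0.5) and play it back via keyboard_play.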
def open_play(Agent, method_label, frames_per_second=30, **args):
env = OpenGridEnvironment(render_mode='human', frames_per_second=frames_per_second)
agent = Agent(env, gamma=0.99, epsilon=0.1, alpha=.5, **args)
method_label = f"{method_label} (gamma=0.99, epsilon=0.1, alpha=0.5)"
keyboard_play(env, agent, method_label=method_label)
if __name__ == "__main__":
open_play(SarsaAgent, method_label="Sarsa")
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.lectures.lec11.mountain_car_env import FancyMountainCar
from irlc.pacman.pacman_resources import WHITE, BLACK
from irlc.utils.graphics_util_pygame import GraphicsUtilGym
from irlc.lectures.lec11.mountain_car_env import MountainCarVisualization
from irlc.ex11.semi_grad_sarsa import LinearSemiGradSarsa
if __name__ == '__main__':
from irlc import Agent, interactive, train
env = FancyMountainCar(render_mode='human')
num_of_tilings = 8
alpha = 0.3
# env = gym.make("MountainCar-v0")
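# The effective step size is alpha/num_of_tilings, i.e. the learning rate is scaled down by the number of tilings (the usual tile-coding convention).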
agent = LinearSemiGradSarsa(env, gamma=1, alpha=alpha/num_of_tilings, epsilon=0)
# agent = Agent(env)
env, agent = interactive(env, agent)
train(env, agent, num_episodes=10)
env.close()
pass
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.lectures.lec11.mountain_car_env import FancyMountainCar
from irlc.ex11.semi_grad_sarsa import LinearSemiGradSarsa
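# Agent variant whose train() is a no-op, so its weight vector is never updated; the __main__ block below gets the same effect by running LinearSemiGradSarsa with alpha=0.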
class RandomWeightAgent(LinearSemiGradSarsa):
def train(self, *args, **kwargs):
pass
pass
if __name__ == '__main__':
from irlc import Agent, interactive, train
env = FancyMountainCar(render_mode='human')
num_of_tilings = 8
alpha = 0
agent = LinearSemiGradSarsa(env, gamma=1, alpha=alpha/num_of_tilings, epsilon=0)
# agent = Agent(env)
env, agent = interactive(env, agent)
train(env, agent, num_episodes=10)
env.close()
pass
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
import numpy as np
from irlc.lectures.lec11.mountain_car_env import FancyMountainCar
from irlc.ex11.semi_grad_sarsa import LinearSemiGradSarsa
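# Agent variant that overwrites the learned weight vector with fresh random values after every training step.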
class RandomWeightAgent(LinearSemiGradSarsa):
def train(self, *args, **kwargs):
super().train(*args, **kwargs)
self.Q.w = np.random.randn(self.Q.w.shape[0])
if __name__ == '__main__':
from irlc import Agent, interactive, train
env = FancyMountainCar(render_mode='human')
num_of_tilings = 8
alpha = 0.3
# env = gym.make("MountainCar-v0")
agent = RandomWeightAgent(env) #(env, gamma=1, alpha=alpha/num_of_tilings, epsilon=0)
env, agent = interactive(env, agent)
train(env, agent, num_episodes=10)
env.close()
pass
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
# from irlc.berkley.rl.feature_encoder import SimplePacmanExtractor
from irlc.ex11.nstep_sarsa_agent import SarsaNAgent
from irlc.exam_tabular_examples.sarsa_nstep_delay import SarsaDelayNAgent
from irlc.lectures.lec11.lecture_10_sarsa_open import open_play
if __name__ == "__main__":
# env = OpenGridEnvironment()
# agent = (env, gamma=0.95, epsilon=0.1, alpha=.5)
open_play(SarsaDelayNAgent, method_label="N-step Sarsa n=8", n=8)
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.ex11.semi_grad_q import LinearSemiGradQAgent
from irlc.pacman.pacman_environment import PacmanEnvironment, PacmanWinWrapper
from irlc.ex11.feature_encoder import SimplePacmanExtractor
import matplotlib.pyplot as plt
# from irlc.utils.video_monitor import VideoMonitor
from irlc.ex01.agent import train
# from irlc import PlayWrapper
from irlc import interactive
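# Train for 100 episodes, then re-run 100 episodes greedily (epsilon=0, alpha=0) on the PacmanWinWrapper environment with interactive rendering.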
def play_pacman(env, agent, layout = 'smallGrid'):
train(env, agent, num_episodes=100)
env2 = PacmanWinWrapper(env)
# env2 = Monitor(env2, directory="experiments/randomdir", force=True)
# env2 = VideoMonitor(env2)
env2, agent = interactive(env2, agent)
agent.epsilon = 0
agent.alpha = 0
# agent = PlayWrapper(agent, env2)
train(env2, agent, num_episodes=100)
plt.show()
env.close()
if __name__ == "__main__":
layout = 'smallGrid'
env = PacmanEnvironment(animate_movement=True, layout=layout, render_mode='human', frames_per_second=100)
qex = SimplePacmanExtractor(env)
agent = LinearSemiGradQAgent(env, epsilon=0.05, alpha=0.1, gamma=0.8, q_encoder=qex)
play_pacman(env, agent, layout = 'smallGrid')
# main_plot('experiments/q_lin')
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.pacman.pacman_environment import PacmanEnvironment, PacmanWinWrapper
# from irlc.berkley.rl.feature_encoder import SimplePacmanExtractor
# from irlc.utils.player_wrapper_pyglet import PlayWrapper
from irlc import main_plot
import matplotlib.pyplot as plt
# from irlc.utils.video_monitor import VideoMonitor
from irlc.ex01.agent import train
# from irlc.lectures.lecture_09_mc import keyboard_play
from irlc.ex11.q_agent import QAgent
from irlc import interactive
def play_pacman(env, agent, layout = 'smallGrid'):
train(env, agent, num_episodes=100)
env2 = PacmanWinWrapper(env)
# env2 = Monitor(env2, directory="experiments/randomdir", force=True)
# env2 = VideoMonitor(env2)
env2, agent = interactive(env2, agent)
agent.epsilon = 0
agent.alpha = 0
# agent = PlayWrapper(agent, env2)
train(env2, agent, num_episodes=100)
plt.show()
env.close()
if __name__ == "__main__":
layout = 'smallGrid'
env = PacmanEnvironment(animate_movement=False, layout=layout, render_mode='human')
agent = QAgent(env, epsilon=0.05, alpha=0.1, gamma=0.8)
# from irlc import PlayWrapper
# agent = PlayWrapper(agent, env)
play_pacman(env, agent, layout = 'smallGrid')
# main_plot('experiments/q_lin')
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
# from irlc.berkley.rl.feature_encoder import SimplePacmanExtractor
from irlc.gridworld.gridworld_environments import BookGridEnvironment
from irlc.lectures.lec10.lecture_10_mc_q_estimation import keyboard_play
from irlc.ex11.q_agent import QAgent
if __name__ == "__main__":
env = BookGridEnvironment(render_mode='human')
agent = QAgent(env, gamma=0.95, epsilon=0.1, alpha=.2)
keyboard_play(env, agent, method_label="Q-learning")
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.gridworld.gridworld_environments import CliffGridEnvironment, CliffGridEnvironment2
from irlc.ex11.q_agent import QAgent
# def cliffwalk(env, agent, method_label="method"):
# agent = PlayWrapper(agent, env)
# env = VideoMonitor(env, agent=agent, fps=100, continious_recording=True, agent_monitor_keys=('pi', 'Q'), render_kwargs={'method_label': method_label})
# train(env, agent, num_episodes=200)
# env.close()
from irlc.lectures.lec11.lecture_11_sarsa_cliff import cliffwalk, gamma, alpha, epsi
if __name__ == "__main__":
import numpy as np
np.random.seed(1)
env = CliffGridEnvironment2(zoom=.8, render_mode='human')
agent = QAgent(env, gamma=gamma, epsilon=epsi, alpha=alpha)
cliffwalk(env, agent, method_label="Q-learning")