Commit c13c7d04 authored by tuhe

Exercises

parent 3344e577
Showing with 1399 additions and 0 deletions
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
"""This directory contains the exercises for week 6."""
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
"""
References:
[Her25] Tue Herlau. Sequential decision making. (Freely available online), 2025.
"""
import numpy as np
import matplotlib.pyplot as plt
from irlc import savepdf
from irlc import train
from irlc.ex06.model_boeing import BoeingEnvironment
from irlc.ex06.lqr_agent import LQRAgent
from irlc.ex03.control_model import ControlModel
import scipy
def boeing_simulation():
env = BoeingEnvironment(Tmax=10)
model = env.discrete_model.continuous_model # get the model from the Boeing environment
dt = env.dt # Get the discretization time.
A, B, d = compute_A_B_d(model, dt)
# Use compute_Q_R_q to get the Q, R, and q matrices in the discretized system
# TODO: 1 lines missing.
raise NotImplementedError("Compute Q, R and q here")
## TODO: Half of each line of code in the following 1 lines have been replaced by garbage. Make it work and remove the error.
#----------------------------------------------------------------------------------------------------------------------------
# agent = LQRAgent(env, A=A??????????????????????????
raise NotImplementedError("Use your LQRAgent to plan using the system matrices.")
stats, trajectories = train(env, agent, return_trajectory=True)
return stats, trajectories, env
def compute_Q_R_q(model : ControlModel, dt : float):
cost = model.get_cost() # Get the continuous-time cost-function
# use print(cost) to see what it contains.
# Then get the discretized matrices using the techniques described in (Her25, Subsection 13.1.6).
# TODO: 3 lines missing.
raise NotImplementedError("Insert your solution and remove this error.")
return Q, R, q
def compute_A_B_d(model : ControlModel, dt : float):
if model.d is None:
d = np.zeros((model.state_size,)) # Ensure d is set to a zero vector if it is not defined.
else:
d = model.d
A_discrete = scipy.linalg.expm(model.A * dt) # This is the discrete A-matrix computed using the matrix exponential
# Now it is your job to define B_discrete and d_discrete.
# TODO: 2 lines missing.
raise NotImplementedError("Insert your solution and remove this error.")
return A_discrete, B_discrete, d_discrete.flatten()
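# Sanity check (illustrative, not part of the exercise): for a model with A = 0 the matrix exponential is the
# identity, so a correct implementation should return A_discrete = I and, for the exact (zero-order hold)
# discretization, B_discrete = dt * B and d_discrete = dt * d. This gives a quick way to test compute_A_B_d.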
def boeing_experiment():
_, trajectories, env = boeing_simulation()
model = env.discrete_model.continuous_model
dt = env.dt
Q, R, q = compute_Q_R_q(model, dt)
print("Discretization time is", dt)
print("Original q-vector was:", model.get_cost().q)
print("Discretized version is:", q)
t = trajectories[-1]
out = t.state @ model.P.T
plt.plot(t.time, out[:, 0], '-', label=env.observation_labels[0])
plt.plot(t.time, out[:, 1], '-', label=env.observation_labels[1])
plt.grid()
plt.legend()
plt.xlabel("Time/seconds")
plt.ylabel("Output")
savepdf("boing_lqr_output")
plt.show(block=False)
plt.close()
plt.plot(t.time[:-1], t.action[:, 0], '-', label=env.action_labels[0])
plt.plot(t.time[:-1], t.action[:, 1], '-', label=env.action_labels[1])
plt.xlabel("Time/seconds")
plt.ylabel("Control action")
plt.grid()
plt.legend()
savepdf("boing_lqr_action")
plt.show()
if __name__ == "__main__":
boeing_experiment()
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
"""
References:
[Her25] Tue Herlau. Sequential decision making. (Freely available online), 2025.
"""
import numpy as np
import matplotlib.pyplot as plt
from irlc import bmatrix
from irlc import savepdf
def LQR(A : list, # Dynamics
B : list, # Dynamics
d : list =None, # Dynamics (optional)
Q : list=None,
R: list=None,
H : list=None,
q : list=None,
r : list=None,
qc : list=None,
QN : np.ndarray =None, # Terminal cost term
qN : np.ndarray=None, # Terminal cost term
qcN : np.ndarray =None, # Terminal cost term.
mu : float =0 # regularization parameter which will only be relevant next week.
):
r"""
Implement the LQR as defined in (Her25, Algorithm 22). I recommend viewing this documentation online (documentation for week 6).
When you solve this exercise, look at the algorithm in the book. Since the LQR problem is of the form:
.. math::
x_{k+1} = A_k x_k + B_k u_k + d_k
For :math:`k=0,\dots,N-1` this means there are :math:`N` matrices :math:`A_k`. This is implemented by assuming that
:python:`A` (i.e., the input argument) is a :python:`list` of length :math:`N` so that :python:`A[k]` corresponds
to :math:`A_k`.
Similar conventions are used for the cost term (please see the lecture notes or the online documentation for their meaning). Recall it has the form:
.. math::
c(x_k, u_k) = \frac{1}{2} \mathbf x_k^\top Q_k \mathbf x_k + \frac{1}{2} \mathbf q_k^\top \mathbf x_k + q_k + \cdots
When the function is called, the vector :math:`\textbf{q}_k` corresponds to :python:`q` and the constant :math:`q_k` corresponds to :python:`qc` (q-constant).
.. note::
Only the terms :python:`A` and :python:`B` are required. The rest of the terms will default to 0-matrices.
The LQR algorithm will ultimately compute a control law of the form:
.. math::
\mathbf u_k = L_k \mathbf x_k + \mathbf l_k
And a cost-to-go function as:
.. math::
J_k(x_k) = \frac{1}{2} \mathbf x_k^\top V_k \mathbf x_k + \mathbf v_k^\top \mathbf x_k + \mathrm{vc}_k
There are :math:`N` control-law terms :math:`L_k, \mathbf l_k` and :math:`N+1` cost-to-go terms. The function then returns :python:`(L, l), (V, v, vc)` so that :python:`L[k]` corresponds to :math:`L_k`.
:param A: A list of :python:`np.ndarray` containing all terms :math:`A_k`
:param B: A list of :python:`np.ndarray` containing all terms :math:`B_k`
:param d: A list of :python:`np.ndarray` containing all terms :math:`\mathbf d_k` (optional)
:param Q: A list of :python:`np.ndarray` containing all terms :math:`Q_k` (optional)
:param R: A list of :python:`np.ndarray` containing all terms :math:`R_k` (optional)
:param H: A list of :python:`np.ndarray` containing all terms :math:`H_k` (optional)
:param q: A list of :python:`np.ndarray` containing all terms :math:`\mathbf q_k` (optional)
:param r: A list of :python:`np.ndarray` containing all terms :math:`\mathbf r_k` (optional)
:param qc: A list of :python:`float` containing all terms :math:`q_k` (i.e., constant terms) (optional)
:param QN: A :python:`np.ndarray` containing the terminal cost term :math:`Q_N` (optional)
:param qN: A :python:`np.ndarray` containing the terminal cost term :math:`\mathbf q_N` (optional)
:param qcN: A :python:`np.ndarray` containing the terminal cost term :math:`q_N`
:param mu: A regularization term which is useful for iterative-LQR (next week). Defaults to 0.
:return: A tuple of the form :python:`(L, l), (V, v, vc)` corresponding to the control and cost-matrices.
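A minimal usage sketch (illustrative only; it assumes the function below has been completed, and the system is just a toy example)::

    N = 10
    A, B = np.eye(2), np.asarray([[0.0], [1.0]])
    (L, l), (V, v, vc) = LQR(A=[A]*N, B=[B]*N, Q=[np.eye(2)]*N, R=[np.eye(1)]*N)
    x0 = np.asarray([1.0, 0.0])
    u0 = L[0] @ x0 + l[0]   # the control action in the first time step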
"""
N = len(A)
n,m = B[0].shape
# Initialize empty lists for control matrices and cost terms
L, l = [None]*N, [None]*N
V, v, vc = [None]*(N+1), [None]*(N+1), [None]*(N+1)
# Initialize constant cost-function terms to zero if not specified.
# They will be initialized to zero, meaning they have no effect on the update rules.
QN = np.zeros((n,n)) if QN is None else QN
qN = np.zeros((n,)) if qN is None else qN
qcN = 0 if qcN is None else qcN
H, q, qc, r = init_mat(H,m,n,N=N), init_mat(q,n,N=N), init_mat(qc,1,N=N), init_mat(r,m,N=N)
d = init_mat(d,n, N=N)
""" In the next line, you should initialize the last cost-term. This is similar to how we in DP had the initialization step
> J_N(x_N) = g_N(x_N)
Except that since x_N is no longer discrete, we store it as matrices/vectors representing a second-order polynomial, i.e.
> J_N(X_N) = 1/2 * x_N' V[N] x_N + v[N]' x_N + vc[N]
"""
# TODO: 1 lines missing.
raise NotImplementedError("Initialize V[N], v[N], vc[N] here")
In = np.eye(n)
for k in range(N-1,-1,-1):
# When you update S_uu and S_ux remember to add regularization as the terms ... (V[k+1] + mu * In) ...
# Note that to compute x = A^{-1} y in a numerically stable manner, you should use
# >>> x = np.linalg.solve(A, y)
# rather than forming the inverse explicitly.
# The terms you need to update will be, in turn:
# Suu = ...
# Sux = ...
# Su = ...
# L[k] = ...
# l[k] = ...
# V[k] = ...
# V[k] = ...
# v[k] = ...
# vc[k] = ...
## TODO: Half of each line of code in the following 4 lines have been replaced by garbage. Make it work and remove the error.
#----------------------------------------------------------------------------------------------------------------------------
# Suu = R[k] + B[k].T @ (????????????????????????
# Sux = H[k] + B[k].T @ (????????????????????????
# Su = r[k] + B[k].T @ v[k + 1?????????????????????????????
# L[k] = -np.linal?????????????????
raise NotImplementedError("Insert your solution and remove this error.")
l[k] = -np.linalg.solve(Suu, Su) # You get this for free. Notice how we use np.linalg.solve(A,x) to compute A^{-1} x
V[k] = Q[k] + A[k].T @ V[k+1] @ A[k] - L[k].T @ Suu @ L[k]
V[k] = 0.5 * (V[k] + V[k].T) # I recommend putting this here to keep V positive semidefinite
# You get these for free: Compare to the code in the algorithm.
v[k] = q[k] + A[k].T @ (v[k+1] + V[k+1] @ d[k]) + Sux.T @ l[k]
vc[k] = vc[k+1] + qc[k] + d[k].T @ v[k+1] + 1/2*( d[k].T @ V[k+1] @ d[k] ) + 1/2*l[k].T @ Su
return (L,l), (V,v,vc)
def init_mat(X, a, b=None, N=None):
"""
Helper function. Check if X is None, and if so return a list
[A, A,....]
which is N long and where each A is a (a x b) zero-matrix, else returns X repeated N times:
[X, X, ...]
"""
M0 = np.zeros((a,) if b is None else (a, b))
if X is not None:
return [m if m is not None else M0 for m in X]
else:
return [M0] * N
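# Illustrative examples (not used elsewhere): init_mat(None, 2, N=3) returns [np.zeros(2)]*3, while
# init_mat([X, None], 2, N=2) keeps X in the first slot and replaces the None with a zero vector.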
def lqr_rollout(x0,A,B,d,L,l):
"""
Compute a rollout (states and actions) given solution from LQR controller function.
x0 is a vector (starting state), and A, B, d and L, l are lists of system/control matrices.
"""
x, states,actions = x0, [x0], []
n,m = B[0].shape
N = len(L)
d = init_mat(d,n,1,N) # Initialize as a list of zero matrices [ np.zeros((n,1)), np.zeros((n,1)), ...]
l = init_mat(l,m,1,N) # Initialize as a list of zero matrices [ np.zeros((m,1)), np.zeros((m,1)), ...]
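# Note: with d=None and l=None the rollout therefore reduces to the purely linear feedback
# u_k = L_k x_k and the dynamics x_{k+1} = A_k x_k + B_k u_k.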
for k in range(N):
u = L[k] @ x + l[k]
x = A[k] @ x + B[k] @ u + d[k]
actions.append(u)
states.append(x)
return states, actions
if __name__ == "__main__":
"""
Solve this problem (see also lecture notes for the same example)
http://cse.lab.imtlucca.it/~bemporad/teaching/ac/pdf/AC2-04-LQR-Kalman.pdf
"""
N = 20
A = np.ones((2,2))
A[1,0] = 0
B = np.asarray([[0], [1]])
Q = np.zeros((2,2))
R = np.ones((1,1))
print("System matrices A, B, Q, R")
print(bmatrix(A))
print(bmatrix(B))
print(bmatrix(Q))
print(bmatrix(R))
for rho in [0.1, 10, 100]:
Q[0,0] = 1/rho
(L,l), (V,v,vc) = LQR(A=[A]*N, B=[B]*N, d=None, Q=[Q]*N, R=[R]*N, QN=Q)
x0 = np.asarray( [[1],[0]])
trajectory, actions = lqr_rollout(x0,A=[A]*N, B=[B]*N, d=None,L=L,l=l)
xs = np.concatenate(trajectory, axis=1)[0,:]
plt.plot(xs, 'o-', label=f'rho={rho}')
k = 10
print(f"Control matrix in u_k = L_k x_k + l_k at k={k}:", L[k])
for k in [N-1,N-2,0]:
print(f"L[{k}] is:", L[k].round(4))
plt.title("Double integrator")
plt.xlabel('Steps $k$')
plt.ylabel('$x_1 = $ x[0]')
plt.legend()
plt.grid()
savepdf("dlqr_double_integrator")
plt.show()
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
import numpy as np
from irlc.ex06.dlqr import LQR
def urnd(sz):
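"""Helper: a uniformly random array in [-1, 1] with shape sz (used to build a nonsense test problem below)."""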
return np.random.uniform(-1, 1, sz)
def check_LQR():
np.random.seed(42)
n,m,N = 3,2,4
"""
Create a randomized, nonsense control problem and solve it. Since the seed is fixed, we can expect the same solution every time.
"""
# system terms
A = [urnd((n, n)) for _ in range(N)]
B = [urnd((n, m)) for _ in range(N)]
d = [urnd((n,)) for _ in range(N)]
# cost terms
Q = [urnd((n, n)) for _ in range(N)]
R = [urnd((m, m)) for _ in range(N)]
H = [urnd((m, n)) for _ in range(N)]
q = [urnd((n,)) for _ in range(N)]
qc = [urnd(()) for _ in range(N)]
r = [urnd((m,)) for _ in range(N)]
# terminal costs
QN = urnd((n, n))
qN = urnd((n,))
qcN = urnd(())
return LQR(A=A, B=B, d=d, Q=Q, R=R, H=H, q=q, r=r, qc=qc, QN=QN, qN=qN, qcN=qcN, mu=0)
if __name__ == "__main__":
(L, l), (V, v, vc) = check_LQR()
N = len(V)-1
print(", ".join([f"l[{k}]={l[k].round(4)}" for k in [N - 1, N - 2, 0]]))
print("\n".join([f"L[{k}]={L[k].round(4)}" for k in [N - 1, N - 2, 0]]))
print("\n".join([f"V[{k}]={V[k].round(4)}" for k in [0]]))
print(", ".join([f"v[{k}]={v[k].round(4)}" for k in [N, N - 1, 0]]))
print(", ".join([f"vc[{k}]={vc[k].round(4)}" for k in [N, N - 1, 0]]))
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.ex04.locomotive import LocomotiveEnvironment
from irlc import train, plot_trajectory, savepdf, Agent
from irlc.ex06.dlqr import LQR
from irlc.ex04.control_environment import ControlEnvironment
import numpy as np
import matplotlib.pyplot as plt
class LQRAgent(Agent):
def __init__(self, env : ControlEnvironment, A, B, Q, R, d=None, q=None):
N = int((env.Tmax / env.dt)) # Obtain the planning horizon
""" Define A, B as the list of A/B matrices here. I.e. x[t+1] = A x[t] + B x[t] + d.
You should use the function model.f to do this, which has build-in functionality to compute Jacobians which will be equal to A, B """
""" Define self.L, self.l here as the (lists of) control matrices. """
## TODO: Half of each line of code in the following 1 lines have been replaced by garbage. Make it work and remove the error.
#----------------------------------------------------------------------------------------------------------------------------
# (self.L, self.l), _ = LQR(A=[A]*N, B=[B]*N, d=[d]*N if d is not No???????????????????????????????????????????????????????????????????
raise NotImplementedError("Insert your solution and remove this error.")
self.dt = env.dt
super().__init__(env)
def pi(self,x, k, info=None):
"""
Compute the action here using u = L_k x + l_k.
You should use self.L, self.l to get the control matrices (i.e. L_k = self.L[k]).
"""
# TODO: 1 lines missing.
raise NotImplementedError("Compute current action here")
return u
if __name__ == "__main__":
# Make a guess at the system matrices for planning. We will return to how to compute these exactly in a later exercise.
A = np.ones((2, 2))
A[1, 0] = 0
B = np.asarray([[0], [1]])
Q = np.eye(2)*3
R = np.ones((1, 1))*2
q = np.asarray([-1.1, 0 ])
# Create and test our LQRAgent.
env = LocomotiveEnvironment(render_mode='human', Tmax=10, slope=1)
agent = LQRAgent(env, A=A, B=B, Q=Q, R=R, q=q)
stats, traj = train(env, agent, num_episodes=1)
env.reset()
savepdf("locomotive_snapshot.pdf", env=env) # Make a plot for the exercise file.
env.state_labels = ["x(t)", "v(t)"]
env.action_labels = ["u(t)"]
plot_trajectory(traj[0], env)
plt.show(block=True)
savepdf("lqr_agent")
plt.show()
env.close()
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
import matplotlib.pyplot as plt
import numpy as np
from irlc import savepdf, train
from irlc.ex04.pid_locomotive_agent import PIDLocomotiveAgent
from irlc.ex06.lqr_agent import LQRAgent
from irlc.ex04.model_harmonic import HarmonicOscilatorEnvironment
from irlc.ex06.boeing_lqr import compute_A_B_d, compute_Q_R_q
class ConstantLQRAgent(LQRAgent):
# TODO: 3 lines missing.
raise NotImplementedError("Complete this agent here. You need to update the policy-function: def pi(self, ..).")
def get_Kp_Kd(L0):
# TODO: 1 lines missing.
raise NotImplementedError("Use lqr_agent.L to define Kp and Kd.")
return Kp, Kd
if __name__ == "__main__":
Delta = 0.06 # Time discretization constant
# Define a harmonic oscillator environment. Use ..., render_mode='human' to see a visualization.
env = HarmonicOscilatorEnvironment(Tmax=8, dt=Delta, m=0.5, R=np.eye(1) * 8, render_mode=None) # set render_mode='human' to see the oscillator.
model = env.discrete_model.continuous_model # Get the ControlModel corresponding to this environment.
# Compute the discretized A, B and d matrices using the helper functions we defined in the Boeing problem.
# Note that these are for the discrete environment: x_{k+1} = A x_k + B u_k + d
A, B, d = compute_A_B_d(model, Delta)
Q, R, q = compute_Q_R_q(model, Delta)
# Run the LQR agent
lqr_agent = LQRAgent(env, A=A, B=B, d=d, Q=Q, R=R, q=q)
_, traj1 = train(env, lqr_agent, return_trajectory=True)
# Part 1. Build an agent that always takes actions u_k = L_0 x_k + l_0
constant_agent = ConstantLQRAgent(env, A=A, B=B, d=d, Q=Q, R=R, q=q)
# Check that its policy is independent of $k$:
x0, _ = env.reset()
print(f"Initial state is {x0=}")
print(f"Action at time step k=0 {constant_agent.pi(x0, k=0)=}")
print(f"Action at time step k=5 (should be the same) {constant_agent.pi(x0, k=0)=}")
_, traj2 = train(env, constant_agent, return_trajectory=True)
# Part 2. Use the L and l matrices (see lqr_agent.L and lqr_agent.l)
# to select Kp and Kd in a PID agent. Then let's use the Locomotive agent to see the effect of the controller.
# Use render_mode='human' to see its effect.
# We only need to use L.
# Hint: compare the form of the LQR and PID controller and use that to select Kp and Kd.
Kp, Kd = get_Kp_Kd(lqr_agent.L[0]) # Use lqr_agent.L to define Kp and Kd.
# Define and run the PID agent.
pid_agent = PIDLocomotiveAgent(env, env.dt, Kp=Kp, Kd=Kd)
_, traj3 = train(env, pid_agent, return_trajectory=True)
# Plot all actions and state sequences.
plt.figure(figsize=(10,5))
plt.grid()
plt.plot(traj1[0].time[:-1], traj1[0].action, label="Optimal LQR action sequence")
plt.plot(traj2[0].time[:-1], traj2[0].action, '.-', label="Constant LQR action sequence")
plt.plot(traj3[0].time[:-1], traj3[0].action, label="PID agent action sequence")
plt.xlabel("Time / Seconds")
plt.ylabel("Action / Newtons")
plt.ylim([-.2, .2])
plt.legend()
savepdf("pid_lqr_actions")
plt.show(block=True)
plt.figure(figsize=(10, 5))
plt.grid()
plt.plot(traj1[0].time, traj1[0].state[:, 0], label="Optimal LQR states x(t)")
plt.plot(traj2[0].time, traj2[0].state[:, 0], label="Constant LQR states x(t)")
plt.plot(traj3[0].time, traj3[0].state[:, 0], label="PID agent states x(t)")
plt.xlabel("Time / Seconds")
plt.ylabel("Position x(t) / Meters")
plt.ylim([-1, 1])
plt.legend()
savepdf("pid_lqr_states")
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
import numpy as np
from irlc.ex04.discrete_control_model import DiscreteControlModel
from irlc.ex04.control_environment import ControlEnvironment
from irlc.ex04.model_linear_quadratic import LinearQuadraticModel
class BoeingModel(LinearQuadraticModel):
"""
Boeing 747 level flight example.
See: https://books.google.dk/books?id=tXZDAAAAQBAJ&pg=PA147&lpg=PA147&dq=boeing+747+flight+0.322+model+longitudinal+flight&source=bl&ots=L2RpjCAWiZ&sig=ACfU3U2m0JsiHmUorwyq5REcOj2nlxZkuA&hl=en&sa=X&ved=2ahUKEwir7L3i6o3qAhWpl4sKHQV6CdcQ6AEwAHoECAoQAQ#v=onepage&q=boeing%20747%20flight%200.322%20model%20longitudinal%20flight&f=false
Also: https://web.stanford.edu/~boyd/vmls/vmls-slides.pdf
"""
state_labels = ["Longitudinal velocity (x) ft/sec", "Velocity in y-axis ft/sec", "Angular velocity",
"angle wrt. horizontal"]
action_labels = ['Elevator', "Throttle"]
observation_labels = ["Airspeed", "Climb rate"]
def __init__(self, output=None):
if output is None:
output = [10, 0]
# output = [10, 0]
A = [[-0.003, 0.039, 0, -0.322],
[-0.065, -.319, 7.74, 0],
[.02, -.101, -0.429, 0],
[0, 0, 1, 0]]
B = [[.01, 1],
[-.18, -.04],
[-1.16, .598],
[0, 0]]
A, B = np.asarray(A), np.asarray(B)
self.u0 = 7.74 # speed in hundreds of feet per second
self.P = np.asarray([[1, 0, 0, 0], [0, -1, 0, 7.74]]) # Projection of the state onto the outputs (airspeed and climb rate)
dt = 0.1 # Scale the cost by this factor.
# Set up the cost:
self.Q_obs = np.eye(2)
Q = self.P.T @ self.Q_obs @ self.P / dt
R = np.eye(2) / dt
q = -np.asarray(output) @ self.Q_obs @ self.P / dt
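# Note (illustrative): with Q = P^T Q_obs P and q = -output^T Q_obs P, the quadratic cost is, up to a constant
# and the 1/dt scaling, a tracking cost penalizing the deviation of the output y = P x (airspeed and climb rate)
# from the desired `output` vector.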
super().__init__(A=A, B=B, Q=Q, R=R, q=q)
def state2outputs(self, x):
return self.P @ x
class DiscreteBoeingModel(DiscreteControlModel):
def __init__(self, output=None):
model = BoeingModel(output=output)
dt = 0.1
super().__init__(model=model, dt=dt)
class BoeingEnvironment(ControlEnvironment):
@property
def observation_labels(self):
return self.discrete_model.continuous_model.observation_labels
def __init__(self, Tmax=10):
model = DiscreteBoeingModel()
super().__init__(discrete_model=model, Tmax=Tmax)
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
import numpy as np
from irlc.utils.graphics_util_pygame import UpgradedGraphicsUtil
from irlc.ex04.discrete_control_model import DiscreteControlModel
from irlc.ex04.control_environment import ControlEnvironment
from irlc.ex04.model_linear_quadratic import LinearQuadraticModel
from gymnasium.spaces import Box
"""
SEE: https://github.com/anassinator/ilqr/blob/master/examples/rendezvous.ipynb
"""
class ContiniousRendevouzModel(LinearQuadraticModel):
state_labels= ["x0", "y0", "x1", "y1", 'Vx0', "Vy0", "Vx1", "Vy1"]
action_labels = ['Fx0', 'Fy0', "Fx1", "Fy1"]
x0 = np.array([0, 0, 10, 10, 0, -5, 5, 0]) # Initial state.
def __init__(self, m=10.0, alpha=0.1, simple_bounds=None, cost=None):
m00 = np.zeros((4,4))
mI = np.eye(4)
A = np.block( [ [m00, mI], [m00, -alpha/m*mI] ] )
B = np.block( [ [m00], [mI/m]] )
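# Each vehicle is a 2d point mass with (optional) drag: writing the state as [positions; velocities],
# the continuous dynamics are d/dt [p; v] = [[0, I], [0, -alpha/m * I]] [p; v] + [[0], [I/m]] u.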
state_size = len(self.x0)
action_size = 4
self.m = m
self.alpha = alpha
Q = np.eye(state_size)
Q[0, 2] = Q[2, 0] = -1
Q[1, 3] = Q[3, 1] = -1
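# With these off-diagonal couplings, the position part of x^T Q x equals (x0 - x1)^2 + (y0 - y1)^2,
# i.e. the squared distance between the two vehicles; the remaining diagonal entries penalize the velocities.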
R = 0.1 * np.eye(action_size)
self.viewer = None
super().__init__(A=A, B=B, Q=Q*20, R=R*20)
def x0_bound(self) -> Box:
return Box(self.x0, self.x0) # self.bounds['x0_low'] = self.bounds['x0_high'] = list(self.x0)
def render(self, x, render_mode="human"):
""" Render the environment. You don't have to understand this code. """
if self.viewer is None:
self.viewer = HarmonicViewer(xstar=0, x0=self.x0) # target: x=0.
self.viewer.update(x)
import time
time.sleep(0.05)
return self.viewer.blit(render_mode=render_mode)
def close(self):
pass
class DiscreteRendevouzModel(DiscreteControlModel):
def __init__(self, dt=0.1, cost=None, transform_actions=True, **kwargs):
model = ContiniousRendevouzModel(**kwargs)
super().__init__(model=model, dt=dt, cost=cost)
class RendevouzEnvironment(ControlEnvironment):
def __init__(self, Tmax=20, render_mode=None, **kwargs):
discrete_model = DiscreteRendevouzModel(**kwargs)
super().__init__(discrete_model, Tmax=Tmax, render_mode=render_mode)
class HarmonicViewer(UpgradedGraphicsUtil):
def __init__(self, xstar = 0, x0=None):
self.xstar = xstar
width = 800
self.x0 = x0
sz = 20
self.scale = width/(2*sz)
self.p1h = []
self.p2h = []
super().__init__(screen_width=width, xmin=-sz, xmax=sz, ymin=-sz, ymax=sz, title='Rendevouz environment')
def render(self):
self.draw_background(background_color=(255, 255, 255))
# dw = self.dw
p1 = self.x[:2]
p2 = self.x[2:4]
self.p1h.append(p1)
self.p2h.append(p2)
self.circle("asdf", pos=p1, r=.5 * self.scale, fillColor=(200, 0, 0))
self.circle("asdf", pos=p2, r=.5 * self.scale, fillColor=(0, 0, 200) )
if len(self.p1h) > 2:
self.polyline('...', np.stack(self.p1h)[:,0], np.stack(self.p1h)[:,1], width=1, color=(200, 0, 0))
self.polyline('...', np.stack(self.p2h)[:,0], np.stack(self.p2h)[:,1], width=1, color=(0, 0, 200))
if tuple(self.x) == tuple(self.x0):
self.p1h = []
self.p2h = []
def update(self, x):
self.x = x
if __name__ == "__main__":
from irlc import Agent, train
env = RendevouzEnvironment(render_mode='human')
# Note: constructing an LQRAgent here would also require the system matrices (A, B, Q, R, ...);
# below we simply run a random agent to visualize the environment.
stats, traj = train(env, Agent(env), num_episodes=1)
pass
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
"""This directory contains the exercises for week 7."""
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
"""
References:
[Her25] Tue Herlau. Sequential decision making. (Freely available online), 2025.
[TET12] Yuval Tassa, Tom Erez, and Emanuel Todorov. Synthesis and stabilization of complex behaviors through online trajectory optimization. In 2012 IEEE/RSJ International Conference on Intelligent Robots and Systems, 4906–4913. IEEE, 2012. (See tassa2012.pdf).
[Har20] James Harrison. Optimal and learning-based control combined course notes. (See AA203combined.pdf), 2020.
"""
r"""
This implements two methods: The basic ILQR method, described in (Her25, Algorithm 24), and the linesearch-based method
described in (Her25, Algorithm 25).
If you are interested, you can consult (TET12) (which contains generalization to DDP) and (Har20, Alg 1).
"""
import warnings
import numpy as np
from irlc.ex06.dlqr import LQR
from irlc.ex04.discrete_control_model import DiscreteControlModel
def ilqr_basic(model : DiscreteControlModel, N, x0, us_init : list = None, n_iterations=500, verbose=True):
r"""
Implements the basic ilqr algorithm, i.e. (Her25, Algorithm 24). Our notation (x_bar, etc.) will be consistent with the lecture slides
"""
mu, alpha = 1, 1 # Hyperparameters. For now, just let them have defaults and don't change them
# Create a random initial state-sequence
n, m = model.state_size, model.action_size
u_bar = [np.random.uniform(-1, 1,(model.action_size,)) for _ in range(N)] if us_init is None else us_init
x_bar = [x0] + [np.zeros(n, )] * N
"""
Initialize the nominal trajectory x_bar, u_bar using u_bar and x0 (i.e. simulate the system from x0 using the action sequence u_bar).
The simplest way to do this is to call forward_pass with all-zero sequence of control vector/matrix l, L.
"""
# TODO: 2 lines missing.
raise NotImplementedError("Initialize x_bar, u_bar here")
J_hist = []
for i in range(n_iterations):
"""
Compute derivatives around trajectory and cost estimate J of trajectory. To do so, use the get_derivatives
function. Remember the functions will return lists of derivatives.
"""
# TODO: 2 lines missing.
raise NotImplementedError("Compute J and derivatives A_k = f_x, B_k = f_u, ....")
""" Backward pass: Obtain feedback law matrices l, L using the backward_pass function.
"""
# TODO: 1 lines missing.
raise NotImplementedError("Compute L, l = .... here")
""" Forward pass: Given L, l matrices computed above, simulate new (optimal) action sequence.
In the lecture slides, this is similar to how we compute u^*_k and x_k
Once they are computed, iterate the iLQR algorithm by setting x_bar, u_bar equal to these values
"""
# TODO: 1 lines missing.
raise NotImplementedError("Compute x_bar, u_bar = ...")
if verbose:
print(f"{i}> J={J:4g}, change in cost since last iteration {0 if i == 0 else J-J_hist[-1]:4g}")
J_hist.append(J)
return x_bar, u_bar, J_hist, L, l
def ilqr_linesearch(model : DiscreteControlModel, N, x0, n_iterations, us_init=None, tol=1e-6, verbose=True):
r"""
For the linesearch, implement the method described in (Her25, Algorithm 25) (we will use regular iLQR, not DDP!)
"""
# The range of alpha-values to try out in the linesearch
# plus parameters relevant for regularization scheduling.
alphas = 1.1 ** (-np.arange(10) ** 2) # alphas = [1, 1.1^{-1}, 1.1^{-4}, 1.1^{-9}, ...]
mu_min = 1e-6
mu_max = 1e10
Delta_0 = 2
mu = 1.0
Delta = Delta_0
n, m = model.state_size, model.action_size
u_bar = [np.random.uniform(-1, 1, (model.action_size,)) for _ in range(N)] if us_init is None else us_init
x_bar = [x0] + [np.zeros(n, )] * (N)
# Initialize the nominal trajectory x_bar, u_bar (same as in ilqr_basic)
# TODO: 2 lines missing.
raise NotImplementedError("Copy-paste code from previous solution")
J_hist = []
converged = False
for i in range(n_iterations):
alpha_was_accepted = False
""" Step 1: Compute derivatives around trajectory and cost estimate of trajectory.
(copy-paste from basic implementation). In our implementation, J_prime = J_{u^star}(x_0) """
# TODO: 2 lines missing.
raise NotImplementedError("Obtain derivatives f_x, f_u, ... as well as cost of trajectory J_bar = ...")
try:
"""
Step 2: Backward pass to obtain control law (l, L). Same as before so more copy-paste
"""
# TODO: 1 lines missing.
raise NotImplementedError("Obtain l, L = ... in backward pass")
"""
Step 3: Forward pass and alpha scheduling.
Decrease alpha and check the condition J_new < J_prime. Apply the regularization scheduling as needed. """
for alpha in alphas:
x_hat, u_hat = forward_pass(model, x_bar, u_bar, L=L, l=l, alpha=alpha) # Simulate trajectory using this alpha
# TODO: 1 lines missing.
raise NotImplementedError("Compute J_new = ... as the cost of trajectory x_hat, u_hat")
if J_new < J_prime:
""" Linesearch proposed trajectory accepted! Set current trajectory equal to x_hat, u_hat. """
if np.abs((J_prime - J_new) / J_prime) < tol:
converged = True # Method does not seem to decrease J; converged. Break and return.
J_prime = J_new
x_bar, u_bar = x_hat, u_hat
'''
The update was accepted and you should change the regularization term mu,
and the related scheduling term Delta.
'''
# TODO: 1 lines missing.
raise NotImplementedError("Delta, mu = ...")
alpha_was_accepted = True # accept this alpha
break
except np.linalg.LinAlgError as e:
# Matrix in dlqr was not positive-definite and this diverged
warnings.warn(str(e))
if not alpha_was_accepted:
''' No alphas were accepted, which is not too hot. Regularization should change
'''
# TODO: 1 lines missing.
raise NotImplementedError("Delta, mu = ...")
if mu_max and mu >= mu_max:
raise Exception("Exceeded max regularization term; we are stuffed.")
dJ = 0 if i == 0 else J_prime-J_hist[-1]
info = "converged" if converged else ("accepted" if alpha_was_accepted else "failed")
if verbose:
print(f"{i}> J={J_prime:4g}, decrease in cost {dJ:4g} ({info}).\nx[N]={x_bar[-1].round(2)}")
J_hist.append(J_prime)
if converged:
break
return x_bar, u_bar, J_hist, L, l
def backward_pass(A : list, B : list, c_x : list, c_u : list, c_xx : list, c_ux : list, c_uu : list, mu=1):
r"""Given all derivatives, apply the LQR algorithm to get the control law.
The input arguments are described in the online documentation and the lecture notes. You should use them to call your
implementation of the :func:`~irlc.ex06.dlqr.LQR` method. Note that you should give a value for all inputs except the ``d``-term.
:param A: linearization of the dynamics matrices :math:`A_k`.
:param B: linearization of the dynamics matrices :math:`B_k`.
:param c_x: Cost terms corresponding to :math:`\mathbf{q}_k`
:param c_u: Cost terms corresponding to :math:`\mathbf{r}_k`
:param c_xx: Cost terms corresponding to :math:`Q_k`
:param c_ux: Cost terms corresponding to :math:`H_k`
:param c_uu: Cost terms corresponding to :math:`R_k`
:param mu: Regularization parameter for the LQR method
:return: The control law :math:`L_k, \mathbf{l}_k` as two lists.
"""
Q, QN = c_xx[:-1], c_xx[-1] # An example.
# TODO: 4 lines missing.
raise NotImplementedError("Insert your solution and remove this error.")
# Define the inputs using the linearization inputs.
(L, l), (V, v, vc) = LQR(A=A, B=B, R=R, Q=Q, QN=QN, H=H, q=q, qN=qN, r=r, mu=mu)
return L, l
def cost_of_trajectory(model : DiscreteControlModel, xs : list, us : list) -> float:
r"""Helper function which computes the cost of the trajectory.
The cost is defined as:
.. math::
c_N(\bar {\mathbf x}_N) + \sum_{k=0}^{N-1} c_k(\bar {\mathbf x}_k, \bar {\mathbf u}_k)
and to compute it, you should use the two helper methods ``model.cost.c`` and ``model.cost.cN``
(see :func:`~irlc.ex04.discrete_control_cost.DiscreteQRCost.c` and :func:`~irlc.ex04.discrete_control_cost.DiscreteQRCost.cN`).
:param model: The control model used to compute the cost.
:param xs: A list of length :math:`N+1` of the form :math:`\begin{bmatrix}\bar {\mathbf x}_0 & \dots & \bar {\mathbf x}_N \end{bmatrix}`
:param us: A list of length :math:`N` of the form :math:`\begin{bmatrix}\bar {\mathbf u}_0 & \dots & \bar {\mathbf u}_{N-1} \end{bmatrix}`
:return: The cost as a number.
"""
N = len(us)
JN = model.cost.cN(xs[-1])
return sum(map(lambda args: model.cost.c(*args), zip(xs[:-1], us, range(N)))) + JN
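# Illustrative example: for a single-step problem (N=1), the expression above reduces to
# model.cost.c(xs[0], us[0], 0) + model.cost.cN(xs[1]).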
def get_derivatives(model : DiscreteControlModel, x_bar : list, u_bar : list):
"""Compute all the derivatives used in the model.
The return type should match the meaning in (Her25, Subequation 17.8) and in the online documentation.
- ``c`` should be a list of length :math:`N+1`
- ``c_x`` should be a list of length :math:`N+1`
- ``c_xx`` should be a list of length :math:`N+1`
- ``c_u`` should be a list of length :math:`N`
- ``c_uu`` should be a list of length :math:`N`
- ``c_ux`` should be a list of length :math:`N`
- ``A`` should be a list of length :math:`N`
- ``B`` should be a list of length :math:`N`
Use the model to compute these terms. For instance, this will compute the first terms ``A[0]`` and ``B[0]``::
A0, B0 = model.f_jacobian(x_bar[0], u_bar[0], 0)
Meanwhile, to compute the first terms of the cost-functions you should use::
c[0], c_x[0], c_u[0], c_xx[0], c_ux[0], c_uu[0] = model.cost.c(x_bar[0], u_bar[0], k=0, compute_gradients=True)
:param model: The model to use when computing the derivatives of the cost
:param x_bar: The nominal state-trajectory
:param u_bar: The nominal action-trajectory
:return: Lists of all derivatives computed around the nominal trajectory (see the lecture notes).
"""
N = len(u_bar)
""" Compute A_k, B_k (lists of matrices of length N) as the jacobians of the dynamics. To do so,
recall from the online documentation that:
x, f_x, f_u = model.f(x, u, k, compute_jacobian=True)
"""
A = [None]*N
B = [None]*N
c = [None] * (N+1)
c_x = [None] * (N + 1)
c_xx = [None] * (N + 1)
c_u = [None] * (N+1)
c_ux = [None] * (N + 1)
c_uu = [None] * (N + 1)
# Now update each entry correctly (i.e., ensure there are no None elements left).
# TODO: 4 lines missing.
raise NotImplementedError("Insert your solution and remove this error.")
""" Compute derivatives of the cost function. For terms not including u these should be of length N+1
(because of gN!), for the other lists of length N
recall model.cost.c has output:
c[i], c_x[i], c_u[i], c_xx[i], c_ux[i], c_uu[i] = model.cost.c(x, u, i, compute_gradients=True)
"""
# TODO: 2 lines missing.
raise NotImplementedError("Insert your solution and remove this error.")
# Concatenate the derivatives associated with the last time point N.
cN, c_xN, c_xxN = model.cost.cN(x_bar[N], compute_gradients=True)
# TODO: 3 lines missing.
raise NotImplementedError("Update c, c_x and c_xx with the terminal terms.")
return A, B, c, c_x, c_u, c_xx, c_ux, c_uu
def forward_pass(model : DiscreteControlModel, x_bar : list, u_bar : list, L : list, l : list, alpha=1.0):
r"""Simulates the effect of the controller on the model
We assume the system starts in :math:`\mathbf{x}_0 = \bar {\mathbf x}_0`, and then simulate the effect of
generating actions using the closed-loop policy
.. math::
\mathbf{u}_k = \bar {\mathbf u}_k + \alpha \mathbf{l}_k + L_k (\mathbf{x}_k - \bar { \mathbf x}_k)
(see (Her25, eq. (17.16))).
:param model: The model used to compute the dynamics.
:param x_bar: A nominal list of states
:param u_bar: A nominal list of actions
:param L: A list of control matrices :math:`L_k`
:param l: A list of control vectors :math:`\mathbf{l}_k`
:param alpha: The linesearch parameter.
:return: A list of length :math:`N+1` of simulated states and a list of length :math:`N` of simulated actions.
"""
N = len(u_bar)
x = [None] * (N+1)
u_star = [None] * N
x[0] = x_bar[0].copy()
for i in range(N):
r""" Compute using (Her25, eq. (17.16))
u_{i} = ...
"""
# TODO: 1 lines missing.
raise NotImplementedError("u_star[i] = ....")
""" Remember to compute
x_{i+1} = f_k(x_i, u_i^*)
here:
"""
# TODO: 1 lines missing.
raise NotImplementedError("x[i+1] = ...")
return x, u_star
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
"""
References:
[Her25] Tue Herlau. Sequential decision making. (Freely available online), 2025.
"""
from irlc.ex06.model_rendevouz import RendevouzEnvironment
from irlc.ex07.ilqr_rendovouz_basic import ilqr
from irlc import train
from irlc import Agent
import numpy as np
class ILQRAgent(Agent):
def __init__(self, env, discrete_model, N=250, ilqr_iterations=10, use_ubar=False, use_linesearch=True):
super().__init__(env)
self.dt = discrete_model.dt
# x0 = discrete_model.reset()
x0,_ = env.reset()
x0 = np.asarray(x0) # Get the initial state. We will take this from the environment.
xs, us, self.J_hist, L, l = ilqr(discrete_model, N, x0, n_iter=ilqr_iterations, use_linesearch=use_linesearch)
self.ubar = us
self.xbar = xs
self.L = L
self.l = l
self.use_ubar = use_ubar # Should policy use open-loop u-bar (suboptimal) or closed-loop L_k, l_k?
def pi(self, x, k, info=None):
if self.use_ubar:
u = self.ubar[k]
else:
if k >= len(self.ubar):
print(k, len(self.ubar))
k = len(self.ubar)-1
# See (Her25, eq. (17.16))
# TODO: 1 lines missing.
raise NotImplementedError("Generate action using the control matrices.")
return u
def solve_rendevouz():
env = RendevouzEnvironment()
N = int(env.Tmax / env.dt)
agent = ILQRAgent(env, env.discrete_model, N=N)
stats, trajectories = train(env, agent, num_episodes=1, return_trajectory=True)
env.close()
return stats, trajectories, agent
if __name__ == "__main__":
from irlc.ex07.ilqr_rendovouz_basic import plot_vehicles
import matplotlib.pyplot as plt
stats, trajectories, agent = solve_rendevouz()
t = trajectories[0].state
xb = agent.xbar
plot_vehicles(t[:,0], t[:,1], t[:,2], t[:,3], linespec=':', legend=("RK4 policy simulation", "RK4 policy simulation"))
plot_vehicles(xb[:,0], xb[:,1], xb[:,2], xb[:,3], linespec='-')
plt.legend()
plt.show()
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
import matplotlib.pyplot as plt
import numpy as np
from irlc.ex05.model_cartpole import GymSinCosCartpoleEnvironment
import time
from irlc.ex07.ilqr_rendovouz_basic import ilqr
from irlc import savepdf
# Number of steps.
N = 100
def cartpole(use_linesearch):
env = GymSinCosCartpoleEnvironment(render_mode='human')
x0, info = env.reset()
xs, us, J_hist, L, l = ilqr(env.discrete_model, N, x0, n_iter=300, use_linesearch=use_linesearch)
plot_cartpole(env, xs, us, use_linesearch=use_linesearch)
def plot_cartpole(env, xs, us, J_hist=None, use_linesearch=True):
animate(xs, env)
env.close()
# Transform actions/states using built-in functions.
def gapply(f, xm):
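"""Apply f to each row of xm and stack the results (used to undo the variable transformation)."""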
usplit = np.split(xm, len(xm))
u2 = [f(u.flat) for u in usplit]
us = np.stack(u2)
return us
us = gapply(env.discrete_model.phi_u_inv, us)
xs = gapply(env.discrete_model.phi_x_inv, xs)
t = np.arange(N + 1) * env.dt
x = xs[:, 0]
theta = np.unwrap(xs[:, 2]) # Makes for smoother plots.
theta_dot = xs[:, 3]
pdf_ex = '_linesearch' if use_linesearch else ''
ev = 'cartpole_'
plt.plot(theta, theta_dot)
plt.xlabel("theta (rad)")
plt.ylabel("theta_dot (rad/s)")
plt.title("Orientation Phase Plot")
plt.grid()
savepdf(f"{ev}theta{pdf_ex}")
plt.show()
_ = plt.plot(t[:-1], us)
_ = plt.xlabel("time (s)")
_ = plt.ylabel("Force (N)")
_ = plt.title("Action path")
plt.grid()
savepdf(f"{ev}action{pdf_ex}")
plt.show()
_ = plt.plot(t, x)
_ = plt.xlabel("time (s)")
_ = plt.ylabel("Position (m)")
_ = plt.title("Cart position")
plt.grid()
savepdf(f"{ev}position{pdf_ex}")
plt.show()
if J_hist is not None:
_ = plt.plot(J_hist)
_ = plt.xlabel("Iteration")
_ = plt.ylabel("Total cost")
_ = plt.title("Total cost-to-go")
plt.grid()
savepdf(f"{ev}J{pdf_ex}")
plt.show()
def animate(xs0, env):
render = True
if render:
for i in range(2):
render_(xs0, env.discrete_model)
time.sleep(1)
# env.viewer.close()
def render_(xs, env):
for i in range(xs.shape[0]):
x = xs[i]
env.render(x=x)
if __name__ == "__main__":
cartpole(use_linesearch=True)
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
import numpy as np
from irlc.ex07.ilqr_agent import ILQRAgent
from irlc import train
from irlc import savepdf
import matplotlib.pyplot as plt
from irlc.ex05.model_cartpole import GymSinCosCartpoleEnvironment
def cartpole_experiment(N=12, use_linesearch=True, figex="", animate=True):
np.random.seed(2)
Tmax = .9
dt = Tmax/N
env = GymSinCosCartpoleEnvironment(dt=dt, Tmax=Tmax, supersample_trajectory=True, render_mode='human' if animate else None)
agent = ILQRAgent(env, env.discrete_model, N=N, ilqr_iterations=200, use_linesearch=use_linesearch)
stats, trajectories = train(env, agent, num_episodes=1, return_trajectory=True)
agent.use_ubar = True
stats2, trajectories2 = train(env, agent, num_episodes=1, return_trajectory=True)
env.close()
xb = agent.xbar
tb = np.arange(N+1)*dt
plt.figure(figsize=(8,6))
F = 3
plt.plot(trajectories[0].time, trajectories[0].state[:,F], 'k-', label='Closed-loop $\\pi$')
plt.plot(trajectories2[0].time, trajectories2[0].state[:,F], '-', label='Open-loop $\\bar{u}_k$')
plt.plot(tb, xb[:,F], '.-', label="iLQR prediction $\\bar{x}_k$")
plt.xlabel("Time/seconds")
plt.ylabel("$\cos(\\theta)$")
plt.title(f"Cartpole environment $T={N}$")
plt.grid()
plt.legend()
ev = "pendulum"
savepdf(f"irlc_cartpole_theta_N{N}_{use_linesearch}{figex}")
plt.show()
def plt_cartpole():
cartpole_experiment(N=50, use_linesearch=True, animate=True)
if __name__ == '__main__':
plt_cartpole()
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
import numpy as np
from irlc.ex04.model_pendulum import DiscreteSinCosPendulumModel
import matplotlib.pyplot as plt
import time
from irlc.ex07.ilqr_rendovouz_basic import ilqr
from irlc import savepdf
def pendulum(use_linesearch):
print("> Using iLQR to solve Pendulum swingup task. Using linesearch?", use_linesearch)
dt = 0.02
model = DiscreteSinCosPendulumModel(dt, cost=None)
N = 250
# This rather clunky line gets us the initial state; we transform the bound by the variable transformation.
x0 = np.asarray(model.phi_x(model.continuous_model.x0_bound().low))
n_iter = 200 # Use 200 iLQR iterations.
# xs, us, J_hist, L, l = ilqr(model, ...) Write a function call like this, but with the correct parameters
# TODO: 1 lines missing.
raise NotImplementedError("Call iLQR here (see hint above).")
render = True
if render:
for i in range(2):
render_(xs, model)
time.sleep(2) # Sleep for two seconds between simulations.
model.close()
xs = np.asarray([model.phi_x_inv(x) for x, u in zip(xs, us)]) # Convert to radians. We use the built-in functions to change coordinates.
xs, us = np.asarray(xs), np.asarray(us)
t = np.arange(N) * dt
theta = np.unwrap(xs[:, 0]) # Makes for smoother plots.
theta_dot = xs[:, 1]
pdf_ex = '_linesearch' if use_linesearch else ''
stitle = "(using linesearch)" if use_linesearch else "(not using linesearch) "
ev = 'pendulum_'
_ = plt.plot(theta, theta_dot)
_ = plt.xlabel("$\\theta$ (rad)")
_ = plt.ylabel("$d\\theta/dt$ (rad/s)")
_ = plt.title(f"Phase Plot {stitle}")
plt.grid()
savepdf(f"{ev}theta{pdf_ex}")
plt.show()
_ = plt.plot(t, us)
_ = plt.xlabel("time (s)")
_ = plt.ylabel("Force (N)")
_ = plt.title(f"Action path {stitle}")
plt.grid()
savepdf(f"{ev}action{pdf_ex}")
plt.show()
_ = plt.plot(J_hist)
_ = plt.xlabel("Iteration")
_ = plt.ylabel("Total cost")
_ = plt.title(f"Total cost-to-go {stitle}")
plt.grid()
savepdf(f"{ev}J{pdf_ex}")
plt.show()
def render_(xs, env):
for i in range(xs.shape[0]):
env.render(xs[i])
if __name__ == "__main__":
pendulum(use_linesearch=False)
pendulum(use_linesearch=True)
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
import numpy as np
from irlc.ex04.model_pendulum import GymSinCosPendulumEnvironment
from irlc.ex07.ilqr_agent import ILQRAgent
from irlc import train
from irlc import savepdf
import matplotlib.pyplot as plt
Tmax = 3
def pen_experiment(N=12, use_linesearch=True,figex="", animate=True):
dt = Tmax / N
env = GymSinCosPendulumEnvironment(dt, Tmax=Tmax, supersample_trajectory=True, render_mode="human" if animate else None)
agent = ILQRAgent(env, env.discrete_model, N=N, ilqr_iterations=200, use_linesearch=use_linesearch)
stats, trajectories = train(env, agent, num_episodes=1, return_trajectory=True)
agent.use_ubar = True
stats2, trajectories2 = train(env, agent, num_episodes=1, return_trajectory=True)
env.close()
plot_pendulum_trajectory(env, trajectories[0], label='Closed-loop $\\pi$')
xb = agent.xbar
tb = np.arange(N+1)*dt
plt.figure(figsize=(12, 6))
plt.plot(trajectories[0].time, trajectories[0].state[:,1], '-', label='Closed-loop $\\pi(x_k)$')
plt.plot(trajectories2[0].time, trajectories2[0].state[:,1], '-', label='Open-loop $\\bar{u}_k$')
plt.plot(tb, xb[:,1], 'o-', label="iLQR prediction $\\bar{x}_k$")
plt.grid()
plt.legend()
ev = "pendulum"
savepdf(f"irlc_pendulum_theta_N{N}_{use_linesearch}{figex}")
plt.show()
## Plot J
plt.figure(figsize=(6, 6))
plt.semilogy(agent.J_hist, 'k.-')
plt.xlabel("iLQR Iterations")
plt.ylabel("Cost function estimate $J$")
# plt.title("Last value: {")
plt.grid()
savepdf(f"irlc_pendulum_J_N{N}_{use_linesearch}{figex}")
plt.show()
def plot_pendulum_trajectory(env, traj, style='k.-', label=None, action=False, **kwargs):
if action:
y = traj.action[:, 0]
y = np.clip(y, env.action_space.low[0], env.action_space.high[0])
else:
y = traj.state[:, 1]
plt.plot(traj.time[:-1] if action else traj.time, y, style, label=label, **kwargs)
plt.xlabel("Time/seconds")
if action:
plt.ylabel("Torque $u$")
else:
plt.ylabel(r"$\cos(\theta)$")
plt.grid()
pass
N = 50
if __name__ == "__main__":
pen_experiment(N=N, use_linesearch=True)
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.ex07.ilqr_rendovouz_basic import plot_rendevouz
if __name__ == "__main__":
plot_rendevouz(use_linesearch=True)
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
import numpy as np
import matplotlib.pyplot as plt
from irlc import savepdf
from irlc.ex07.ilqr import ilqr_basic, ilqr_linesearch
from irlc.ex06.model_rendevouz import DiscreteRendevouzModel
from irlc.ex04.control_environment import ControlEnvironment
from irlc.ex04.discrete_control_model import DiscreteControlModel
# plt.ion()
def ilqr(model : DiscreteControlModel, N, x0, n_iter, use_linesearch, verbose=True):
if not use_linesearch:
xs, us, J_hist, L, l = ilqr_basic(model, N, x0, n_iterations=n_iter,verbose=verbose)
else:
xs, us, J_hist, L, l = ilqr_linesearch(model, N, x0, n_iterations=n_iter, tol=1e-6,verbose=verbose)
xs, us = np.stack(xs), np.stack(us)
return xs, us, J_hist, L, l
def plot_vehicles(x_0, y_0, x_1, y_1, linespec='-', legend=("Vehicle 1", "Vehicle 2")):
_ = plt.title("Trajectory of the two omnidirectional vehicles")
_ = plt.plot(x_0, y_0, "r"+linespec, label=legend[0])
_ = plt.plot(x_1, y_1, "b"+linespec, label=legend[1])
Tmax = 20
def solve_rendovouz(use_linesearch=False):
model = DiscreteRendevouzModel()
x0 = np.asarray(model.continuous_model.x0_bound().low) # Starting position
N = int(Tmax/model.dt)
return ilqr(model, N, x0, n_iter=10, use_linesearch=use_linesearch), model
def plot_rendevouz(use_linesearch=False):
(xs, us, J_hist, _, _), env = solve_rendovouz(use_linesearch=use_linesearch)
N = int(Tmax / env.dt)
dt = env.dt
x_0 = xs[:, 0]
y_0 = xs[:, 1]
x_1 = xs[:, 2]
y_1 = xs[:, 3]
x_0_dot = xs[:, 4]
y_0_dot = xs[:, 5]
x_1_dot = xs[:, 6]
y_1_dot = xs[:, 7]
pdf_ex = '_linesearch' if use_linesearch else ''
ev = 'rendevouz_'
plot_vehicles(x_0, y_0, x_1, y_1, linespec='-', legend=("Vehicle 1", "Vehicle 2"))
plt.legend()
savepdf(f'{ev}trajectory{pdf_ex}')
plt.show()
t = np.arange(N + 1) * dt
_ = plt.plot(t, x_0, "r")
_ = plt.plot(t, x_1, "b")
_ = plt.xlabel("Time (s)")
_ = plt.ylabel("x (m)")
_ = plt.title("X positional paths")
_ = plt.legend(["Vehicle 1", "Vehicle 2"])
savepdf(f'{ev}vehicles_x_pos{pdf_ex}')
plt.show()
_ = plt.plot(t, y_0, "r")
_ = plt.plot(t, y_1, "b")
_ = plt.xlabel("Time (s)")
_ = plt.ylabel("y (m)")
_ = plt.title("Y positional paths")
_ = plt.legend(["Vehicle 1", "Vehicle 2"])
savepdf(f'{ev}vehicles_y_pos{pdf_ex}')
plt.show()
_ = plt.plot(t, x_0_dot, "r")
_ = plt.plot(t, x_1_dot, "b")
_ = plt.xlabel("Time (s)")
_ = plt.ylabel("x_dot (m)")
_ = plt.title("X velocity paths")
_ = plt.legend(["Vehicle 1", "Vehicle 2"])
savepdf(f'{ev}vehicles_vx{pdf_ex}')
plt.show()
_ = plt.plot(t, y_0_dot, "r")
_ = plt.plot(t, y_1_dot, "b")
_ = plt.xlabel("Time (s)")
_ = plt.ylabel("y_dot (m)")
_ = plt.title("Y velocity paths")
_ = plt.legend(["Vehicle 1", "Vehicle 2"])
savepdf(f'{ev}vehicles_vy{pdf_ex}')
plt.show()
_ = plt.plot(J_hist)
_ = plt.xlabel("Iteration")
_ = plt.ylabel("Total cost")
_ = plt.title("Total cost-to-go")
savepdf(f'{ev}cost_to_go{pdf_ex}')
plt.show()
pass
if __name__ == "__main__":
plot_rendevouz(use_linesearch=False)
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
"""
References:
[Her25] Tue Herlau. Sequential decision making. (Freely available online), 2025.
"""
from irlc.ex06.dlqr import LQR
from irlc import Agent
from irlc.ex05.model_cartpole import GymSinCosCartpoleEnvironment
from irlc import train, savepdf
import matplotlib.pyplot as plt
import numpy as np
from irlc.ex04.control_environment import ControlEnvironment
from irlc.ex04.discrete_control_model import DiscreteControlModel
class LinearizationAgent(Agent):
""" Implement the simple linearization procedure described in (Her25, Algorithm 23) which expands around a single fixed point. """
def __init__(self, env: ControlEnvironment, model : DiscreteControlModel, xbar=None, ubar=None):
self.model = model
N = 50 # Plan on this horizon. The control matrices will converge fairly quickly.
""" Define A, B, d as the list of A/B matrices here. I.e. x[t+1] = A x[t] + B u[t] + d.
You should use the function model.f to do this, which has built-in functionality to compute the Jacobians, which will be equal to A, B.
It is important that you linearize around xbar, ubar. See (Her25, Section 17.1) for further details. """
# TODO: 4 lines missing.
raise NotImplementedError("Insert your solution and remove this error.")
Q, q, R = self.model.cost.Q, self.model.cost.q, self.model.cost.R
""" Define self.L, self.l here as the (lists of) control matrices. """
# TODO: 1 lines missing.
raise NotImplementedError("Compute control matrices L, l here using LQR(...)")
super().__init__(env)
def pi(self, x, k, info=None):
"""
Compute the action here using u_k = L_0 x_k + l_0. The control matrix/vector L_0 can be found as the output from LQR, i.e.
L_0 = L[0] and l_0 = l[0].
The reason we use L_0, l_0 (and not L_k, l_k) is that the LQR problem is itself only an approximation of the true dynamics near the fixed point,
and the resulting stationary controller will be able to balance the pendulum for an arbitrarily long time (beyond the planning horizon).
"""
# TODO: 1 lines missing.
raise NotImplementedError("Compute current action here")
return u
def get_offbalance_cart(waiting_steps=30, sleep_time=0.1):
env = GymSinCosCartpoleEnvironment(Tmax=3, render_mode='human')
env.reset()
import time
time.sleep(sleep_time)
env.state = env.discrete_model.x_upright
env.state[-1] = 0.01 # a bit of angular speed.
for _ in range(waiting_steps): # Simulate the environment for 30 steps to get things out of balance.
env.step(1)
time.sleep(sleep_time)
return env
if __name__ == "__main__":
np.random.seed(42) # I don't think these results are seed-dependent but let's make sure.
from irlc import plot_trajectory
env = get_offbalance_cart(4) # Let the cart run for a few steps to get it off-balance. Same idea as in the PID control exercise.
agent = LinearizationAgent(env, model=env.discrete_model, xbar=env.discrete_model.x_upright, ubar=env.action_space.sample()*0)
_, trajectories = train(env, agent, num_episodes=1, return_trajectory=True, reset=False) # Note reset=False to maintain initial conditions.
plot_trajectory(trajectories[0], env, xkeys=[0,2, 3], ukeys=[0])
env.close()
savepdf("linearization_cartpole")
plt.show()
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc import Agent, train
from irlc.ex05.model_cartpole import GymSinCosCartpoleEnvironment
if __name__ == "__main__":
env = GymSinCosCartpoleEnvironment(Tmax=20, render_mode='human')
train(env, Agent(env), num_episodes=1)
env.close()