Commit c13c7d04 authored by tuhe

Exercises

parent 3344e577
Showing with 1399 additions and 0 deletions
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
"""This directory contains the exercises for week 6."""
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
"""
References:
[Her25] Tue Herlau. Sequential decision making. (Freely available online), 2025.
"""
import numpy as np
import matplotlib.pyplot as plt
from irlc import savepdf
from irlc import train
from irlc.ex06.model_boeing import BoeingEnvironment
from irlc.ex06.lqr_agent import LQRAgent
from irlc.ex03.control_model import ControlModel
import scipy
def boeing_simulation():
env = BoeingEnvironment(Tmax=10)
model = env.discrete_model.continuous_model # get the model from the Boeing environment
dt = env.dt # Get the discretization time.
A, B, d = compute_A_B_d(model, dt)
# Use compute_Q_R_q to get the Q, R, and q matrices in the discretized system
# TODO: 1 lines missing.
raise NotImplementedError("Compute Q, R and q here")
## TODO: Half of each line of code in the following 1 lines have been replaced by garbage. Make it work and remove the error.
#----------------------------------------------------------------------------------------------------------------------------
# agent = LQRAgent(env, A=A??????????????????????????
raise NotImplementedError("Use your LQRAgent to plan using the system matrices.")
stats, trajectories = train(env, agent, return_trajectory=True)
return stats, trajectories, env
def compute_Q_R_q(model : ControlModel, dt : float):
cost = model.get_cost() # Get the continuous-time cost-function
# use print(cost) to see what it contains.
# Then get the discretized matrices using the techniques described in (Her25, Subsection 13.1.6).
# TODO: 3 lines missing.
raise NotImplementedError("Insert your solution and remove this error.")
return Q, R, q
def compute_A_B_d(model : ControlModel, dt : float):
if model.d is None:
d = np.zeros((model.state_size,)) # Ensure d is set to a zero vector if it is not defined.
else:
d = model.d
A_discrete = scipy.linalg.expm(model.A * dt) # This is the discrete A-matrix computed using the matrix exponential
# Now it is your job to define B_discrete and d_discrete.
# TODO: 2 lines missing.
raise NotImplementedError("Insert your solution and remove this error.")
return A_discrete, B_discrete, d_discrete.flatten()
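# Sanity check (illustrative, not part of the exercise): for a model with A = 0 the matrix exponential is the
# identity, so a correct implementation should return A_discrete = I and, for the exact (zero-order hold)
# discretization, B_discrete = dt * B and d_discrete = dt * d. This gives a quick way to test compute_A_B_d.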
def boeing_experiment():
_, trajectories, env = boeing_simulation()
model = env.discrete_model.continuous_model
dt = env.dt
Q, R, q = compute_Q_R_q(model, dt)
print("Discretization time is", dt)
print("Original q-vector was:", model.get_cost().q)
print("Discretized version is:", q)
t = trajectories[-1]
out = t.state @ model.P.T
plt.plot(t.time, out[:, 0], '-', label=env.observation_labels[0])
plt.plot(t.time, out[:, 1], '-', label=env.observation_labels[1])
plt.grid()
plt.legend()
plt.xlabel("Time/seconds")
plt.ylabel("Output")
savepdf("boing_lqr_output")
plt.show(block=False)
plt.close()
plt.plot(t.time[:-1], t.action[:, 0], '-', label=env.action_labels[0])
plt.plot(t.time[:-1], t.action[:, 1], '-', label=env.action_labels[1])
plt.xlabel("Time/seconds")
plt.ylabel("Control action")
plt.grid()
plt.legend()
savepdf("boing_lqr_action")
plt.show()
if __name__ == "__main__":
boeing_experiment()
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
"""
References:
[Her25] Tue Herlau. Sequential decision making. (Freely available online), 2025.
"""
import numpy as np
import matplotlib.pyplot as plt
from irlc import bmatrix
from irlc import savepdf
def LQR(A : list, # Dynamics
B : list, # Dynamics
d : list =None, # Dynamics (optional)
Q : list=None,
R: list=None,
H : list=None,
q : list=None,
r : list=None,
qc : list=None,
QN : np.ndarray =None, # Terminal cost term
qN : np.ndarray=None, # Terminal cost term
qcN : np.ndarray =None, # Terminal cost term.
mu : float =0 # regularization parameter which will only be relevant next week.
):
r"""
Implement the LQR as defined in (Her25, Algorithm 22). I recommend viewing this documentation online (documentation for week 6).
When you solve this exercise, look at the algorithm in the book. Since the LQR problem is of the form:
.. math::
x_{k+1} = A_k x_k + B_k u_k + d_k
For :math:`k=0,\dots,N-1` this means there are :math:`N` matrices :math:`A_k`. This is implemented by assuming that
:python:`A` (i.e., the input argument) is a :python:`list` of length :math:`N` so that :python:`A[k]` corresponds
to :math:`A_k`.
Similar conventions are used for the cost term (please see the lecture notes or the online documentation for their meaning). Recall it has the form:
.. math::
c(x_k, u_k) = \frac{1}{2} \mathbf x_k^\top Q_k \mathbf x_k + \frac{1}{2} \mathbf q_k^\top \mathbf x_k + q_k + \cdots
When the function is called, the vector :math:`\textbf{q}_k` corresponds to :python:`q` and the constant :math:`q_k` corresponds to :python:`qc` (q-constant).
.. note::
Only the terms :python:`A` and :python:`B` are required. The rest of the terms will default to 0-matrices.
The LQR algorithm will ultimately compute a control law of the form:
.. math::
\mathbf u_k = L_k \mathbf x_k + \mathbf l_k
And a cost-to-go function as:
.. math::
J_k(x_k) = \frac{1}{2} \mathbf x_k^\top V_k \mathbf x_k + \mathbf v_k^\top \mathbf x_k + \mathrm{vc}_k
There are :math:`N` control-law terms :math:`L_k, \mathbf l_k` and :math:`N+1` cost-to-go terms. The function then returns :python:`(L, l), (V, v, vc)` so that :python:`L[k]` corresponds to :math:`L_k`.
:param A: A list of :python:`np.ndarray` containing all terms :math:`A_k`
:param B: A list of :python:`np.ndarray` containing all terms :math:`B_k`
:param d: A list of :python:`np.ndarray` containing all terms :math:`\mathbf d_k` (optional)
:param Q: A list of :python:`np.ndarray` containing all terms :math:`Q_k` (optional)
:param R: A list of :python:`np.ndarray` containing all terms :math:`R_k` (optional)
:param H: A list of :python:`np.ndarray` containing all terms :math:`H_k` (optional)
:param q: A list of :python:`np.ndarray` containing all terms :math:`\mathbf q_k` (optional)
:param r: A list of :python:`np.ndarray` containing all terms :math:`\mathbf r_k` (optional)
:param qc: A list of :python:`float` containing all terms :math:`q_k` (i.e., constant terms) (optional)
:param QN: A :python:`np.ndarray` containing the terminal cost term :math:`Q_N` (optional)
:param qN: A :python:`np.ndarray` containing the terminal cost term :math:`\mathbf q_N` (optional)
:param qcN: A :python:`np.ndarray` containing the terminal cost term :math:`q_N`
:param mu: A regularization term which is useful for iterative-LQR (next week). Defaults to 0.
:return: A tuple of the form :python:`(L, l), (V, v, vc)` corresponding to the control and cost-matrices.
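A minimal usage sketch (illustrative only; it assumes the function below has been completed, and the system is just a toy example)::

    N = 10
    A, B = np.eye(2), np.asarray([[0.0], [1.0]])
    (L, l), (V, v, vc) = LQR(A=[A]*N, B=[B]*N, Q=[np.eye(2)]*N, R=[np.eye(1)]*N)
    x0 = np.asarray([1.0, 0.0])
    u0 = L[0] @ x0 + l[0]   # the control action in the first time step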
"""
N = len(A)
n,m = B[0].shape
# Initialize empty lists for control matrices and cost terms
L, l = [None]*N, [None]*N
V, v, vc = [None]*(N+1), [None]*(N+1), [None]*(N+1)
# Initialize constant cost-function terms to zero if not specified.
# They will be initialized to zero, meaning they have no effect on the update rules.
QN = np.zeros((n,n)) if QN is None else QN
qN = np.zeros((n,)) if qN is None else qN
qcN = 0 if qcN is None else qcN
H, q, qc, r = init_mat(H,m,n,N=N), init_mat(q,n,N=N), init_mat(qc,1,N=N), init_mat(r,m,N=N)
d = init_mat(d,n, N=N)
""" In the next line, you should initialize the last cost-term. This is similar to how we in DP had the initialization step
> J_N(x_N) = g_N(x_N)
Except that since x_N is no longer discrete, we store it as matrices/vectors representing a second-order polynomial, i.e.
> J_N(X_N) = 1/2 * x_N' V[N] x_N + v[N]' x_N + vc[N]
"""
# TODO: 1 lines missing.
raise NotImplementedError("Initialize V[N], v[N], vc[N] here")
In = np.eye(n)
for k in range(N-1,-1,-1):
# When you update S_uu and S_ux remember to add regularization as the terms ... (V[k+1] + mu * In) ...
# Note that to compute x = A^{-1} y in a numerically stable manner, you should use
# >>> x = np.linalg.solve(A, y)
# rather than forming the inverse explicitly.
# The terms you need to update will be, in turn:
# Suu = ...
# Sux = ...
# Su = ...
# L[k] = ...
# l[k] = ...
# V[k] = ...
# V[k] = ...
# v[k] = ...
# vc[k] = ...
## TODO: Half of each line of code in the following 4 lines have been replaced by garbage. Make it work and remove the error.
#----------------------------------------------------------------------------------------------------------------------------
# Suu = R[k] + B[k].T @ (????????????????????????
# Sux = H[k] + B[k].T @ (????????????????????????
# Su = r[k] + B[k].T @ v[k + 1?????????????????????????????
# L[k] = -np.linal?????????????????
raise NotImplementedError("Insert your solution and remove this error.")
l[k] = -np.linalg.solve(Suu, Su) # You get this for free. Notice how we use np.linalg.solve(A,x) to compute A^{-1} x
V[k] = Q[k] + A[k].T @ V[k+1] @ A[k] - L[k].T @ Suu @ L[k]
V[k] = 0.5 * (V[k] + V[k].T) # I recommend putting this here to keep V positive semidefinite
# You get these for free: Compare to the code in the algorithm.
v[k] = q[k] + A[k].T @ (v[k+1] + V[k+1] @ d[k]) + Sux.T @ l[k]
vc[k] = vc[k+1] + qc[k] + d[k].T @ v[k+1] + 1/2*( d[k].T @ V[k+1] @ d[k] ) + 1/2*l[k].T @ Su
return (L,l), (V,v,vc)
def init_mat(X, a, b=None, N=None):
"""
Helper function. Check if X is None, and if so return a list
[A, A,....]
which is N long and where each A is a (a x b) zero-matrix, else returns X repeated N times:
[X, X, ...]
"""
M0 = np.zeros((a,) if b is None else (a, b))
if X is not None:
return [m if m is not None else M0 for m in X]
else:
return [M0] * N
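# Illustrative examples (not used elsewhere): init_mat(None, 2, N=3) returns [np.zeros(2)]*3, while
# init_mat([X, None], 2, N=2) keeps X in the first slot and replaces the None with a zero vector.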
def lqr_rollout(x0,A,B,d,L,l):
"""
Compute a rollout (states and actions) given solution from LQR controller function.
x0 is a vector (starting state), and A, B, d and L, l are lists of system/control matrices.
"""
x, states,actions = x0, [x0], []
n,m = B[0].shape
N = len(L)
d = init_mat(d,n,1,N) # Initialize as a list of zero matrices [ np.zeros((n,1)), np.zeros((n,1)), ...]
l = init_mat(l,m,1,N) # Initialize as a list of zero matrices [ np.zeros((m,1)), np.zeros((m,1)), ...]
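# Note: with d=None and l=None the rollout therefore reduces to the purely linear feedback
# u_k = L_k x_k and the dynamics x_{k+1} = A_k x_k + B_k u_k.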
for k in range(N):
u = L[k] @ x + l[k]
x = A[k] @ x + B[k] @ u + d[k]
actions.append(u)
states.append(x)
return states, actions
if __name__ == "__main__":
"""
Solve this problem (see also lecture notes for the same example)
http://cse.lab.imtlucca.it/~bemporad/teaching/ac/pdf/AC2-04-LQR-Kalman.pdf
"""
N = 20
A = np.ones((2,2))
A[1,0] = 0
B = np.asarray([[0], [1]])
Q = np.zeros((2,2))
R = np.ones((1,1))
print("System matrices A, B, Q, R")
print(bmatrix(A))
print(bmatrix(B))
print(bmatrix(Q))
print(bmatrix(R))
for rho in [0.1, 10, 100]:
Q[0,0] = 1/rho
(L,l), (V,v,vc) = LQR(A=[A]*N, B=[B]*N, d=None, Q=[Q]*N, R=[R]*N, QN=Q)
x0 = np.asarray( [[1],[0]])
trajectory, actions = lqr_rollout(x0,A=[A]*N, B=[B]*N, d=None,L=L,l=l)
xs = np.concatenate(trajectory, axis=1)[0,:]
plt.plot(xs, 'o-', label=f'rho={rho}')
k = 10
print(f"Control matrix in u_k = L_k x_k + l_k at k={k}:", L[k])
for k in [N-1,N-2,0]:
print(f"L[{k}] is:", L[k].round(4))
plt.title("Double integrator")
plt.xlabel('Steps $k$')
plt.ylabel('$x_1 = $ x[0]')
plt.legend()
plt.grid()
savepdf("dlqr_double_integrator")
plt.show()
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
import numpy as np
from irlc.ex06.dlqr import LQR
def urnd(sz):
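"""Helper: a uniformly random array in [-1, 1] with shape sz (used to build a nonsense test problem below)."""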
return np.random.uniform(-1, 1, sz)
def check_LQR():
np.random.seed(42)
n,m,N = 3,2,4
"""
Create a randomized, nonsense control problem and solve it. Since the seed is fixed, we can expect the same solution every time.
"""
# system terms
A = [urnd((n, n)) for _ in range(N)]
B = [urnd((n, m)) for _ in range(N)]
d = [urnd((n,)) for _ in range(N)]
# cost terms
Q = [urnd((n, n)) for _ in range(N)]
R = [urnd((m, m)) for _ in range(N)]
H = [urnd((m, n)) for _ in range(N)]
q = [urnd((n,)) for _ in range(N)]
qc = [urnd(()) for _ in range(N)]
r = [urnd((m,)) for _ in range(N)]
# terminal costs
QN = urnd((n, n))
qN = urnd((n,))
qcN = urnd(())
return LQR(A=A, B=B, d=d, Q=Q, R=R, H=H, q=q, r=r, qc=qc, QN=QN, qN=qN, qcN=qcN, mu=0)
if __name__ == "__main__":
(L, l), (V, v, vc) = check_LQR()
N = len(V)-1
print(", ".join([f"l[{k}]={l[k].round(4)}" for k in [N - 1, N - 2, 0]]))
print("\n".join([f"L[{k}]={L[k].round(4)}" for k in [N - 1, N - 2, 0]]))
print("\n".join([f"V[{k}]={V[k].round(4)}" for k in [0]]))
print(", ".join([f"v[{k}]={v[k].round(4)}" for k in [N, N - 1, 0]]))
print(", ".join([f"vc[{k}]={vc[k].round(4)}" for k in [N, N - 1, 0]]))
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.ex04.locomotive import LocomotiveEnvironment
from irlc import train, plot_trajectory, savepdf, Agent
from irlc.ex06.dlqr import LQR
from irlc.ex04.control_environment import ControlEnvironment
import numpy as np
import matplotlib.pyplot as plt
class LQRAgent(Agent):
def __init__(self, env : ControlEnvironment, A, B, Q, R, d=None, q=None):
N = int((env.Tmax / env.dt)) # Obtain the planning horizon
""" Define A, B as the list of A/B matrices here. I.e. x[t+1] = A x[t] + B x[t] + d.
You should use the function model.f to do this, which has build-in functionality to compute Jacobians which will be equal to A, B """
""" Define self.L, self.l here as the (lists of) control matrices. """
## TODO: Half of each line of code in the following 1 lines have been replaced by garbage. Make it work and remove the error.
#----------------------------------------------------------------------------------------------------------------------------
# (self.L, self.l), _ = LQR(A=[A]*N, B=[B]*N, d=[d]*N if d is not No???????????????????????????????????????????????????????????????????
raise NotImplementedError("Insert your solution and remove this error.")
self.dt = env.dt
super().__init__(env)
def pi(self,x, k, info=None):
"""
Compute the action here using u = L_k x + l_k.
You should use self.L, self.l to get the control matrices (i.e. L_k = self.L[k]).
"""
# TODO: 1 lines missing.
raise NotImplementedError("Compute current action here")
return u
if __name__ == "__main__":
# Make a guess at the system matrices for planning. We will return to how to compute these exactly in a later exercise.
A = np.ones((2, 2))
A[1, 0] = 0
B = np.asarray([[0], [1]])
Q = np.eye(2)*3
R = np.ones((1, 1))*2
q = np.asarray([-1.1, 0 ])
# Create and test our LQRAgent.
env = LocomotiveEnvironment(render_mode='human', Tmax=10, slope=1)
agent = LQRAgent(env, A=A, B=B, Q=Q, R=R, q=q)
stats, traj = train(env, agent, num_episodes=1)
env.reset()
savepdf("locomotive_snapshot.pdf", env=env) # Make a plot for the exercise file.
env.state_labels = ["x(t)", "v(t)"]
env.action_labels = ["u(t)"]
plot_trajectory(traj[0], env)
plt.show(block=True)
savepdf("lqr_agent")
plt.show()
env.close()
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
import matplotlib.pyplot as plt
import numpy as np
from irlc import savepdf, train
from irlc.ex04.pid_locomotive_agent import PIDLocomotiveAgent
from irlc.ex06.lqr_agent import LQRAgent
from irlc.ex04.model_harmonic import HarmonicOscilatorEnvironment
from irlc.ex06.boeing_lqr import compute_A_B_d, compute_Q_R_q
class ConstantLQRAgent(LQRAgent):
# TODO: 3 lines missing.
raise NotImplementedError("Complete this agent here. You need to update the policy-function: def pi(self, ..).")
def get_Kp_Kd(L0):
# TODO: 1 lines missing.
raise NotImplementedError("Use lqr_agent.L to define Kp and Kd.")
return Kp, Kd
if __name__ == "__main__":
Delta = 0.06 # Time discretization constant
# Define a harmonic oscillator environment. Use ..., render_mode='human' to see a visualization.
env = HarmonicOscilatorEnvironment(Tmax=8, dt=Delta, m=0.5, R=np.eye(1) * 8, render_mode=None) # set render_mode='human' to see the oscillator.
model = env.discrete_model.continuous_model # Get the ControlModel corresponding to this environment.
# Compute the discretized A, B and d matrices using the helper functions we defined in the Boeing problem.
# Note that these are for the discrete environment: x_{k+1} = A x_k + B u_k + d
A, B, d = compute_A_B_d(model, Delta)
Q, R, q = compute_Q_R_q(model, Delta)
# Run the LQR agent
lqr_agent = LQRAgent(env, A=A, B=B, d=d, Q=Q, R=R, q=q)
_, traj1 = train(env, lqr_agent, return_trajectory=True)
# Part 1. Build an agent that always takes actions u_k = L_0 x_k + l_0
constant_agent = ConstantLQRAgent(env, A=A, B=B, d=d, Q=Q, R=R, q=q)
# Check that its policy is independent of $k$:
x0, _ = env.reset()
print(f"Initial state is {x0=}")
print(f"Action at time step k=0 {constant_agent.pi(x0, k=0)=}")
print(f"Action at time step k=5 (should be the same) {constant_agent.pi(x0, k=0)=}")
_, traj2 = train(env, constant_agent, return_trajectory=True)
# Part 2. Use the L and l matrices (see lqr_agent.L and lqr_agent.l)
# to select Kp and Kd in a PID agent. Then let's use the Locomotive agent to see the effect of the controller.
# Use render_mode='human' to see its effect.
# We only need to use L.
# Hint: compare the form of the LQR and PID controller and use that to select Kp and Kd.
Kp, Kd = get_Kp_Kd(lqr_agent.L[0]) # Use lqr_agent.L to define Kp and Kd.
# Define and run the PID agent.
pid_agent = PIDLocomotiveAgent(env, env.dt, Kp=Kp, Kd=Kd)
_, traj3 = train(env, pid_agent, return_trajectory=True)
# Plot all actions and state sequences.
plt.figure(figsize=(10,5))
plt.grid()
plt.plot(traj1[0].time[:-1], traj1[0].action, label="Optimal LQR action sequence")
plt.plot(traj2[0].time[:-1], traj2[0].action, '.-', label="Constant LQR action sequence")
plt.plot(traj3[0].time[:-1], traj3[0].action, label="PID agent action sequence")
plt.xlabel("Time / Seconds")
plt.ylabel("Action / Newtons")
plt.ylim([-.2, .2])
plt.legend()
savepdf("pid_lqr_actions")
plt.show(block=True)
plt.figure(figsize=(10, 5))
plt.grid()
plt.plot(traj1[0].time, traj1[0].state[:, 0], label="Optimal LQR states x(t)")
plt.plot(traj2[0].time, traj2[0].state[:, 0], label="Constant LQR states x(t)")
plt.plot(traj3[0].time, traj3[0].state[:, 0], label="PID agent states x(t)")
plt.xlabel("Time / Seconds")
plt.ylabel("Position x(t) / Meters")
plt.ylim([-1, 1])
plt.legend()
savepdf("pid_lqr_states")
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
import numpy as np
from irlc.ex04.discrete_control_model import DiscreteControlModel
from irlc.ex04.control_environment import ControlEnvironment
from irlc.ex04.model_linear_quadratic import LinearQuadraticModel
class BoeingModel(LinearQuadraticModel):
"""
Boeing 747 level flight example.
See: https://books.google.dk/books?id=tXZDAAAAQBAJ&pg=PA147&lpg=PA147&dq=boeing+747+flight+0.322+model+longitudinal+flight&source=bl&ots=L2RpjCAWiZ&sig=ACfU3U2m0JsiHmUorwyq5REcOj2nlxZkuA&hl=en&sa=X&ved=2ahUKEwir7L3i6o3qAhWpl4sKHQV6CdcQ6AEwAHoECAoQAQ#v=onepage&q=boeing%20747%20flight%200.322%20model%20longitudinal%20flight&f=false
Also: https://web.stanford.edu/~boyd/vmls/vmls-slides.pdf
"""
state_labels = ["Longitudinal velocity (x) ft/sec", "Velocity in y-axis ft/sec", "Angular velocity",
"angle wrt. horizontal"]
action_labels = ['Elevator', "Throttle"]
observation_labels = ["Airspeed", "Climb rate"]
def __init__(self, output=None):
if output is None:
output = [10, 0]
# output = [10, 0]
A = [[-0.003, 0.039, 0, -0.322],
[-0.065, -.319, 7.74, 0],
[.02, -.101, -0.429, 0],
[0, 0, 1, 0]]
B = [[.01, 1],
[-.18, -.04],
[-1.16, .598],
[0, 0]]
A, B = np.asarray(A), np.asarray(B)
self.u0 = 7.74 # speed in hundreds of feet per second
self.P = np.asarray([[1, 0, 0, 0], [0, -1, 0, 7.74]]) # Projection of the state onto the outputs (airspeed and climb rate)
dt = 0.1 # Scale the cost by this factor.
# Set up the cost:
self.Q_obs = np.eye(2)
Q = self.P.T @ self.Q_obs @ self.P / dt
R = np.eye(2) / dt
q = -np.asarray(output) @ self.Q_obs @ self.P / dt
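# Note (illustrative): with Q = P^T Q_obs P and q = -output^T Q_obs P, the quadratic cost is, up to a constant
# and the 1/dt scaling, a tracking cost penalizing the deviation of the output y = P x (airspeed and climb rate)
# from the desired `output` vector.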
super().__init__(A=A, B=B, Q=Q, R=R, q=q)
def state2outputs(self, x):
return self.P @ x
class DiscreteBoeingModel(DiscreteControlModel):
def __init__(self, output=None):
model = BoeingModel(output=output)
dt = 0.1
super().__init__(model=model, dt=dt)
class BoeingEnvironment(ControlEnvironment):
@property
def observation_labels(self):
return self.discrete_model.continuous_model.observation_labels
def __init__(self, Tmax=10):
model = DiscreteBoeingModel()
super().__init__(discrete_model=model, Tmax=Tmax)
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
import numpy as np
from irlc.utils.graphics_util_pygame import UpgradedGraphicsUtil
from irlc.ex04.discrete_control_model import DiscreteControlModel
from irlc.ex04.control_environment import ControlEnvironment
from irlc.ex04.model_linear_quadratic import LinearQuadraticModel
from gymnasium.spaces import Box
"""
SEE: https://github.com/anassinator/ilqr/blob/master/examples/rendezvous.ipynb
"""
class ContiniousRendevouzModel(LinearQuadraticModel):
state_labels= ["x0", "y0", "x1", "y1", 'Vx0', "Vy0", "Vx1", "Vy1"]
action_labels = ['Fx0', 'Fy0', "Fx1", "Fy1"]
x0 = np.array([0, 0, 10, 10, 0, -5, 5, 0]) # Initial state.
def __init__(self, m=10.0, alpha=0.1, simple_bounds=None, cost=None):
m00 = np.zeros((4,4))
mI = np.eye(4)
A = np.block( [ [m00, mI], [m00, -alpha/m*mI] ] )
B = np.block( [ [m00], [mI/m]] )
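# Each vehicle is a 2d point mass with (optional) drag: writing the state as [positions; velocities],
# the continuous dynamics are d/dt [p; v] = [[0, I], [0, -alpha/m * I]] [p; v] + [[0], [I/m]] u.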
state_size = len(self.x0)
action_size = 4
self.m = m
self.alpha = alpha
Q = np.eye(state_size)
Q[0, 2] = Q[2, 0] = -1
Q[1, 3] = Q[3, 1] = -1
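# With these off-diagonal couplings, the position part of x^T Q x equals (x0 - x1)^2 + (y0 - y1)^2,
# i.e. the squared distance between the two vehicles; the remaining diagonal entries penalize the velocities.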
R = 0.1 * np.eye(action_size)
self.viewer = None
super().__init__(A=A, B=B, Q=Q*20, R=R*20)
def x0_bound(self) -> Box:
return Box(self.x0, self.x0) # self.bounds['x0_low'] = self.bounds['x0_high'] = list(self.x0)
def render(self, x, render_mode="human"):
""" Render the environment. You don't have to understand this code. """
if self.viewer is None:
self.viewer = HarmonicViewer(xstar=0, x0=self.x0) # target: x=0.
self.viewer.update(x)
import time
time.sleep(0.05)
return self.viewer.blit(render_mode=render_mode)
def close(self):
pass
class DiscreteRendevouzModel(DiscreteControlModel):
def __init__(self, dt=0.1, cost=None, transform_actions=True, **kwargs):
model = ContiniousRendevouzModel(**kwargs)
super().__init__(model=model, dt=dt, cost=cost)
class RendevouzEnvironment(ControlEnvironment):
def __init__(self, Tmax=20, render_mode=None, **kwargs):
discrete_model = DiscreteRendevouzModel(**kwargs)
super().__init__(discrete_model, Tmax=Tmax, render_mode=render_mode)
class HarmonicViewer(UpgradedGraphicsUtil):
def __init__(self, xstar = 0, x0=None):
self.xstar = xstar
width = 800
self.x0 = x0
sz = 20
self.scale = width/(2*sz)
self.p1h = []
self.p2h = []
super().__init__(screen_width=width, xmin=-sz, xmax=sz, ymin=-sz, ymax=sz, title='Rendevouz environment')
def render(self):
self.draw_background(background_color=(255, 255, 255))
# dw = self.dw
p1 = self.x[:2]
p2 = self.x[2:4]
self.p1h.append(p1)
self.p2h.append(p2)
self.circle("asdf", pos=p1, r=.5 * self.scale, fillColor=(200, 0, 0))
self.circle("asdf", pos=p2, r=.5 * self.scale, fillColor=(0, 0, 200) )
if len(self.p1h) > 2:
self.polyline('...', np.stack(self.p1h)[:,0], np.stack(self.p1h)[:,1], width=1, color=(200, 0, 0))
self.polyline('...', np.stack(self.p2h)[:,0], np.stack(self.p2h)[:,1], width=1, color=(0, 0, 200))
if tuple(self.x) == tuple(self.x0):
self.p1h = []
self.p2h = []
def update(self, x):
self.x = x
if __name__ == "__main__":
from irlc import Agent, train
env = RendevouzEnvironment(render_mode='human')
# Note: constructing an LQRAgent here would also require the system matrices (A, B, Q, R, ...);
# below we simply run a random agent to visualize the environment.
stats, traj = train(env, Agent(env), num_episodes=1)
pass
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
"""This directory contains the exercises for week 7."""
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
"""
References:
[Her25] Tue Herlau. Sequential decision making. (Freely available online), 2025.
[TET12] Yuval Tassa, Tom Erez, and Emanuel Todorov. Synthesis and stabilization of complex behaviors through online trajectory optimization. In 2012 IEEE/RSJ International Conference on Intelligent Robots and Systems, 4906–4913. IEEE, 2012. (See tassa2012.pdf).
[Har20] James Harrison. Optimal and learning-based control combined course notes. (See AA203combined.pdf), 2020.
"""
r"""
This implements two methods: The basic ILQR method, described in (Her25, Algorithm 24), and the linesearch-based method
described in (Her25, Algorithm 25).
If you are interested, you can consult (TET12) (which contains generalization to DDP) and (Har20, Alg 1).
"""
import warnings
import numpy as np
from irlc.ex06.dlqr import LQR
from irlc.ex04.discrete_control_model import DiscreteControlModel
def ilqr_basic(model : DiscreteControlModel, N, x0, us_init : list = None, n_iterations=500, verbose=True):
r"""
Implements the basic ilqr algorithm, i.e. (Her25, Algorithm 24). Our notation (x_bar, etc.) will be consistent with the lecture slides
"""
mu, alpha = 1, 1 # Hyperparameters. For now, just let them have defaults and don't change them
# Create a random initial state-sequence
n, m = model.state_size, model.action_size
u_bar = [np.random.uniform(-1, 1,(model.action_size,)) for _ in range(N)] if us_init is None else us_init
x_bar = [x0] + [np.zeros(n, )] * N
"""
Initialize the nominal trajectory x_bar, u_bar using u_bar and x0 (i.e. simulate the system from x0 using the action sequence u_bar).
The simplest way to do this is to call forward_pass with all-zero sequence of control vector/matrix l, L.
"""
# TODO: 2 lines missing.
raise NotImplementedError("Initialize x_bar, u_bar here")
J_hist = []
for i in range(n_iterations):
"""
Compute derivatives around trajectory and cost estimate J of trajectory. To do so, use the get_derivatives
function. Remember the functions will return lists of derivatives.
"""
# TODO: 2 lines missing.
raise NotImplementedError("Compute J and derivatives A_k = f_x, B_k = f_u, ....")
""" Backward pass: Obtain feedback law matrices l, L using the backward_pass function.
"""
# TODO: 1 lines missing.
raise NotImplementedError("Compute L, l = .... here")
""" Forward pass: Given L, l matrices computed above, simulate new (optimal) action sequence.
In the lecture slides, this is similar to how we compute u^*_k and x_k
Once they are computed, iterate the iLQR algorithm by setting x_bar, u_bar equal to these values
"""
# TODO: 1 lines missing.
raise NotImplementedError("Compute x_bar, u_bar = ...")
if verbose:
print(f"{i}> J={J:4g}, change in cost since last iteration {0 if i == 0 else J-J_hist[-1]:4g}")
J_hist.append(J)
return x_bar, u_bar, J_hist, L, l
def ilqr_linesearch(model : DiscreteControlModel, N, x0, n_iterations, us_init=None, tol=1e-6, verbose=True):
r"""
For the linesearch, implement the method described in (Her25, Algorithm 25) (we will use regular iLQR, not DDP!)
"""
# The range of alpha-values to try out in the linesearch
# plus parameters relevant for regularization scheduling.
alphas = 1.1 ** (-np.arange(10) ** 2) # alphas = [1, 1.1^{-1}, 1.1^{-4}, 1.1^{-9}, ...]
mu_min = 1e-6
mu_max = 1e10
Delta_0 = 2
mu = 1.0
Delta = Delta_0
n, m = model.state_size, model.action_size
u_bar = [np.random.uniform(-1, 1, (model.action_size,)) for _ in range(N)] if us_init is None else us_init
x_bar = [x0] + [np.zeros(n, )] * (N)
# Initialize the nominal trajectory x_bar, u_bar (same as in ilqr_basic)
# TODO: 2 lines missing.
raise NotImplementedError("Copy-paste code from previous solution")
J_hist = []
converged = False
for i in range(n_iterations):
alpha_was_accepted = False
""" Step 1: Compute derivatives around trajectory and cost estimate of trajectory.
(copy-paste from basic implementation). In our implementation, J_prime = J_{u^star}(x_0) """
# TODO: 2 lines missing.
raise NotImplementedError("Obtain derivatives f_x, f_u, ... as well as cost of trajectory J_bar = ...")
try:
"""
Step 2: Backward pass to obtain control law (l, L). Same as before so more copy-paste
"""
# TODO: 1 lines missing.
raise NotImplementedError("Obtain l, L = ... in backward pass")
"""
Step 3: Forward pass and alpha scheduling.
Decrease alpha and check the condition J_new < J_prime. Apply the regularization scheduling as needed. """
for alpha in alphas:
x_hat, u_hat = forward_pass(model, x_bar, u_bar, L=L, l=l, alpha=alpha) # Simulate trajectory using this alpha
# TODO: 1 lines missing.
raise NotImplementedError("Compute J_new = ... as the cost of trajectory x_hat, u_hat")
if J_new < J_prime:
""" Linesearch proposed trajectory accepted! Set current trajectory equal to x_hat, u_hat. """
if np.abs((J_prime - J_new) / J_prime) < tol:
converged = True # Method does not seem to decrease J; converged. Break and return.
J_prime = J_new
x_bar, u_bar = x_hat, u_hat
'''
The update was accepted and you should change the regularization term mu,
and the related scheduling term Delta.
'''
# TODO: 1 lines missing.
raise NotImplementedError("Delta, mu = ...")
alpha_was_accepted = True # accept this alpha
break
except np.linalg.LinAlgError as e:
# Matrix in dlqr was not positive-definite and this diverged
warnings.warn(str(e))
if not alpha_was_accepted:
''' No alphas were accepted, which is not too hot. Regularization should change
'''
# TODO: 1 lines missing.
raise NotImplementedError("Delta, mu = ...")
if mu_max and mu >= mu_max:
raise Exception("Exceeded max regularization term; we are stuffed.")
dJ = 0 if i == 0 else J_prime-J_hist[-1]
info = "converged" if converged else ("accepted" if alpha_was_accepted else "failed")
if verbose:
print(f"{i}> J={J_prime:4g}, decrease in cost {dJ:4g} ({info}).\nx[N]={x_bar[-1].round(2)}")
J_hist.append(J_prime)
if converged:
break
return x_bar, u_bar, J_hist, L, l
def backward_pass(A : list, B : list, c_x : list, c_u : list, c_xx : list, c_ux : list, c_uu : list, mu=1):
r"""Given all derivatives, apply the LQR algorithm to get the control law.
The input arguments are described in the online documentation and the lecture notes. You should use them to call your
implementation of the :func:`~irlc.ex06.dlqr.LQR` method. Note that you should give a value for all inputs except the ``d``-term.
:param A: linearization of the dynamics matrices :math:`A_k`.
:param B: linearization of the dynamics matrices :math:`B_k`.
:param c_x: Cost terms corresponding to :math:`\mathbf{q}_k`
:param c_u: Cost terms corresponding to :math:`\mathbf{r}_k`
:param c_xx: Cost terms corresponding to :math:`Q_k`
:param c_ux: Cost terms corresponding to :math:`H_k`
:param c_uu: Cost terms corresponding to :math:`R_k`
:param mu: Regularization parameter for the LQR method
:return: The control law :math:`L_k, \mathbf{l}_k` as two lists.
"""
Q, QN = c_xx[:-1], c_xx[-1] # An example.
# TODO: 4 lines missing.
raise NotImplementedError("Insert your solution and remove this error.")
# Define the inputs using the linearization inputs.
(L, l), (V, v, vc) = LQR(A=A, B=B, R=R, Q=Q, QN=QN, H=H, q=q, qN=qN, r=r, mu=mu)
return L, l
def cost_of_trajectory(model : DiscreteControlModel, xs : list, us : list) -> float:
r"""Helper function which computes the cost of the trajectory.
The cost is defined as:
.. math::
c_N(\bar {\mathbf x}_N) + \sum_{k=0}^{N-1} c_k(\bar {\mathbf x}_k, \bar {\mathbf u}_k)
and to compute it, you should use the two helper methods ``model.cost.c`` and ``model.cost.cN``
(see :func:`~irlc.ex04.discrete_control_cost.DiscreteQRCost.c` and :func:`~irlc.ex04.discrete_control_cost.DiscreteQRCost.cN`).
:param model: The control model used to compute the cost.
:param xs: A list of length :math:`N+1` of the form :math:`\begin{bmatrix}\bar {\mathbf x}_0 & \dots & \bar {\mathbf x}_N \end{bmatrix}`
:param us: A list of length :math:`N` of the form :math:`\begin{bmatrix}\bar {\mathbf u}_0 & \dots & \bar {\mathbf u}_{N-1} \end{bmatrix}`
:return: The cost as a number.
"""
N = len(us)
JN = model.cost.cN(xs[-1])
return sum(map(lambda args: model.cost.c(*args), zip(xs[:-1], us, range(N)))) + JN
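# Illustrative example: for a single-step problem (N=1), the expression above reduces to
# model.cost.c(xs[0], us[0], 0) + model.cost.cN(xs[1]).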
def get_derivatives(model : DiscreteControlModel, x_bar : list, u_bar : list):
"""Compute all the derivatives used in the model.
The return type should match the meaning in (Her25, Subequation 17.8) and in the online documentation.
- ``c`` should be a list of length :math:`N+1`
- ``c_x`` should be a list of length :math:`N+1`
- ``c_xx`` should be a list of length :math:`N+1`
- ``c_u`` should be a list of length :math:`N`
- ``c_uu`` should be a list of length :math:`N`
- ``c_ux`` should be a list of length :math:`N`
- ``A`` should be a list of length :math:`N`
- ``B`` should be a list of length :math:`N`
Use the model to compute these terms. For instance, this will compute the first terms ``A[0]`` and ``B[0]``::
A0, B0 = model.f_jacobian(x_bar[0], u_bar[0], 0)
Meanwhile, to compute the first terms of the cost-functions you should use::
c[0], c_x[0], c_u[0], c_xx[0], c_ux[0], c_uu[0] = model.cost.c(x_bar[0], u_bar[0], k=0, compute_gradients=True)
:param model: The model to use when computing the derivatives of the cost
:param x_bar: The nominal state-trajectory
:param u_bar: The nominal action-trajectory
:return: Lists of all derivatives computed around the nominal trajectory (see the lecture notes).
"""
N = len(u_bar)
""" Compute A_k, B_k (lists of matrices of length N) as the jacobians of the dynamics. To do so,
recall from the online documentation that:
x, f_x, f_u = model.f(x, u, k, compute_jacobian=True)
"""
A = [None]*N
B = [None]*N
c = [None] * (N+1)
c_x = [None] * (N + 1)
c_xx = [None] * (N + 1)
c_u = [None] * (N+1)
c_ux = [None] * (N + 1)
c_uu = [None] * (N + 1)
# Now update each entry correctly (i.e., ensure there are no None elements left).
# TODO: 4 lines missing.
raise NotImplementedError("Insert your solution and remove this error.")
""" Compute derivatives of the cost function. For terms not including u these should be of length N+1
(because of gN!), for the other lists of length N
recall model.cost.c has output:
c[i], c_x[i], c_u[i], c_xx[i], c_ux[i], c_uu[i] = model.cost.c(x, u, i, compute_gradients=True)
"""
# TODO: 2 lines missing.
raise NotImplementedError("Insert your solution and remove this error.")
# Concatenate the derivatives associated with the last time point N.
cN, c_xN, c_xxN = model.cost.cN(x_bar[N], compute_gradients=True)
# TODO: 3 lines missing.
raise NotImplementedError("Update c, c_x and c_xx with the terminal terms.")
return A, B, c, c_x, c_u, c_xx, c_ux, c_uu
def forward_pass(model : DiscreteControlModel, x_bar : list, u_bar : list, L : list, l : list, alpha=1.0):
r"""Simulates the effect of the controller on the model
We assume the system starts in :math:`\mathbf{x}_0 = \bar {\mathbf x}_0`, and then simulate the effect of
generating actions using the closed-loop policy
.. math::
\mathbf{u}_k = \bar {\mathbf u}_k + \alpha \mathbf{l}_k + L_k (\mathbf{x}_k - \bar { \mathbf x}_k)
(see (Her25, eq. (17.16))).
:param model: The model used to compute the dynamics.
:param x_bar: A nominal list of states
:param u_bar: A nominal list of actions
:param L: A list of control matrices :math:`L_k`
:param l: A list of control vectors :math:`\mathbf{l}_k`
:param alpha: The linesearch parameter.
:return: A list of length :math:`N+1` of simulated states and a list of length :math:`N` of simulated actions.
"""
N = len(u_bar)
x = [None] * (N+1)
u_star = [None] * N
x[0] = x_bar[0].copy()
for i in range(N):
r""" Compute using (Her25, eq. (17.16))
u_{i} = ...
"""
# TODO: 1 lines missing.
raise NotImplementedError("u_star[i] = ....")
""" Remember to compute
x_{i+1} = f_k(x_i, u_i^*)
here:
"""
# TODO: 1 lines missing.
raise NotImplementedError("x[i+1] = ...")
return x, u_star
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
"""
References:
[Her25] Tue Herlau. Sequential decision making. (Freely available online), 2025.
"""
from irlc.ex06.model_rendevouz import RendevouzEnvironment
from irlc.ex07.ilqr_rendovouz_basic import ilqr
from irlc import train
from irlc import Agent
import numpy as np
class ILQRAgent(Agent):
def __init__(self, env, discrete_model, N=250, ilqr_iterations=10, use_ubar=False, use_linesearch=True):
super().__init__(env)
self.dt = discrete_model.dt
# x0 = discrete_model.reset()
x0,_ = env.reset()
x0 = np.asarray(x0) # Get the initial state. We will take this from the environment.
xs, us, self.J_hist, L, l = ilqr(discrete_model, N, x0, n_iter=ilqr_iterations, use_linesearch=use_linesearch)
self.ubar = us
self.xbar = xs
self.L = L
self.l = l
self.use_ubar = use_ubar # Should policy use open-loop u-bar (suboptimal) or closed-loop L_k, l_k?
def pi(self, x, k, info=None):
if self.use_ubar:
u = self.ubar[k]
else:
if k >= len(self.ubar):
print(k, len(self.ubar))
k = len(self.ubar)-1
# See (Her25, eq. (17.16))
# TODO: 1 lines missing.
raise NotImplementedError("Generate action using the control matrices.")
return u
def solve_rendevouz():
env = RendevouzEnvironment()
N = int(env.Tmax / env.dt)
agent = ILQRAgent(env, env.discrete_model, N=N)
stats, trajectories = train(env, agent, num_episodes=1, return_trajectory=True)
env.close()
return stats, trajectories, agent
if __name__ == "__main__":
from irlc.ex07.ilqr_rendovouz_basic import plot_vehicles
import matplotlib.pyplot as plt
stats, trajectories, agent = solve_rendevouz()
t = trajectories[0].state
xb = agent.xbar
plot_vehicles(t[:,0], t[:,1], t[:,2], t[:,3], linespec=':', legend=("RK4 policy simulation", "RK4 policy simulation"))
plot_vehicles(xb[:,0], xb[:,1], xb[:,2], xb[:,3], linespec='-')
plt.legend()
plt.show()
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
import matplotlib.pyplot as plt
import numpy as np
from irlc.ex05.model_cartpole import GymSinCosCartpoleEnvironment
import time
from irlc.ex07.ilqr_rendovouz_basic import ilqr
from irlc import savepdf
# Number of steps.
N = 100
def cartpole(use_linesearch):
env = GymSinCosCartpoleEnvironment(render_mode='human')
x0, info = env.reset()
xs, us, J_hist, L, l = ilqr(env.discrete_model, N, x0, n_iter=300, use_linesearch=use_linesearch)
plot_cartpole(env, xs, us, use_linesearch=use_linesearch)
def plot_cartpole(env, xs, us, J_hist=None, use_linesearch=True):
animate(xs, env)
env.close()
# Transform actions/states using built-in functions.
def gapply(f, xm):
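"""Apply f to each row of xm and stack the results (used to undo the variable transformation)."""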
usplit = np.split(xm, len(xm))
u2 = [f(u.flat) for u in usplit]
us = np.stack(u2)
return us
us = gapply(env.discrete_model.phi_u_inv, us)
xs = gapply(env.discrete_model.phi_x_inv, xs)
t = np.arange(N + 1) * env.dt
x = xs[:, 0]
theta = np.unwrap(xs[:, 2]) # Makes for smoother plots.
theta_dot = xs[:, 3]
pdf_ex = '_linesearch' if use_linesearch else ''
ev = 'cartpole_'
plt.plot(theta, theta_dot)
plt.xlabel("theta (rad)")
plt.ylabel("theta_dot (rad/s)")
plt.title("Orientation Phase Plot")
plt.grid()
savepdf(f"{ev}theta{pdf_ex}")
plt.show()
_ = plt.plot(t[:-1], us)
_ = plt.xlabel("time (s)")
_ = plt.ylabel("Force (N)")
_ = plt.title("Action path")
plt.grid()
savepdf(f"{ev}action{pdf_ex}")
plt.show()
_ = plt.plot(t, x)
_ = plt.xlabel("time (s)")
_ = plt.ylabel("Position (m)")
_ = plt.title("Cart position")
plt.grid()
savepdf(f"{ev}position{pdf_ex}")
plt.show()
if J_hist is not None:
_ = plt.plot(J_hist)
_ = plt.xlabel("Iteration")
_ = plt.ylabel("Total cost")
_ = plt.title("Total cost-to-go")
plt.grid()
savepdf(f"{ev}J{pdf_ex}")
plt.show()
def animate(xs0, env):
render = True
if render:
for i in range(2):
render_(xs0, env.discrete_model)
time.sleep(1)
# env.viewer.close()
def render_(xs, env):
for i in range(xs.shape[0]):
x = xs[i]
env.render(x=x)
if __name__ == "__main__":
cartpole(use_linesearch=True)
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
import numpy as np
from irlc.ex07.ilqr_agent import ILQRAgent
from irlc import train
from irlc import savepdf
import matplotlib.pyplot as plt
from irlc.ex05.model_cartpole import GymSinCosCartpoleEnvironment
def cartpole_experiment(N=12, use_linesearch=True, figex="", animate=True):
np.random.seed(2)
Tmax = .9
dt = Tmax/N
env = GymSinCosCartpoleEnvironment(dt=dt, Tmax=Tmax, supersample_trajectory=True, render_mode='human' if animate else None)
agent = ILQRAgent(env, env.discrete_model, N=N, ilqr_iterations=200, use_linesearch=use_linesearch)
stats, trajectories = train(env, agent, num_episodes=1, return_trajectory=True)
agent.use_ubar = True
stats2, trajectories2 = train(env, agent, num_episodes=1, return_trajectory=True)
env.close()
xb = agent.xbar
tb = np.arange(N+1)*dt
plt.figure(figsize=(8,6))
F = 3
plt.plot(trajectories[0].time, trajectories[0].state[:,F], 'k-', label='Closed-loop $\\pi$')
plt.plot(trajectories2[0].time, trajectories2[0].state[:,F], '-', label='Open-loop $\\bar{u}_k$')
plt.plot(tb, xb[:,F], '.-', label="iLQR prediction $\\bar{x}_k$")
plt.xlabel("Time/seconds")
plt.ylabel("$\cos(\\theta)$")
plt.title(f"Cartpole environment $T={N}$")
plt.grid()
plt.legend()
ev = "pendulum"
savepdf(f"irlc_cartpole_theta_N{N}_{use_linesearch}{figex}")
plt.show()
def plt_cartpole():
cartpole_experiment(N=50, use_linesearch=True, animate=True)
if __name__ == '__main__':
plt_cartpole()
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
import numpy as np
from irlc.ex04.model_pendulum import DiscreteSinCosPendulumModel
import matplotlib.pyplot as plt
import time
from irlc.ex07.ilqr_rendovouz_basic import ilqr
from irlc import savepdf
def pendulum(use_linesearch):
print("> Using iLQR to solve Pendulum swingup task. Using linesearch?", use_linesearch)
dt = 0.02
model = DiscreteSinCosPendulumModel(dt, cost=None)
N = 250
# This rather clunky line gets us the initial state; we transform the bound by the variable transformation.
x0 = np.asarray(model.phi_x(model.continuous_model.x0_bound().low))
n_iter = 200 # Use 200 iLQR iterations.
# xs, us, J_hist, L, l = ilqr(model, ...) Write a function call like this, but with the correct parameters
# TODO: 1 lines missing.
raise NotImplementedError("Call iLQR here (see hint above).")
render = True
if render:
for i in range(2):
render_(xs, model)
time.sleep(2) # Sleep for two seconds between simulations.
model.close()
xs = np.asarray([model.phi_x_inv(x) for x, u in zip(xs, us)]) # Convert to radians. We use the built-in functions to change coordinates.
xs, us = np.asarray(xs), np.asarray(us)
t = np.arange(N) * dt
theta = np.unwrap(xs[:, 0]) # Makes for smoother plots.
theta_dot = xs[:, 1]
pdf_ex = '_linesearch' if use_linesearch else ''
stitle = "(using linesearch)" if use_linesearch else "(not using linesearch) "
ev = 'pendulum_'
_ = plt.plot(theta, theta_dot)
_ = plt.xlabel("$\\theta$ (rad)")
_ = plt.ylabel("$d\\theta/dt$ (rad/s)")
_ = plt.title(f"Phase Plot {stitle}")
plt.grid()
savepdf(f"{ev}theta{pdf_ex}")
plt.show()
_ = plt.plot(t, us)
_ = plt.xlabel("time (s)")
_ = plt.ylabel("Force (N)")
_ = plt.title(f"Action path {stitle}")
plt.grid()
savepdf(f"{ev}action{pdf_ex}")
plt.show()
_ = plt.plot(J_hist)
_ = plt.xlabel("Iteration")
_ = plt.ylabel("Total cost")
_ = plt.title(f"Total cost-to-go {stitle}")
plt.grid()
savepdf(f"{ev}J{pdf_ex}")
plt.show()
def render_(xs, env):
for i in range(xs.shape[0]):
env.render(xs[i])
if __name__ == "__main__":
pendulum(use_linesearch=False)
pendulum(use_linesearch=True)
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
import numpy as np
from irlc.ex04.model_pendulum import GymSinCosPendulumEnvironment
from irlc.ex07.ilqr_agent import ILQRAgent
from irlc import train
from irlc import savepdf
import matplotlib.pyplot as plt
Tmax = 3
def pen_experiment(N=12, use_linesearch=True,figex="", animate=True):
dt = Tmax / N
env = GymSinCosPendulumEnvironment(dt, Tmax=Tmax, supersample_trajectory=True, render_mode="human" if animate else None)
agent = ILQRAgent(env, env.discrete_model, N=N, ilqr_iterations=200, use_linesearch=use_linesearch)
stats, trajectories = train(env, agent, num_episodes=1, return_trajectory=True)
agent.use_ubar = True
stats2, trajectories2 = train(env, agent, num_episodes=1, return_trajectory=True)
env.close()
plot_pendulum_trajectory(env, trajectories[0], label='Closed-loop $\\pi$')
xb = agent.xbar
tb = np.arange(N+1)*dt
plt.figure(figsize=(12, 6))
plt.plot(trajectories[0].time, trajectories[0].state[:,1], '-', label='Closed-loop $\\pi(x_k)$')
plt.plot(trajectories2[0].time, trajectories2[0].state[:,1], '-', label='Open-loop $\\bar{u}_k$')
plt.plot(tb, xb[:,1], 'o-', label="iLQR prediction $\\bar{x}_k$")
plt.grid()
plt.legend()
ev = "pendulum"
savepdf(f"irlc_pendulum_theta_N{N}_{use_linesearch}{figex}")
plt.show()
## Plot J
plt.figure(figsize=(6, 6))
plt.semilogy(agent.J_hist, 'k.-')
plt.xlabel("iLQR Iterations")
plt.ylabel("Cost function estimate $J$")
# plt.title("Last value: {")
plt.grid()
savepdf(f"irlc_pendulum_J_N{N}_{use_linesearch}{figex}")
plt.show()
def plot_pendulum_trajectory(env, traj, style='k.-', label=None, action=False, **kwargs):
if action:
y = traj.action[:, 0]
y = np.clip(y, env.action_space.low[0], env.action_space.high[0])
else:
y = traj.state[:, 1]
plt.plot(traj.time[:-1] if action else traj.time, y, style, label=label, **kwargs)
plt.xlabel("Time/seconds")
if action:
plt.ylabel("Torque $u$")
else:
plt.ylabel(r"$\cos(\theta)$")
plt.grid()
pass
N = 50
if __name__ == "__main__":
pen_experiment(N=N, use_linesearch=True)
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc.ex07.ilqr_rendovouz_basic import plot_rendevouz
if __name__ == "__main__":
plot_rendevouz(use_linesearch=True)
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
import numpy as np
import matplotlib.pyplot as plt
from irlc import savepdf
from irlc.ex07.ilqr import ilqr_basic, ilqr_linesearch
from irlc.ex06.model_rendevouz import DiscreteRendevouzModel
from irlc.ex04.control_environment import ControlEnvironment
from irlc.ex04.discrete_control_model import DiscreteControlModel
# plt.ion()
def ilqr(model : DiscreteControlModel, N, x0, n_iter, use_linesearch, verbose=True):
if not use_linesearch:
xs, us, J_hist, L, l = ilqr_basic(model, N, x0, n_iterations=n_iter,verbose=verbose)
else:
xs, us, J_hist, L, l = ilqr_linesearch(model, N, x0, n_iterations=n_iter, tol=1e-6,verbose=verbose)
xs, us = np.stack(xs), np.stack(us)
return xs, us, J_hist, L, l
def plot_vehicles(x_0, y_0, x_1, y_1, linespec='-', legend=("Vehicle 1", "Vehicle 2")):
_ = plt.title("Trajectory of the two omnidirectional vehicles")
_ = plt.plot(x_0, y_0, "r"+linespec, label=legend[0])
_ = plt.plot(x_1, y_1, "b"+linespec, label=legend[1])
Tmax = 20
def solve_rendovouz(use_linesearch=False):
model = DiscreteRendevouzModel()
x0 = np.asarray(model.continuous_model.x0_bound().low) # Starting position
N = int(Tmax/model.dt)
return ilqr(model, N, x0, n_iter=10, use_linesearch=use_linesearch), model
def plot_rendevouz(use_linesearch=False):
(xs, us, J_hist, _, _), env = solve_rendovouz(use_linesearch=use_linesearch)
N = int(Tmax / env.dt)
dt = env.dt
x_0 = xs[:, 0]
y_0 = xs[:, 1]
x_1 = xs[:, 2]
y_1 = xs[:, 3]
x_0_dot = xs[:, 4]
y_0_dot = xs[:, 5]
x_1_dot = xs[:, 6]
y_1_dot = xs[:, 7]
pdf_ex = '_linesearch' if use_linesearch else ''
ev = 'rendevouz_'
plot_vehicles(x_0, y_0, x_1, y_1, linespec='-', legend=("Vehicle 1", "Vehicle 2"))
plt.legend()
savepdf(f'{ev}trajectory{pdf_ex}')
plt.show()
t = np.arange(N + 1) * dt
_ = plt.plot(t, x_0, "r")
_ = plt.plot(t, x_1, "b")
_ = plt.xlabel("Time (s)")
_ = plt.ylabel("x (m)")
_ = plt.title("X positional paths")
_ = plt.legend(["Vehicle 1", "Vehicle 2"])
savepdf(f'{ev}vehicles_x_pos{pdf_ex}')
plt.show()
_ = plt.plot(t, y_0, "r")
_ = plt.plot(t, y_1, "b")
_ = plt.xlabel("Time (s)")
_ = plt.ylabel("y (m)")
_ = plt.title("Y positional paths")
_ = plt.legend(["Vehicle 1", "Vehicle 2"])
savepdf(f'{ev}vehicles_y_pos{pdf_ex}')
plt.show()
_ = plt.plot(t, x_0_dot, "r")
_ = plt.plot(t, x_1_dot, "b")
_ = plt.xlabel("Time (s)")
_ = plt.ylabel("x_dot (m)")
_ = plt.title("X velocity paths")
_ = plt.legend(["Vehicle 1", "Vehicle 2"])
savepdf(f'{ev}vehicles_vx{pdf_ex}')
plt.show()
_ = plt.plot(t, y_0_dot, "r")
_ = plt.plot(t, y_1_dot, "b")
_ = plt.xlabel("Time (s)")
_ = plt.ylabel("y_dot (m)")
_ = plt.title("Y velocity paths")
_ = plt.legend(["Vehicle 1", "Vehicle 2"])
savepdf(f'{ev}vehicles_vy{pdf_ex}')
plt.show()
_ = plt.plot(J_hist)
_ = plt.xlabel("Iteration")
_ = plt.ylabel("Total cost")
_ = plt.title("Total cost-to-go")
savepdf(f'{ev}cost_to_go{pdf_ex}')
plt.show()
pass
if __name__ == "__main__":
plot_rendevouz(use_linesearch=False)
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
"""
References:
[Her25] Tue Herlau. Sequential decision making. (Freely available online), 2025.
"""
from irlc.ex06.dlqr import LQR
from irlc import Agent
from irlc.ex05.model_cartpole import GymSinCosCartpoleEnvironment
from irlc import train, savepdf
import matplotlib.pyplot as plt
import numpy as np
from irlc.ex04.control_environment import ControlEnvironment
from irlc.ex04.discrete_control_model import DiscreteControlModel
class LinearizationAgent(Agent):
""" Implement the simple linearization procedure described in (Her25, Algorithm 23) which expands around a single fixed point. """
def __init__(self, env: ControlEnvironment, model : DiscreteControlModel, xbar=None, ubar=None):
self.model = model
N = 50 # Plan on this horizon. The control matrices will converge fairly quickly.
""" Define A, B, d as the list of A/B matrices here. I.e. x[t+1] = A x[t] + B u[t] + d.
You should use the function model.f to do this, which has built-in functionality to compute the Jacobians, which will be equal to A, B.
It is important that you linearize around xbar, ubar. See (Her25, Section 17.1) for further details. """
# TODO: 4 lines missing.
raise NotImplementedError("Insert your solution and remove this error.")
Q, q, R = self.model.cost.Q, self.model.cost.q, self.model.cost.R
""" Define self.L, self.l here as the (lists of) control matrices. """
# TODO: 1 lines missing.
raise NotImplementedError("Compute control matrices L, l here using LQR(...)")
super().__init__(env)
def pi(self, x, k, info=None):
"""
Compute the action here using u_k = L_0 x_k + l_0. The control matrix/vector L_0 can be found as the output from LQR, i.e.
L_0 = L[0] and l_0 = l[0].
The reason we use L_0, l_0 (and not L_k, l_k) is that the LQR problem is itself only an approximation of the true dynamics near the fixed point,
and the resulting stationary controller will be able to balance the pendulum for an arbitrarily long time (beyond the planning horizon).
"""
# TODO: 1 lines missing.
raise NotImplementedError("Compute current action here")
return u
def get_offbalance_cart(waiting_steps=30, sleep_time=0.1):
env = GymSinCosCartpoleEnvironment(Tmax=3, render_mode='human')
env.reset()
import time
time.sleep(sleep_time)
env.state = env.discrete_model.x_upright
env.state[-1] = 0.01 # a bit of angular speed.
for _ in range(waiting_steps): # Simulate the environment for 30 steps to get things out of balance.
env.step(1)
time.sleep(sleep_time)
return env
if __name__ == "__main__":
np.random.seed(42) # I don't think these results are seed-dependent but let's make sure.
from irlc import plot_trajectory
env = get_offbalance_cart(4) # Let the cart run for a few steps to get it off-balance. Same idea as in the PID control exercise.
agent = LinearizationAgent(env, model=env.discrete_model, xbar=env.discrete_model.x_upright, ubar=env.action_space.sample()*0)
_, trajectories = train(env, agent, num_episodes=1, return_trajectory=True, reset=False) # Note reset=False to maintain initial conditions.
plot_trajectory(trajectories[0], env, xkeys=[0,2, 3], ukeys=[0])
env.close()
savepdf("linearization_cartpole")
plt.show()
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
# This file may not be shared/redistributed without permission. Please read copyright notice in the git repo. If this file contains other copyright notices disregard this text.
from irlc import Agent, train
from irlc.ex05.model_cartpole import GymSinCosCartpoleEnvironment
if __name__ == "__main__":
env = GymSinCosCartpoleEnvironment(Tmax=20, render_mode='human')
train(env, Agent(env), num_episodes=1)
env.close()