Commit 7f1b176d authored by Vedrana Andersen Dahl

weeks 8-10

parent af7825bb
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Mar 24 14:33:58 2021
@author: abda
"""
#%%
import numpy as np
import matplotlib.pyplot as plt
import make_data
#%%
n = 500
example_nr = 1
noise = 1
X, T, x, dim = make_data.make_data(example_nr, n, noise)
fig, ax = plt.subplots(1,1)
ax.scatter(X[0:n,0],X[0:n,1],c = 'red', alpha = 0.3, s = 15)
ax.scatter(X[n:2*n,0],X[n:2*n,1],c = 'green', alpha = 0.3, s = 15)
ax.set_aspect('equal', 'box')
plt.title('training')
plt.show()
#%%
Xo = x.reshape((100,100,2))
plt.imshow(Xo[:,:,1])
#%%
m = np.mean(X,axis = 0)
s = np.std(X,axis = 0)
Xc = (X - m)/s
xc = (x - m)/s
#%%
n_hidden = 10
W = []
W.append(np.random.randn(3, n_hidden)*np.sqrt(2/3))
W.append(np.random.randn(n_hidden+1, 2)*np.sqrt(2/(n_hidden+1)))
def forwardsimple(xb, W):
    n_pts = xb.shape[0]
    z = np.c_[xb, np.ones(n_pts)]@W[0]
    h = np.maximum(z, 0)
    yh = np.c_[h, np.ones(n_pts)]@W[1]
    y = np.exp(yh)/np.sum(np.exp(yh), axis=1, keepdims=True)
    return y, h
y, h = forwardsimple(xc, W)
Y = y.reshape((100,100,2))
plt.imshow(Y[:,:,1])
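#%%
# Quick sanity check (a sketch): each row of y is a softmax output, so it
# should be a probability distribution summing to 1.
print('y shape:', y.shape)
print('rows sum to 1:', np.allclose(y.sum(axis=1), 1.0))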
# %%
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Mar 25 08:47:29 2020
@author: abda
"""
#%%
import numpy as np
def make_data(example_nr, n = 200, noise = 1):
    # Generate data for training a simple neural network.
    #
    # def make_data(example_nr, n = 200, noise = 1):
    #     ...
    #     return X, T, x, dim
    #
    # Input:
    #   example_nr - a number 1 - 3 for each example
    #   n - number of points in each data set
    #   noise - a number to increase or decrease the noise level (if changed,
    #       choose between 0.5 and 2)
    # Output:
    #   X - 2n x 2 array of points (there are n points in each class)
    #   T - 2n x 2 target values
    #   x - regularly sampled points on the area covered by the points, to be
    #       used for testing the neural network
    #   dim - dimensionality of the area covered by the points
    #
    # Authors: Vedrana Andersen Dahl and Anders Bjorholm Dahl - 25/3-2020
    # vand@dtu.dk, abda@dtu.dk
    #
    if n % 2 == 1:
        n += 1
    dim = np.array([100, 100])
    QX, QY = np.meshgrid(range(0, dim[0]), range(0, dim[1]))
    x = np.c_[np.ravel(QX), np.ravel(QY)]
    K = np.array([n, n])
    T = np.r_[np.ones((n, 1))*np.array([1, 0]), np.ones((n, 1))*np.array([0, 1])]
    if example_nr == 1:
        X = np.r_[noise*10*np.random.randn(K[0], 2) + np.array([30, 30]),
                  noise*10*np.random.randn(K[1], 2) + np.array([70, 70])]
    elif example_nr == 2:
        rand_ang = np.random.rand(K[0])*2*np.pi
        X = np.r_[noise*5*np.random.randn(K[0], 2) + 30*np.array([np.cos(rand_ang), np.sin(rand_ang)]).T,
                  noise*5*np.random.randn(K[1], 2)] + dim/2
    elif example_nr == 3:
        X = np.r_[noise*10*np.random.randn(int(K[0]/2), 2) + np.array([30, 30]),
                  noise*10*np.random.randn(int(K[0]/2), 2) + np.array([70, 70]),
                  noise*10*np.random.randn(int(K[1]/2), 2) + np.array([30, 70]),
                  noise*10*np.random.randn(int(K[1]/2), 2) + np.array([70, 30])]
    else:
        X = np.zeros((K[0] + K[1], 2))
        print('No data returned - example_nr must be 1, 2, or 3')
    return X, T, x, dim
# Test of the data generation
if __name__ == "__main__":
    import matplotlib.pyplot as plt
    n = 1000
    example_nr = 2
    noise = 1.2
    X, T, x, dim = make_data(example_nr, n, noise)
    fig, ax = plt.subplots(1, 1)
    ax.scatter(X[0:n, 0], X[0:n, 1], c='red', alpha=0.3, s=15)
    ax.scatter(X[n:2*n, 0], X[n:2*n, 1], c='green', alpha=0.3, s=15)
    ax.set_aspect('equal', 'box')
    plt.title('training')
    plt.show()

    #%% Before training, you should make data have zero mean
    c = np.mean(X)
    x_c = x - c
    X_c = X - c
    fig, ax = plt.subplots(1, 1)
    ax.scatter(X_c[0:n, 0], X_c[0:n, 1], c='red', alpha=0.3, s=15)
    ax.scatter(X_c[n:2*n, 0], X_c[n:2*n, 1], c='green', alpha=0.3, s=15)
    ax.set_aspect('equal', 'box')
    plt.title('Zero mean training')
    plt.show()
#%%
import numpy as np
rg = np.random.default_rng()
def initialize(mlp_size):
    W = []
    for l in range(len(mlp_size) - 1):
        size = (mlp_size[l] + 1, mlp_size[l + 1])
        W.append(rg.normal(scale=np.sqrt(2/size[0]), size=size))
    return W
def predict(x, W):
    '''Returns y without saving hidden layers.'''
    c = x
    for l in range(len(W) - 1):
        c = W[l].T @ np.vstack((c, np.ones(c.shape[1])))
        c = np.maximum(c, 0)
    c = W[-1].T @ np.vstack((c, np.ones(c.shape[1])))
    c = np.exp(c)
    c = np.clip(c, 1e-15, 1e15)  # to avoid division by 0 and overflow
    c *= (1 / c.sum(axis=0))
    return c
def forward(x, W):
    '''Returns hidden layers with yhat as the last element.'''
    h = []
    c = x
    for l in range(len(W) - 1):
        c = W[l].T @ np.vstack((c, np.ones(c.shape[1])))
        c = np.maximum(c, 0)
        h.append(c)
    c = W[-1].T @ np.vstack((c, np.ones(c.shape[1])))
    h.append(c)
    return h
def backward(x, t, W, eta):
    '''Returns updated W and sum of losses.'''
    h = forward(x, W)
    m = x.shape[1]
    # Softmax.
    y = np.exp(h[-1])
    y = np.clip(y, 1e-15, 1e15)  # to avoid division by 0 and log(0)
    y *= (1 / y.sum(axis=0))
    # Loss.
    loss = y[t]  # boolean indexing instead of (t * np.log(y)).sum(axis=0)
    loss = -np.log(loss)
    loss = loss.sum()
    # Delta for last layer.
    delta = y - t
    # Move backward.
    for l in range(len(W) - 1, 0, -1):
        Q = np.vstack((h[l-1], np.ones(h[l-1].shape[1]))) @ delta.T
        delta = W[l][:-1, :] @ delta
        delta *= h[l-1] > 0  # Applying the ReLU derivative.
        W[l] -= (eta/m) * Q  # Update.
    # First layer.
    Q = np.vstack((x, np.ones(x.shape[1]))) @ delta.T
    W[0] -= (eta/m) * Q
    return W, loss
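#%%
# A sketch of a finite-difference gradient check for backward, using the
# conventions above (points as columns, boolean one-hot targets as columns).
# The helper names below (loss_of, grad_check) are illustrative, not part of
# the course code.
def loss_of(x, t, W):
    '''Summed cross-entropy loss recomputed via a forward pass.'''
    return -np.log(predict(x, W)[t]).sum()

def grad_check(eps=1e-5, eta=1.0):
    x = rg.normal(size=(2, 4))                  # 4 points, 2 features
    t = np.eye(2, dtype=bool)[:, [0, 1, 0, 1]]  # one-hot targets as columns
    W0 = initialize((2, 3, 2))
    W1, _ = backward(x, t, [w.copy() for w in W0], eta)
    # backward subtracts (eta/m) times the gradient, so recover the gradient:
    analytic = (W0[-1] - W1[-1]) * x.shape[1] / eta
    # Central-difference estimate for one entry of the last weight matrix.
    Wp = [w.copy() for w in W0]; Wp[-1][0, 0] += eps
    Wm = [w.copy() for w in W0]; Wm[-1][0, 0] -= eps
    numeric = (loss_of(x, t, Wp) - loss_of(x, t, Wm)) / (2 * eps)
    print('analytic:', analytic[0, 0], ' numeric:', numeric)

# grad_check()  # uncomment to verify the backpropagation gradients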
def train(X, T, W, nr_epoch, eta, batchsize=1, losses=None):
    if losses is None:  # avoid a mutable default argument
        losses = []
    nr_points = X.shape[1]
    for e in range(nr_epoch):
        random_order = rg.permutation(range(nr_points))
        epoch_loss = 0
        for k in range(0, nr_points, batchsize):
            batch = random_order[k:k+batchsize]
            X_batch = X[:, batch]
            T_batch = T[:, batch]
            W, loss = backward(X_batch, T_batch, W, eta)
            epoch_loss += loss
        losses.append(epoch_loss)
        print(f'\rEpoch {e}, loss {epoch_loss}', end=' ' * 20)
    return W, losses
#%%
# SCRIPT STARTS HERE
#%% Test of the data generation
if __name__ == "__main__":
    from make_data import make_data
    import matplotlib.pyplot as plt
    # Data
    X, T, grid_X, grid_dim = make_data(2, 200, noise=0.9)
    nr_points = X.shape[1]
    X_c = (X - 50)/50
    grid_X = (grid_X - 50)/50
    # Initialization.
    mlp_size = (2, 3, 2)
    W = initialize(mlp_size)
    # Training parameters.
    nr_epoch = 100
    batchsize = 1
    eta = 0.05
    losses = []
    #%%
    # Training.
    W, losses = train(X_c, T, W, nr_epoch, eta, batchsize=batchsize, losses=losses)
    grid_Y = predict(grid_X, W)
    prob0 = grid_Y[0].reshape(grid_dim)
    #%%
    # Visualization.
    fig, ax = plt.subplots(1, 2)
    ax[0].plot(losses)
    ax[0].set_xlabel('Epoch')
    ax[0].set_ylabel('Loss')
    ax[1].imshow(prob0, cmap=plt.cm.bwr, vmin=0.45, vmax=0.55)
    ax[1].scatter(X[0][T[0]], X[1][T[0]], c='m', alpha=0.5, s=15)
    ax[1].scatter(X[0][T[1]], X[1][T[1]], c='g', alpha=0.5, s=15)
    ax[1].set_aspect('equal', 'box')
    plt.show()
# %%
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Anders B. Dahl, abda@dtu.dk, March 2021
"""
#%% Generate and display data
import make_data
import numpy as np
import matplotlib.pyplot as plt
n = 100
example_nr = 2
noise = 1.75
X, T, x, dim = make_data.make_data(example_nr, n, noise)
# Standardize
m = np.mean(X,axis=0)
s = np.std(X,axis=0)
Xc = (X-m)/s
xc = (x-m)/s
fig, ax = plt.subplots(1)
ax.plot(Xc[:n,0],Xc[:n,1],'r.',markersize=10,alpha=0.3)
ax.plot(Xc[n:,0],Xc[n:,1],'g.',markersize=10,alpha=0.3)
ax.set_aspect('equal')
#%% Forward simple model
# Function for simple forward pass
def simple_forward(x, W):
    z = np.c_[x, np.ones((x.shape[0]))]@W[0]
    h = np.maximum(z, 0)
    yh = np.c_[h, np.ones((x.shape[0]))]@W[1]
    y = np.exp(yh)/np.sum(np.exp(yh), axis=1, keepdims=True)
    return y, h
# Function for simple backpropagation
def simple_backward(x, W, t, learning_rate=0.1):
    y, h = simple_forward(x, W)
    L = -np.sum(t*np.log(y + 1e-9))/x.shape[0]
    # print(L)
    d1 = y - t
    q1 = np.c_[h, np.ones((x.shape[0]))].T@d1/y.shape[0]
    d0 = (h > 0)*(d1@W[1].T)[:, :-1]
    q0 = np.c_[x, np.ones((x.shape[0]))].T@d0/y.shape[0]
    W[0] -= learning_rate*q0
    W[1] -= learning_rate*q1
    return W, L
# Function for simple weight initialization
def simple_init_weights(n):
    W = []
    W.append(np.random.randn(3, n)*np.sqrt(2/3))
    W.append(np.random.randn(n+1, 2)*np.sqrt(2/(n+1)))
    return W
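# The sqrt(2/fan_in) factor above is He initialization, which keeps the
# variance of ReLU activations roughly constant across layers. A quick check
# (a sketch; the variable name is illustrative):
_W = simple_init_weights(1000)
print('empirical std:', _W[0].std(), ' expected:', np.sqrt(2/3))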
W = simple_init_weights(3)
fig, ax = plt.subplots(1)
n_iter = 50
L = np.zeros((n_iter))
i_rng = np.arange(0,n_iter)
for i in range(0, n_iter):
    W, L[i] = simple_backward(Xc, W, T, learning_rate=0.5)
    ax.cla()
    ax.plot(i_rng, L, 'k')
    ax.set_title('Loss')
    plt.pause(0.001)
plt.show()
y = simple_forward(xc,W)[0]
# Display the result
Y = y.reshape((100,100,2))
fig,ax = plt.subplots(1)
ax.imshow(Y[:,:,1],cmap='pink')
ax.plot(X[:n,0],X[:n,1],'r.',markersize=10,alpha=0.3)
ax.plot(X[n:,0],X[n:,1],'g.',markersize=10,alpha=0.3)
ax.set_aspect('equal')
#%% Varying number of layers
nl = [x.shape[1], 50,50,50,50, 2]
def init_weights(nl):
    W = []
    for i in range(1, len(nl)):
        W.append(np.random.randn(nl[i-1]+1, nl[i])*np.sqrt(2/(nl[i-1]+1)))
    return W
W = init_weights(nl)
def forward(x, W):
    n = len(W)
    z = []
    h = []
    z.append(np.c_[x, np.ones((x.shape[0]))]@W[0])
    h.append(np.maximum(z[0], 0))
    for i in range(1, n-1):
        z.append(np.c_[h[i-1], np.ones((x.shape[0]))]@W[i])
        h.append(np.maximum(z[i], 0))
    yh = np.maximum(np.minimum(np.c_[h[-1], np.ones((x.shape[0]))]@W[-1], 600), -600)
    y = np.exp(yh)/(np.sum(np.exp(yh), axis=1, keepdims=True))
    return y, h
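# Note: forward clips the logits at +/-600 so that np.exp cannot overflow. An
# equivalent, more common trick (a sketch, not used above) exploits that the
# softmax is invariant to subtracting a per-row constant:
def stable_softmax(yh):
    e = np.exp(yh - yh.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)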
def backward(x, W, t, learning_rate=0.01, show_learning=False):
    y, h = forward(x, W)
    L = -np.sum(t*np.log(y + 1e-6))/x.shape[0]
    if show_learning:
        print(L)
    n = len(W)
    d = []
    q = []
    d.append(y - t)
    for i in range(1, n):
        q.append((np.c_[h[n-i-1], np.ones((x.shape[0]))].T@d[i-1])/y.shape[0])
        d.append((h[n-i-1] > 0)*(d[i-1]@W[n-i].T)[:, :-1])
    q.append((np.c_[x, np.ones((x.shape[0]))].T@d[-1])/y.shape[0])
    for i in range(0, n):
        W[i] -= learning_rate*q[n-i-1]
    return W, L
fig, ax = plt.subplots(1)
n_iter = 250
L = np.zeros((n_iter))
i_rng = np.arange(0,n_iter)
for i in range(0, n_iter):
    W, L[i] = backward(Xc, W, T, learning_rate=0.1, show_learning=True)
    if i % 10 == 0:
        ax.cla()
        ax.plot(i_rng, L, 'k')
        ax.set_title('Loss')
        plt.pause(0.001)
plt.show()
y = forward(xc,W)[0]
Y = y.reshape((100,100,2))
fig,ax = plt.subplots(1)
ax.imshow(Y[:,:,1],cmap='pink')
ax.plot(X[:n,0],X[:n,1],'r.',markersize=10,alpha=0.3)
ax.plot(X[n:,0],X[n:,1],'g.',markersize=10,alpha=0.3)
ax.set_aspect('equal')
#%% Now with mini batches
nl = [x.shape[1], 50,50,50,50, 2]
batch_size = 10
W = init_weights(nl)
fig, ax = plt.subplots(1)
n_iter = 21
L = np.zeros((n_iter))
i_rng = np.arange(0,n_iter)
for i in range(0, n_iter):
    nb = Xc.shape[0]
    idx = np.random.permutation(nb)
    for j in range(0, nb, batch_size):
        Xb = Xc[idx[j:j+batch_size], :]
        Tb = T[idx[j:j+batch_size], :]
        W, l = backward(Xb, W, Tb, learning_rate=0.01, show_learning=True)
        L[i] += l
    if i % 10 == 0:
        ax.cla()
        ax.plot(i_rng, L, 'k')
        ax.set_title('Loss')
        plt.pause(0.001)
plt.show()
y = forward(xc,W)[0]
Y = y.reshape((100,100,2))
fig,ax = plt.subplots(1)
ax.imshow(Y[:,:,1],cmap='pink')
ax.plot(X[:n,0],X[:n,1],'r.',markersize=10,alpha=0.3)
ax.plot(X[n:,0],X[n:,1],'g.',markersize=10,alpha=0.3)
ax.set_aspect('equal')
# %%
#%%
import numpy as np
import matplotlib.pyplot as plt
import make_data
%matplotlib tk
# %%
example_nr = 1
n_pts = 1000
noise = 3
X, T, x_grid, dim = make_data.make_data(example_nr, n_pts, noise)
mu = X.mean(axis=1, keepdims=True)
std = X.std(axis=1, keepdims=True)
print(mu)
print(std)
#%%
X_c = (X-mu)/std
fig, ax = plt.subplots()
ax.plot(X_c[0,T[0]], X_c[1,T[0]], '.r', alpha=0.3)
ax.plot(X_c[0,T[1]], X_c[1,T[1]], '.g', alpha=0.3)
# ax.set_xlim(0, 100)
# ax.set_ylim(0, 100)
ax.set_box_aspect(1)
# %%
n = 5
x = X_c[:,0:n]
x.shape
w = np.sqrt(1/3)
W = []
W.append(w*np.random.randn(3,3))
W.append(w*np.random.randn(4,2))
# %%
x = np.vstack((x, np.ones((1,n))))
x.shape
# %%
z = W[0].T@x
h = np.maximum(0, z)
y_hat = W[1].T@np.vstack((h, np.ones((1,n))))
y = np.exp(y_hat)/(np.exp(y_hat).sum(axis=0))
print(y)
# %%
dims = [X.shape[0], 3, 2]
def init(dims):
    W = []
    # do something here
    return W

def forward(X, W):
    h = None
    y = None
    # do something here
    return y, h

def backward(X, T, W, lr=0.001):
    y, h = forward(X, W)
    # do something here
    return W
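#%%
# One possible completion of the stubs above (a sketch): it assumes points as
# columns in X, boolean one-hot targets as columns in T, and exactly one
# hidden layer, i.e. dims = [X.shape[0], 3, 2] as defined in the cell before.
def init(dims):
    W = []
    for l in range(len(dims) - 1):
        # +1 row for the bias; He-style scaling as in the cells above
        W.append(np.sqrt(2/(dims[l] + 1)) * np.random.randn(dims[l] + 1, dims[l + 1]))
    return W

def forward(X, W):
    h = np.maximum(0, W[0].T @ np.vstack((X, np.ones((1, X.shape[1])))))
    y_hat = W[1].T @ np.vstack((h, np.ones((1, h.shape[1]))))
    y = np.exp(y_hat) / np.exp(y_hat).sum(axis=0)
    return y, h

def backward(X, T, W, lr=0.001):
    y, h = forward(X, W)
    n = X.shape[1]
    d1 = y - T  # delta for softmax + cross-entropy
    q1 = np.vstack((h, np.ones((1, n)))) @ d1.T / n
    d0 = (h > 0) * (W[1][:-1, :] @ d1)
    q0 = np.vstack((X, np.ones((1, n)))) @ d0.T / n
    W[0] -= lr * q0
    W[1] -= lr * q1
    return W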
#%%
import os
import numpy as np
path = '/Users/VAND/Documents/TEACHING/02506/data/bugNIST2D/' # path to unzipped data directory
train_filenames = sorted(os.listdir(path + 'train'))
train_targets = np.loadtxt(path + 'train_targets.txt', dtype=int)
# %%
import matplotlib.pyplot as plt
import PIL.Image
rg = np.random.default_rng()
class_names = [
'AC: brown cricket', 'BC: black cricket', 'BF: blow fly',
'BL: buffalo beetle larva', 'BP: blow fly pupa', 'CF: curly-wing fly',
'GH: grasshopper', 'MA: maggot', 'ML: mealworm',
'PP: green bottle fly pupa', 'SL: soldier fly larva', 'WO: woodlice'
]
for i in range(12):
    fig, ax = plt.subplots(1, 8, figsize=(15, 5))
    this_class = np.where(train_targets == i)[0]
    random_subset = sorted(rg.choice(this_class, 8, replace=False))
    for j in range(8):
        filename = train_filenames[random_subset[j]]
        image = PIL.Image.open(path + 'train/' + filename)
        ax[j].imshow(image)
        ax[j].set_title(filename)
    fig.suptitle(f'Class {i} ({class_names[i]})')
    plt.show()
# %%
#%%
# Imports and defs
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors
import PIL.Image
mlp_path = os.path.dirname('/Users/VAND/Documents/TEACHING/02506/exercises/Week08/mlp_basic.py')
sys.path.append(mlp_path)
import mlp_basic as mlp
def disp(i):
    '''Helper function for showing images.'''
    return (i.reshape(imsize) + 1) / 2

def perturbe(x, scale=0.1):
    return x + rg.normal(scale=scale, size=x.shape)

def show_confusion_scatter(ax, target, predicted):
    ax.scatter(perturbe(target), perturbe(predicted), alpha=0.5, s=15)
    ax.set_xlim(-0.5, 11.5)
    ax.set_ylim(-0.5, 11.5)
    ax.set_aspect('equal', 'box')
    ax.set_xlabel('Target')
    ax.set_ylabel('Predicted')
def show_confusion_matrix(ax, target, predicted):
    nr_classes = target.max() + 1
    edges = np.arange(nr_classes + 1) - 0.5
    cm = np.histogram2d(predicted, target, [edges, edges])[0]
    ax.imshow(cm + 1, cmap=plt.cm.plasma, norm=matplotlib.colors.LogNorm())  # log color scale
    ax.set_xlim(edges[0], edges[-1])
    ax.set_ylim(edges[0], edges[-1])
    ax.set_aspect('equal', 'box')
    ax.set_xlabel('Target')
    ax.set_ylabel('Predicted')
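#%%
# The confusion matrix above comes from np.histogram2d with bins centered on
# the integer class labels. A toy illustration (a sketch):
_t = np.array([0, 0, 1, 1, 2])   # targets
_p = np.array([0, 1, 1, 1, 2])   # predictions
_e = np.arange(4) - 0.5          # bin edges around 0, 1, 2
print(np.histogram2d(_p, _t, [_e, _e])[0])  # rows: predicted, columns: target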
def evaluate_result(W, X_train, T_train, X_validate, T_validate,
                    losses_running, losses_validate, losses_batch):
    Y_train = mlp.predict(X_train, W)
    predicted_train = np.argmax(Y_train, axis=0)
    target_train = np.argmax(T_train, axis=0)
    accuracy_train = (predicted_train == target_train).sum()/target_train.size
    Y_validate = mlp.predict(X_validate, W)
    loss_validate = Y_validate[T_validate]  # boolean indexing instead of (t * np.log(y)).sum(axis=0)
    loss_validate = -np.log(loss_validate)
    predicted_validate = np.argmax(Y_validate, axis=0)
    target_validate = np.argmax(T_validate, axis=0)
    accuracy_validate = (predicted_validate == target_validate).sum()/target_validate.size
    losses_validate.append(loss_validate.mean())
    # Visualization.
    fig = plt.figure()
    gs = fig.add_gridspec(2, 2)
    ax1 = fig.add_subplot(gs[1, 0])
    ax2 = fig.add_subplot(gs[1, 1])
    ax = fig.add_subplot(gs[0, :])
    timestamp = np.arange(len(losses_running))
    ax.plot(losses_batch[0], losses_batch[1], lw=0.2, alpha=0.5, label='Batches')
    ax.plot(timestamp + 0.5, losses_running, lw=0.5, label='Train (running)')
    ax.plot(timestamp + 1, losses_validate, lw=0.5, label='Validate')
    ax.set_ylim(0, losses_running[0])
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss')
    ax.legend()
    show_confusion_matrix(ax1, target_train, predicted_train)
    ax1.set_title(f'Train: {int(accuracy_train*100):d}%')
    show_confusion_matrix(ax2, target_validate, predicted_validate)
    ax2.set_title(f'Validate: {int(accuracy_validate*100):d}%')
    fig.suptitle(f'Epoch {len(losses_running)}')
    plt.tight_layout()
    plt.show()
    return losses_validate
#%%
# Set-up data
path = '/Users/VAND/Documents/TEACHING/02506/data/bugNIST2D/' # path to unzipped data directory
train_filenames = sorted(os.listdir(path + 'train'))
train_targets = np.loadtxt(path + 'train_targets.txt', dtype=int)
X_train = np.stack([np.array(PIL.Image.open(path + 'train/' + filename)) for filename in train_filenames], axis=-1)
imsize = X_train.shape[:2]
nr_images = X_train.shape[3]
X_train = 2/255 * X_train.reshape(-1, nr_images).astype('float') - 1
I = np.eye(12, dtype=bool)
T_train = I[train_targets].T
split = 20000
X_validate = X_train[:, split:]
T_validate = T_train[:, split:]
X_train = X_train[:, :split]
T_train = T_train[:, :split]
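#%%
# The identity-matrix trick above (T = I[targets].T) one-hot encodes integer
# labels: row t of np.eye(12, dtype=bool) is the one-hot vector for class t.
# A minimal illustration (a sketch):
print(np.eye(3, dtype=bool)[np.array([0, 2, 1])].T.astype(int))
# columns are the one-hot encodings of labels 0, 2, 1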
#%%
# Initialize MLP.
nr_show = 8
rg = np.random.default_rng()
mlp_size = (X_train.shape[0], 256, 512, 256, T_train.shape[0])
W = mlp.initialize(mlp_size)
losses_running = []
losses_validate = []
losses_batch = [[], []]
#%%
# Set training parameters.
nr_epoch = 200
batchsize = 20
eta = 0.001
#%%
# Train.
nr_points = X_train.shape[1]
for e in range(nr_epoch):
    random_order = rg.permutation(range(nr_points))
    epoch_loss = 0
    for k in range(0, nr_points, batchsize):
        batch = random_order[k:k+batchsize]
        X_batch = X_train[:, batch]
        T_batch = T_train[:, batch]
        W, loss = mlp.backward(X_batch, T_batch, W, eta)
        epoch_loss += loss
        losses_batch[0].append(e + k/nr_points)
        losses_batch[1].append(loss/X_batch.shape[1])
    losses_running.append(epoch_loss/nr_points)
    losses_validate = evaluate_result(W, X_train, T_train, X_validate, T_validate,
                                      losses_running, losses_validate, losses_batch)
    # print(f'\rEpoch {e}, loss {epoch_loss}', end=' ' * 20)
#%%
# Testing
test_filenames = sorted(os.listdir(path + 'test'))
test_targets = np.loadtxt(path + 'test_targets.txt', dtype=int)
X_test = np.stack([np.array(PIL.Image.open(path + 'test/' + filename)) for filename in test_filenames], axis=-1)
X_test = 2/255 * X_test.reshape(-1, X_test.shape[-1]).astype('float') - 1
T_test = I[test_targets].T
Y_test = mlp.predict(X_test, W)
loss_test = Y_test[T_test]  # boolean indexing instead of (t * np.log(y)).sum(axis=0)
loss_test = - np.log(loss_test)
predicted_test = np.argmax(Y_test, axis=0)
target_test = np.argmax(T_test, axis=0)
accuracy_test = (predicted_test==target_test).sum()/target_test.size
fig, ax = plt.subplots()
show_confusion_matrix(ax, target_test, predicted_test)
ax.set_title(f'Test: {int(accuracy_test*100):d}%')
# %%
order = np.array([1, 2, 4, 9, 6, 5, 3, 8, 11, 7, 10, 0])  # position each class is moved to in the reordered confusion matrix
fig, ax = plt.subplots()
show_confusion_matrix(ax, order[target_test], order[predicted_test])
ax.set_title(f'Test: {int(accuracy_test*100):d}%')
# %%
#%%
import numpy as np
#%%
folder = '/Users/VAND/Documents/TEACHING/02506/data/week9_MNIST_data/'
d = np.load(folder + 'MNIST_target_train.npy')
#%%
folder = '/Users/VAND/Documents/TEACHING/02506/data/week9_CIFAR-10_data/'
d = np.load(folder + 'CIFAR10_target_train.npy')
# %%
file = '/Users/VAND/Documents/TEACHING/02506/data/cifar-10-batches-py/data_batch_1'
def unpickle(file):
    import pickle
    with open(file, 'rb') as fo:
        d = pickle.load(fo, encoding='bytes')
    return d
d = unpickle(file)
d[b'data'].shape
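#%%
# Each row of d[b'data'] is one flattened 32x32 RGB image, stored
# channel-first: 1024 red, then 1024 green, then 1024 blue values. A sketch
# for viewing the first image of the batch:
import matplotlib.pyplot as plt
image = d[b'data'][0].reshape(3, 32, 32).transpose(1, 2, 0)
plt.imshow(image)
plt.title(str(d[b'labels'][0]))
plt.show()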
#%%
import numpy as np
import matplotlib.pyplot as plt
import sklearn.datasets
#%% importing mlp from week 8
import os
import sys
mlp_path = os.path.dirname('/Users/VAND/Documents/TEACHING/02506/exercises/Week08/mlp_basic.py')
sys.path.append(mlp_path)
import mlp_basic as mlp
#%%
digits = sklearn.datasets.load_digits()
images = digits['images']
targets = digits['target']
X = images.reshape((images.shape[0], -1)).T
h = X.max()/2
X = (X - h)/h
I = np.eye(10, dtype=bool)
T = I[targets].T
def disp(i):
    '''Helper function for showing images.'''
    return (i.reshape(8, 8) + 1) / 2
nr_images = images.shape[0]
nr_show = 8
rg = np.random.default_rng()
random_subset = sorted(rg.choice(range(nr_images), nr_show, replace=False))
fig, ax = plt.subplots(1, nr_show)
for s, a in zip(random_subset, ax):
    a.imshow(disp(X[:, s]))
    a.set_title(f'Im. {s}\nTarget {np.argmax(T[:, s])}')
plt.show()
#%% Make testing and training set
train_percentage = 0.5
permuted = rg.permutation(range(nr_images))
c = int(nr_images*train_percentage)
train = sorted(permuted[:c])
test = sorted(permuted[c:])
X_train= X[:, train]
T_train= T[:, train]
X_test = X[:, test]
T_test = T[:, test]
#%% Initialization.
mlp_size = (X_train.shape[0], 58, 22, T_train.shape[0])
W = mlp.initialize(mlp_size)
# Training parameters.
nr_epoch = 10
batchsize = 5
eta = 0.01
losses = []
losses_test = []
#%%
# Training.
nr_points = X_train.shape[1]
for e in range(nr_epoch):
    random_order = rg.permutation(range(nr_points))
    epoch_loss = 0
    for k in range(0, nr_points, batchsize):
        batch = random_order[k:k+batchsize]
        X_batch = X_train[:, batch]
        T_batch = T_train[:, batch]
        W, loss = mlp.backward(X_batch, T_batch, W, eta)
        epoch_loss += loss
    losses.append(epoch_loss/nr_points)
    Y_test = mlp.predict(X_test, W)
    loss_test = Y_test[T_test]  # boolean indexing instead of (t * np.log(y)).sum(axis=0)
    loss_test = -np.log(loss_test)
    losses_test.append(loss_test.mean())
    print(f'\rEpoch {e}, loss {epoch_loss}', end=' ' * 20)
#%%
Y_train = mlp.predict(X_train, W)
predicted_train = np.argmax(Y_train, axis=0)
target_train= np.argmax(T_train, axis=0)
accuracy_train = (predicted_train==target_train).sum()/target_train.size
Y_test = mlp.predict(X_test, W)
predicted_test = np.argmax(Y_test, axis=0)
target_test = np.argmax(T_test, axis=0)
accuracy_test = (predicted_test==target_test).sum()/target_test.size
#%%
def perturbe(x, scale=0.1):
    return x + rg.normal(scale=scale, size=x.shape)
# Visualization.
fig, ax = plt.subplots(1, 3)
ax[0].plot(losses, label='train')
ax[0].plot(losses_test, label='test')
ax[0].set_xlabel('Epoch')
ax[0].set_ylabel('Loss')
ax[1].scatter(perturbe(target_train), perturbe(predicted_train), alpha=0.5, s=15)
ax[1].set_aspect('equal', 'box')
ax[1].set_xlabel('Target')
ax[1].set_ylabel('Predicted')
ax[1].set_title(f'Train: {int(accuracy_train*100):d}%')
ax[2].scatter(perturbe(target_test), perturbe(predicted_test), alpha=0.5, s=15)
ax[2].set_aspect('equal', 'box')
ax[2].set_xlabel('Target')
ax[2].set_ylabel('Predicted')
ax[2].set_title(f'Test: {int(accuracy_test*100):d}%')
plt.show()
#%% Check where it goes wrong
misses = np.where(target_test != predicted_test)[0]
nr_show = min(nr_show, len(misses))
if nr_show > 0:
    fig, ax = plt.subplots(1, nr_show)
    for i, a in zip(misses, ax):
        a.imshow(disp(X_test[:, i]))
        tr = np.argmax(T_test[:, i])
        pr = np.argmax(Y_test[:, i])
        a.set_title(f'Predicted {pr}\nTarget {tr}')
    plt.show()
# %%
#%%
import numpy as np
import matplotlib.pyplot as plt
import sklearn.datasets
digits = sklearn.datasets.load_digits()
images = digits['images']
targets = digits['target']
X = images.reshape((images.shape[0], -1)).T
h = X.max()/2
X = (X - h)/h
I = np.eye(10, dtype=bool)
T = I[targets].T
def disp(i):
    '''Helper function for showing images.'''
    return (i.reshape(8, 8) + 1) / 2
nr_images = images.shape[0]
nr_show = 8
rg = np.random.default_rng()
random_subset = sorted(rg.choice(range(nr_images), nr_show, replace=False))
fig, ax = plt.subplots(1, nr_show)
for s, a in zip(random_subset, ax):
    a.imshow(disp(X[:, s]))
    a.set_title(f'Im. {s}\nTarget {np.argmax(T[:, s])}')
plt.show()
#%%
# Dividing data into training and test set. The same approach can be used
# for dividing into training and validation.
train_percentage = 0.8
permuted = rg.permutation(range(nr_images))
c = int(nr_images*train_percentage)
train = sorted(permuted[:c])
test = sorted(permuted[c:])
X_train= X[:, train]
T_train= T[:, train]
X_test = X[:, test]
T_test = T[:, test]
#%%
import gzip
import shutil
import os
import wget
# This will get and unpack mnist files in the specified folder.
# Alternatively, download, unpack and place in folder manually.
# http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
# http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
# http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
# http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
mnist_folder = '/Users/VAND/Documents/TEACHING/02506/data/mnist_data'
if not os.path.isdir(mnist_folder):
    print('Getting data...')
    os.mkdir(mnist_folder)
    for f in ['train-images-idx3-ubyte', 'train-labels-idx1-ubyte',
              't10k-images-idx3-ubyte', 't10k-labels-idx1-ubyte']:
        url = 'http://yann.lecun.com/exdb/mnist/' + f + '.gz'
        temp = wget.download(url)
        with gzip.open(temp, 'rb') as f_in:
            with open(os.path.join(mnist_folder, f), 'wb') as f_out:
                shutil.copyfileobj(f_in, f_out)
        os.remove(temp)
else:
    print('Has data.')
def disp(i):
    '''Helper function for showing images.'''
    return (i.reshape(28, 28) + 1) / 2
#%%
import mnist
import numpy as np
mndata = mnist.MNIST(mnist_folder)
X_train, T_train = mndata.load_training()
X_test, T_test = mndata.load_testing()
X_train = (2/255) * np.array(X_train, dtype=float).T - 1
X_test = (2/255) * np.array(X_test, dtype=float).T - 1
I = np.eye(10, dtype=bool)
T_train = I[T_train].T
T_test = I[T_test].T
#%%
#%% importing mlp from week 8
import os
import sys
mlp_path = os.path.dirname('/Users/VAND/Documents/TEACHING/02506/exercises/Week08/mlp_basic.py')
sys.path.append(mlp_path)
import mlp_basic as mlp
#%%
X_train -= X_train.mean(axis=0)
X_train /= X_train.std(axis=0)
X_test -= X_test.mean(axis=0)
X_test /= X_test.std(axis=0)
#%%
mlp_size = (X_train.shape[0], 58, 22, T_train.shape[0])
W = mlp.initialize(mlp_size)
# Training parameters.
nr_epoch = 10
batchsize = 5
eta = 0.01
losses = []
losses_test = []
#%%
# Training.
rg = np.random.default_rng()
nr_points = X_train.shape[1]
for e in range(nr_epoch):
    random_order = rg.permutation(range(nr_points))
    epoch_loss = 0
    for k in range(0, nr_points, batchsize):
        batch = random_order[k:k+batchsize]
        X_batch = X_train[:, batch]
        T_batch = T_train[:, batch]
        W, loss = mlp.backward(X_batch, T_batch, W, eta)
        epoch_loss += loss
    losses.append(epoch_loss/nr_points)
    Y_test = mlp.predict(X_test, W)
    loss_test = Y_test[T_test]  # boolean indexing instead of (t * np.log(y)).sum(axis=0)
    loss_test = -np.log(loss_test)
    losses_test.append(loss_test.mean())
    print(f'\rEpoch {e}, loss {epoch_loss}', end=' ' * 20)
#%%
Y_train = mlp.predict(X_train, W)
predicted_train = np.argmax(Y_train, axis=0)
target_train= np.argmax(T_train, axis=0)
accuracy_train = (predicted_train==target_train).sum()/target_train.size
Y_test = mlp.predict(X_test, W)
predicted_test = np.argmax(Y_test, axis=0)
target_test = np.argmax(T_test, axis=0)
accuracy_test = (predicted_test==target_test).sum()/target_test.size
#%%
import matplotlib.pyplot as plt
def perturbe(x, scale=0.1):
    return x + rg.normal(scale=scale, size=x.shape)
# Visualization.
fig, ax = plt.subplots(1, 3)
ax[0].plot(losses, label='train')
ax[0].plot(losses_test, label='test')
ax[0].set_xlabel('Epoch')
ax[0].set_ylabel('Loss')
ax[1].scatter(perturbe(target_train), perturbe(predicted_train), alpha=0.5, s=15)
ax[1].set_aspect('equal', 'box')
ax[1].set_xlabel('Target')
ax[1].set_ylabel('Predicted')
ax[1].set_title(f'Train: {int(accuracy_train*100):d}%')
ax[2].scatter(perturbe(target_test), perturbe(predicted_test), alpha=0.5, s=15)
ax[2].set_aspect('equal', 'box')
ax[2].set_xlabel('Target')
ax[2].set_ylabel('Predicted')
ax[2].set_title(f'Test: {int(accuracy_test*100):d}%')
plt.show()
#%% Check where it goes wrong
nr_show = 8
misses = np.where(target_test != predicted_test)[0]
nr_show = min(nr_show, len(misses))
if nr_show > 0:
    fig, ax = plt.subplots(1, nr_show)
    for i, a in zip(misses, ax):
        a.imshow(disp(X_test[:, i]))
        tr = np.argmax(T_test[:, i])
        pr = np.argmax(Y_test[:, i])
        a.set_title(f'{pr} ({tr})')
    plt.show()
# %%
%% Cell type:markdown id:608c7b2a tags:
# QUIZ WEEK 10
%% Cell type:code id:192375c1 tags:
``` python
# FIRST LAYER
W1 = 5*5*1*3 # nr. kernel weights (one since input is grayscale)
B1 = 3 # nr. biases, one per kernel
O1 = 116*116*3 # output of the first layer (120-4=116)
# SECOND LAYER
W2 = 3*3*3*6 # nr. kernel weights (3 since the input has 3 channels)
B2 = 6 # nr biases
O2 = 114*114*6 # output of the second layer (116-2=114)
# FIRST LAYER WITH POOLING
O1 = 116*116*3 # output of the first layer
P1 = 58*58*3 # after pooling
# SECOND LAYER WITH POOLING
O2 = 56*56*6 # output of the second layer (58-2=56)
P2 = 28*28*6 # after pooling
# FINAL QUIZ ANSWERS
A = 114*114*6
B = 5*5*1*3 + 3 + 3*3*3*6 + 6
C = 28*28*6
print(A, B, C)
```
%% Output
77976 246 4704
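%% Cell type:markdown tags:
For reference: a valid (unpadded) convolution with a $k \times k$ kernel maps an $n \times n$ input to $(n - k + 1) \times (n - k + 1)$, which gives $120 - 5 + 1 = 116$ and $116 - 3 + 1 = 114$ above, and $2 \times 2$ max pooling halves each spatial dimension.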
# Material for Chapter 10
The notebook for the exercise in Chapter 10 is available here:
[Notebook for mini U-net](https://github.com/vedranaa/teaching-notebooks/blob/main/02506_week10_MiniUnet.ipynb)
You can also open it directly in Google Colab:
[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/vedranaa/teaching-notebooks/blob/main/02506_week10_MiniUnet.ipynb)
## Solution for Chapter 10
The solution to the exercise in Chapter 10 is available here:
[Mini U-net solutions](https://github.com/vedranaa/teaching-notebooks/blob/main/02506_week10_MiniUnet_Solutions.ipynb)
You can also open it directly in Google Colab:
[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/vedranaa/teaching-notebooks/blob/main/02506_week10_MiniUnet_Solutions.ipynb)