Commit d004d8ad authored by bjje

Updated structure of lecture plan and file names (needs to be double checked)

parent 9eadc843
%% exercise 3.1.2
cdir = fileparts(mfilename('fullpath'));
[A, D] = tmg(fullfile(cdir,'../Data/textDocs.txt'));
X = full(A)';
attributeNames = cellstr(D);
%% Display the result
display(attributeNames);
display(X);
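% For intuition (a sketch, not part of the exercise): tmg comes from the
% Text to Matrix Generator toolbox, but a small bag-of-words matrix can
% also be built by hand. The names docs, vocab and Xbow below are
% illustrative only; each row of Xbow counts term occurrences in one document.
docs = {'the bag of words representation', 'words in a bag'};
tokens = cellfun(@(d) strsplit(lower(d)), docs, 'UniformOutput', false);
vocab = unique([tokens{:}]);
Xbow = zeros(numel(docs), numel(vocab));
for d = 1:numel(docs)
    [tf, loc] = ismember(tokens{d}, vocab);
    Xbow(d,:) = accumarray(loc(tf)', 1, [numel(vocab), 1])';
end
disp(vocab); disp(Xbow);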
%% exercise 3.1.3
cdir = fileparts(mfilename('fullpath'));
TMGOpts.stoplist = fullfile(cdir,'../Data/stopWords.txt');
[A, D] = tmg(fullfile(cdir,'../Data/textDocs.txt'), TMGOpts);
X = full(A)';
attributeNames = cellstr(D);
%% Display the result
display(attributeNames);
display(X);
%% exercise 3.1.4
cdir = fileparts(mfilename('fullpath'));
TMGOpts.stoplist = fullfile(cdir,'../Data/stopWords.txt');
TMGOpts.stemming = 1;
[A, D] = tmg(fullfile(cdir,'../Data/textDocs.txt'), TMGOpts);
X = full(A)';
attributeNames = cellstr(D);
%% Display the result
display(attributeNames);
display(X);
%% exercise 3.1.5
% Query vector
q = [0; 0; 0; 0; 0; 0; 0; 1; 0; 0; 0; 0; 1; 1; 0; 0; 0]';
%% Method #1 (a for loop)
N = size(X,1); % Get the number of data objects
sim = nan(N,1); % Allocate a vector for the similarity
for i = 1:N
    x = X(i,:);                         % Get the i'th data object
    sim(i) = dot(q/norm(q), x/norm(x)); % Compute cosine similarity
end
%% Method #2 (one compact line of code)
sim = (q*X')'./(sqrt(sum(X.^2,2))*sqrt(sum(q.^2)));
%% Method #3 (use the "similarity" function)
sim = similarity(X, q, 'cos');
%% Display the result
display(sim);
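% Sanity check (a sketch, not part of the exercise): all three methods
% implement the same cosine similarity, x'q/(||x|| ||q||), so their
% outputs should agree up to floating-point round-off.
sim_loop = nan(N,1);
for i = 1:N
    sim_loop(i) = dot(q/norm(q), X(i,:)/norm(X(i,:)));
end
sim_vec = (q*X')'./(sqrt(sum(X.^2,2))*sqrt(sum(q.^2)));
fprintf('Max |difference|: %g\n', max(abs(sim_loop - sim_vec)));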
%% exercise 2.1.1
% Load the data into Matlab
cdir = fileparts(mfilename('fullpath'));
[NUMERIC, TXT, RAW] = xlsread(fullfile(cdir,'../Data/nanonose.xls'));
% Extract the rows and columns corresponding to the sensor data
X = NUMERIC(:,3:10);
% Extract attribute names from the first row
attributeNames = RAW(1,4:end);
% Extract class labels from the first column
classLabels = RAW(3:end,1);
% Extract the unique class names
classNames = unique(classLabels);
% Assign each data object the index of its class name (zero-based)
[y_,y] = ismember(classLabels, classNames); y = y-1;
%% exercise 3.2.1
% Compute summary statistics of the data vector x
x = [-0.68; -2.11; 2.39; 0.26; 1.46; 1.33; 1.03; -0.41; -0.33; 0.47];
mean_x = mean(x);
std_x = std(x);
median_x = median(x);
range_x = range(x);
%% Display results
display(mean_x);
display(std_x);
display(median_x);
display(range_x);
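% Aside (a sketch): range() requires the Statistics and Machine Learning
% Toolbox; without it, the same quantity is simply
range_x_manual = max(x) - min(x);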
%% exercise 2.1.2
% Data attributes to be plotted
i = 1;
j = 2;
% Make a simple plot of the i'th attribute against the j'th attribute
mfig('NanoNose: Data'); clf;
plot(X(:,i), X(:,j),'o');
axis tight
% Make another more fancy plot that includes legend, class labels,
% attribute names, and a title
mfig('NanoNose: Classes'); clf; hold all;
C = length(classNames);
% Use a specific color for each class (easy to reuse across plots!):
colors = get(gca, 'colororder');
% Here we use the standard colours from MATLAB, but you could define your own.
for c = 0:C-1
    h = scatter(X(y==c,i), X(y==c,j), 50, 'o', ...
        'MarkerFaceColor', colors(c+1,:), ...
        'MarkerEdgeAlpha', 0, ...
        'MarkerFaceAlpha', .5);
end
% You can also avoid the loop by using e.g.:
% gscatter(X(:,i), X(:,j), classLabels)
% (but then do not call legend(classNames), as it would overwrite the
% legend with wrong entries)
legend(classNames);
axis tight
xlabel(attributeNames{i});
ylabel(attributeNames{j});
title('NanoNose data');
%% exercise 2.1.3
% Subtract the mean from the data
Y = bsxfun(@minus, X, mean(X));
% Obtain the PCA solution by calculating the SVD of Y
[U, S, V] = svd(Y);
% Compute variance explained
rho = diag(S).^2./sum(diag(S).^2);
threshold = 0.90;
% Plot variance explained
mfig('NanoNose: Var. explained'); clf;
hold on
plot(rho, 'x-');
plot(cumsum(rho), 'o-');
plot([0,length(rho)], [threshold, threshold], 'k--');
legend({'Individual','Cumulative','Threshold'}, ...
'Location','best');
ylim([0, 1]);
xlim([1, length(rho)]);
grid minor
xlabel('Principal component');
ylabel('Variance explained');
title('Variance explained by principal components');
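% A small follow-up (a sketch, not part of the original exercise): the
% smallest number of components needed to reach the threshold can be
% read off programmatically.
K90 = find(cumsum(rho) >= threshold, 1);
fprintf('%d components are needed to explain %.0f%% of the variance\n', ...
    K90, 100*threshold);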
%% exercise 2.1.4
% Indices of the principal components to be plotted
i = 1;
j = 2;
% Compute the projection onto the principal components
Z = U*S;
% Plot PCA of data
mfig('NanoNose: PCA Projection'); clf; hold all;
C = length(classNames);
colors = get(gca,'colororder');
for c = 0:C-1
    scatter(Z(y==c,i), Z(y==c,j), 50, 'o', ...
        'MarkerFaceColor', colors(c+1,:), ...
        'MarkerEdgeAlpha', 0, ...
        'MarkerFaceAlpha', .5);
end
legend(classNames);
axis tight
xlabel(sprintf('PC %d', i));
ylabel(sprintf('PC %d', j));
title('PCA Projection of NanoNose data');
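% Note (a sketch, not part of the exercise): since Y = U*S*V' with
% orthonormal V, the projection Z = U*S equals Y*V; both give the
% coordinates of the data in the principal-component basis.
Z_alt = Y*V;
fprintf('Max |U*S - Y*V|: %g\n', max(abs(Z(:) - Z_alt(:))));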
%% exercise 2.1.5
% We saw in 2.1.3 that the first 3 components explained more than 90
% percent of the variance. Let's look at their coefficients:
pcs = 1:3; % change this to look at more/fewer, or compare e.g. [2,5]
mfig('NanoNose: PCA Component Coefficients');
h = bar(V(:,pcs));
legendCell = cellstr(num2str(pcs', 'PC%-d'));
legend(legendCell, 'location','best');
xticklabels(attributeNames);
grid
xlabel('Attributes');
ylabel('Component coefficients');
title('NanoNose: PCA Component Coefficients');
% Inspecting the plot, we see that the 2nd principal component has large
% (in magnitude) coefficients for attributes A, E and H. We can confirm
% this by looking at its numerical values directly, too:
disp('PC2:')
disp(V(:,2)') % notice the transpose for display in console
% How does this translate to the actual data and its projections?
% Looking at the data for water:
% Projection of water class onto the 2nd principal component.
all_water_data = Y(y==4,:);
disp('First water observation:')
disp(all_water_data(1,:))
% Based on the coefficients and the attribute values for the observation
% displayed, would you expect the projection onto PC2 to be positive or
% negative - why? Consider *both* the magnitude and sign of *both* the
% coefficient and the attribute!
% You can compute the projection directly:
disp('...and its projection onto PC2:')
disp(all_water_data(1,:) * V(:,2))
% Does the sign match your expectation? Try to explain why.
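% To see whether the sign is systematic (a sketch, not part of the
% exercise): project every water observation onto PC2 and inspect the average.
water_proj = all_water_data * V(:,2);
fprintf('Mean projection of water onto PC2: %.2f\n', mean(water_proj));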
%% exercise 2.2.1 / exercise 3.3.1: digit visualization and image query
% Index of the digit/image to display and to use as query
i = 1;
% Similarity measure: 'SMC', 'Jaccard', 'ExtendedJaccard', 'Cosine', 'Correlation'
SimilarityMeasure = 'cos';
%% Load the digits data
cdir = fileparts(mfilename('fullpath'));
load(fullfile(cdir,'../Data/zipdata.mat'));
% Extract digits data
X = traindata(:,2:end);
y = traindata(:,1);
% Visualize the i'th digit as a vector
mfig('Digits: Data');
subplot(4,1,4);
imagesc(X(i,:));
xlabel('Pixel number');
title('Digit in vector format');
set(gca, 'YTick', []);
% Visualize the i'th digit as an image
subplot(4,1,1:3);
I = reshape(X(i,:), [16,16])';
imagesc(I);
colormap(1-gray);
axis image off
title('Digit as an image');
%% Load the image data used for the similarity query
load(fullfile(cdir,'../Data/digits.mat'));
% You can try out the CBCL face database, too:
%load(fullfile(cdir,'../Data/wildfaces_grayscale.mat'));
transpose = true; % set to true if plotted images need to be transposed
[N,M] = size(X);
imageDim = [sqrt(M),sqrt(M)];
%% Search the image database for similar images
% Index of all other images than i
noti = [1:i-1 i+1:N];
% Compute similarity between image i and all others
sim = similarity(X(i,:), X(noti,:), SimilarityMeasure);
% Sort similarities
[val, j] = sort(sim, 'descend');
%% Plot query and result
mfig('Similarity query'); clf;
subplot(3,5,1:5);
img = reshape(X(i,:),imageDim);
if transpose, img = img'; end
imagesc(img);
axis image
set(gca, 'XTick', [], 'YTick', []);
ylabel(sprintf('Image #%d', i));
title('Query image','FontWeight','bold');
for k = 1:5
    subplot(3,5,k+5)
    ii = noti(j(k));
    img = reshape(X(ii,:),imageDim);
    if transpose, img = img'; end
    imagesc(img);
    axis image
    set(gca, 'XTick', [], 'YTick', []);
    xlabel(sprintf('sim=%.2f', val(k)));
    ylabel(sprintf('Image #%d', ii));
    if k==3, title('Most similar images','FontWeight','bold'); end
end
for k = 1:5
    subplot(3,5,k+10)
    ii = noti(j(end+1-k));
    img = reshape(X(ii,:),imageDim);
    if transpose, img = img'; end
    imagesc(img);
    axis image
    set(gca, 'XTick', [], 'YTick', []);
    xlabel(sprintf('sim=%.3f', val(end+1-k)));
    ylabel(sprintf('Image #%d', ii));
    if k==3, title('Least similar images','FontWeight','bold'); end
end
colormap(gray);
%% exercise 2.2.2
% Digits to include in analysis (to include all digits, use n = 0:9);
n = [0,1];
% Number of principal components for reconstruction
K = 22;
% Digits to visualize
nD = 1:5;
%% Load data
cdir = fileparts(mfilename('fullpath'));
load(fullfile(cdir,'../Data/zipdata.mat'));
% Extract digits
X = traindata(:,2:end);
y = traindata(:,1);
classNames = {'0';'1';'2';'3';'4';'5';'6';'7';'8';'9'};
classLabels = classNames(y+1);
% Remove digits that are not to be inspected
j = ismember(y, n);
X = X(j,:);
classLabels = classLabels(j);
classNames = classNames(n+1);
y = cellfun(@(str) find(strcmp(str, classNames)), classLabels)-1;
% Subtract the mean from the data
Y = bsxfun(@minus, X, mean(X));
% Obtain the PCA solution by calculating the SVD of Y
[U, S, V] = svd(Y, 'econ');
% Compute the projection onto the principal components
Z = U*S;
% Compute variance explained
rho = diag(S).^2./sum(diag(S).^2);
%% Plot variance explained
mfig('Digits: Var. explained'); clf;
plot(rho, 'o-');
title('Variance explained by principal components');
xlabel('Principal component');
ylabel('Variance explained');
%% Plot PCA of data
mfig('Digits: PCA'); clf; hold all;
C = length(classNames);
for c = 0:C-1
    plot(Z(y==c,1), Z(y==c,2), 'o');
end
legend(classNames);
xlabel('PC 1');
ylabel('PC 2');
title('PCA of digits data');
%% Visualize the reconstructed data from the first K principal components
mfig('Digits: Reconstruction'); clf;
W = Z(:,1:K)*V(:,1:K)';
D = length(nD);
for d = 1:D
    subplot(2,D,d);
    I = reshape(X(nD(d),:), [16,16])';
    imagesc(I);
    axis image off
    title('Original');
    subplot(2,D,d+D);
    I = reshape(W(nD(d),:)+mean(X), [16,16])';
    imagesc(I);
    axis image off
    title('Reconstructed');
end
colormap(1-gray);
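% How does reconstruction quality depend on K? A sketch (Ktest and Wk are
% illustrative names, not part of the exercise): compare the mean squared
% reconstruction error of the centered data for a few choices of K.
for Ktest = [1, 5, 10, K]
    Wk = Z(:,1:Ktest)*V(:,1:Ktest)';
    fprintf('K = %3d: reconstruction MSE = %.4f\n', Ktest, mean((Y(:) - Wk(:)).^2));
end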
%% Visualize the principal components
mfig('Digits: Principal components'); clf;
N1 = ceil(sqrt(K)); N2 = ceil(K/N1);
for k = 1:K
    subplot(N2, N1, k);
    I = reshape(V(:,k), [16,16])';
    imagesc(I);
    colormap(hot);
    axis image off
    title(sprintf('PC %d',k));
end
%% exercise 3.3.2
% Generate two data objects with M random attributes
M = 5;
x = rand(1,M);
y = rand(1,M);
% Two constants
a = 1.5;
b = 1.5;
% Check the statements in the exercise
similarity(x,y,'cos') - similarity(a*x,y,'cos') % cosine under scaling
similarity(x,y,'ext') - similarity(a*x,y,'ext') % extended Jaccard under scaling
similarity(x,y,'cor') - similarity(a*x,y,'cor') % correlation under scaling
similarity(x,y,'cos') - similarity(b+x,y,'cos') % cosine under translation
similarity(x,y,'ext') - similarity(b+x,y,'ext') % extended Jaccard under translation
similarity(x,y,'cor') - similarity(b+x,y,'cor') % correlation under translation
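% Expected outcome (a sketch of the reasoning, not part of the exercise):
% for a > 0 the scaling factor cancels in the cosine's numerator and
% denominator, so cosine and correlation are unchanged by scaling;
% correlation is additionally unchanged by adding the constant b, because
% the mean subtraction removes it. Extended Jaccard is invariant to
% neither. A tolerance-based check:
tol = 1e-12;
fprintf('cos scale-invariant:       %d\n', abs(similarity(x,y,'cos') - similarity(a*x,y,'cos')) < tol);
fprintf('cor scale-invariant:       %d\n', abs(similarity(x,y,'cor') - similarity(a*x,y,'cor')) < tol);
fprintf('cor translation-invariant: %d\n', abs(similarity(x,y,'cor') - similarity(b+x,y,'cor')) < tol);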
%% exercise 2.3.1
%% Load data
cdir = fileparts(mfilename('fullpath'));
load(fullfile(cdir,'../Data/zipdata.mat'));
% Extract digits (training set)
X = traindata(:,2:end);
y = traindata(:,1);
% Extract digits (test set)
Xtest = testdata(:,2:end);
ytest = testdata(:,1);
% Subtract the training-set mean from both training and test data
Y = bsxfun(@minus, X, mean(X));
Ytest = bsxfun(@minus, Xtest, mean(X));
% Obtain the PCA solution by calculating the SVD of Y
[U, S, V] = svd(Y, 'econ');
% Numbers of principal components to use, i.e. the reduced dimensionality
Krange = [8,10,15,20,30,40,50,60,100,150];
errorRate = zeros(length(Krange),1);
for i = 1:length(Krange)
    K = Krange(i);
    % Compute the projection onto the first K principal components
    Z = Y*V(:,1:K);
    Ztest = Ytest*V(:,1:K);
    % Classify digits using a 1-nearest neighbour classifier
    model = fitcknn(Z, y, 'NumNeighbors', 1);
    yest = predict(model, Ztest);
    errorRate(i) = nnz(ytest~=yest)/length(ytest);
    % Display results
    fprintf('Error rate %.1f%%\n', errorRate(i)*100);
end
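% A small follow-up (a sketch, not part of the original exercise): report
% the projection dimensionality with the lowest test error.
[minErr, iBest] = min(errorRate);
fprintf('Lowest error rate %.1f%% at K = %d\n', 100*minErr, Krange(iBest));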
%% Visualize error rates vs. number of principal components considered
mfig('Digits: Error rate'); clf;
plot(Krange, 100*errorRate, 'o-');
xlabel('Number of principal components K');
ylabel('Error rate [%]');
%% exercise 4.2.1
% Disable xlsread warnings
warning('off', 'MATLAB:xlsread:ActiveX');
warning('off', 'MATLAB:xlsread:Mode');
% Load the data into Matlab
cdir = fileparts(mfilename('fullpath'));
[NUMERIC, TXT, RAW] = xlsread(fullfile(cdir,'../Data/iris.xls'),1,'','basic');
% Extract the rows and columns corresponding to the data
if isnan(NUMERIC(1,1))
    X = NUMERIC(2:end,:);
else
    X = NUMERIC;
end
% Extract attribute names from the first row
attributeNames = RAW(1,1:4)';
% Extract unique class names from the last column
classLabels = RAW(2:end,5)';
classNames = unique(classLabels);
% Extract class labels that match the class names
[y_,y] = ismember(classLabels, classNames); y = y'-1;
% Get the number of data objects, attributes, and classes
[N, M] = size(X);
C = length(classNames);
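% A quick sanity check (a sketch, not part of the original exercise):
% print the number of observations per class to verify the encoding of y.
for c = 1:C
    fprintf('%s: %d observations\n', classNames{c}, sum(y==c-1));
end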