Skip to content
Snippets Groups Projects
Select Git revision
  • 90fad42931af21151e00d8eb30dcc84525ecacd0
  • master default protected
2 results

Gpu3dBlockProc.m

Blame
  • Code owners
    Assign users and groups as approvers for specific file changes. Learn more.
    Gpu3dBlockProc.m 5.10 KiB
    function varargout = Gpu3dBlockProc(Vi,BlkSz,Fun,varargin)
    % GPU3DBLOCKPROC GPU block processing for 3D volumes
    % Vo = Gpu3dBlockProc(Vi,BlkSiz,Fun)
    % Vo = Gpu3dBlockProc(___,Name,Value,...)
    %
    % INPUT
    % Vi    - Volume or cell with NumIn input volumes
    % BlkSz - Block size - must be [m n l] vector or a scalar
    % Fun   - Function to apply - must take NumIn inputs and return NumOut
    %         outputs
    %
    % KEY-VALUE PARAMETERS
    % BorderSize - [n m l] vector or scalar with the size of the block border 
    %              (default: 0)
    % Verbose    - Print status messages (default: false)
    % NumOut     - Number of outputs for Fun (default: 1)
    % UseGpu     - Do processing on the GPU (default: true)
    %
    % OUTPUT
    % [...] - NumOut x 1 (varargout) cell with output volumes
    %
    % See also BLOCKINDICES
    %
    % Patrick M. Jensen, 2019, ROCKWOOL
    
    % This function was heavily inspired by MATLABs blockproc function, as
    % well as:
    %
    % blockproc3 from the Gerardus toolbox
    % https://github.com/vigente/gerardus/blob/master/matlab/FiltersToolbox/blockproc3.m
    
    % Handle inputs
    narginchk(3,inf);
    prs = inputParser;
    % Required
    addRequired(prs,'Vi',@(x) validateattributes(x,...
        {'cell'},{'vector','nonempty'}));
    addRequired(prs,'BlkSz',@(x) validateattributes(x,...
        {'numeric'},{'integer','vector','positive'}));
    addRequired(prs,'Fun',@(x) validateattributes(x,...
        {'function_handle'},{}));
    % Key-value parameters
    addParameter(prs,'BorderSize',0,@(x) validateattributes(x,...
        {'numeric'},{'vector','vector','nonnegative'}));
    addParameter(prs,'Verbose',false,@(x) validateattributes(x,...
        {'logical'},{'scalar'}));
    addParameter(prs,'NumOut',1,@(x) validateattributes(x,...
        {'numeric'},{'scalar','positive','integer'}));
    addParameter(prs,'UseGpu',true,@(x) validateattributes(x,...
        {'logical'},{'scalar'}));
    
    if ~iscell(Vi), Vi = {Vi}; end % Make sure Vi is a cell array
    parse(prs,Vi,BlkSz,Fun,varargin{:});
    VSz = size(Vi{1});
    NumIn = length(Vi);
    cellfun(@(x) validateattributes(x,...
        {'numeric'},{'3d','real','size',VSz}),Vi);
    Opts = prs.Results;
    if Opts.UseGpu && gpuDeviceCount == 0
        error('No GPU available');
    end
    
    if isscalar(BlkSz)
        BlkSz = repmat(BlkSz,1,3);
    end
    if isscalar(Opts.BorderSize)
        Opts.BorderSize = repmat(Opts.BorderSize,1,3);
    end
    
    if Opts.UseGpu
        sendDat = @gpuArray;
        recvDat = @gather;
    else
        sendDat = @(x) x;
        recvDat = @(x) x;
    end
    
    % Add additional 1 to end of sizes if needed, to ensure they are 3 vectors
    VSz = [VSz ones(1,3-length(VSz))];
    BlkSz = [BlkSz ones(1,3-length(BlkSz))];
    Opts.BorderSize = [Opts.BorderSize ones(1,3-length(Opts.BorderSize))];
    
    [B,E,BB,BE] = BlockIndices(VSz,BlkSz,Opts.BorderSize);
    
    nblks = length(B{1})*length(B{2})*length(B{3});
    blkdims = [length(B{1}),length(B{2}),length(B{3})];
    varargout = cell(Opts.NumOut,1);
    if nblks == 1
        % In case we only need 1 block, just pass the entire volume to Fun.
        % This is more efficient than using indexing since that takes a long 
        % time for large inputs.
        if Opts.Verbose
            TextProgressBar('Processing blocks: ');
            TextProgressBar(1, 1);
        end
        BlkIn = cellfun(sendDat,Vi,'UniformOutput',0);
        [varargout{:}] = Fun(BlkIn{:});
        varargout = cellfun(recvDat,varargout,'UniformOutput',0);
    else
        if Opts.Verbose, TextProgressBar('Processing blocks: '); end
        for oi = 1:Opts.NumOut
            varargout{oi} = zeros(VSz,class(Vi{1}));
        end
    
        BlkResDevice = cell(1,Opts.NumOut);
    
        if Opts.Verbose, TextProgressBar(1, nblks); end
        for blki = 1:nblks
            if Opts.Verbose, TextProgressBar(blki, nblks); end
    
            % Prepare new block for sending
            BlkInHost = GetHostBlock(blki);
            % Send new block to the GPu
            BlkInDevice = cellfun(sendDat,BlkInHost,'UniformOutput',0);
            % Start processing for new block
            [BlkResDevice{:}] = Fun(BlkInDevice{:});
            % Transfer current result back from the GPU
            BlkResHost = cellfun(recvDat,BlkResDevice,'UniformOutput',0);
            % Place previous block result in output volume
            PutHostBlock(blki,BlkResHost);
            % Clear device results
            for bi = 1:Opts.NumOut
                BlkResDevice{bi} = [];
            end
        end
    end
    if Opts.Verbose, fprintf(' Done\n'); end
    
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    %                             LOCAL FUNCTIONS
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    
        function BlkHost = GetHostBlock(blki)
            [i,j,k] = ind2sub(blkdims, blki);
            BlkHost = cell(1,NumIn);
            for ii = 1:NumIn
                BlkHost{ii} = Vi{ii}(...
                    BB{1}(i):BE{1}(i),...
                    BB{2}(j):BE{2}(j),...
                    BB{3}(k):BE{3}(k));
            end
        end
    
        function PutHostBlock(blki,BlkResHost)
            [i,j,k] = ind2sub(blkdims, blki);
            for ii = 1:Opts.NumOut
                varargout{ii}(...
                    B{1}(i):E{1}(i),...
                    B{2}(j):E{2}(j),...
                    B{3}(k):E{3}(k)) = BlkResHost{ii}(...
                    (B{1}(i)-BB{1}(i)+1):(end-(BE{1}(i)-E{1}(i))),...
                    (B{2}(j)-BB{2}(j)+1):(end-(BE{2}(j)-E{2}(j))),...
                    (B{3}(k)-BB{3}(k)+1):(end-(BE{3}(k)-E{3}(k))));
            end
        end
    end