ECG-Kit 1.0

File: <base>/common/LIBRA/removeObsRobpca.m (5,781 bytes)
function res = removeObsRobpca(data,i,k,Hsets_min_i,same,factor_ind,csteps)

%REMOVEOBSROBPCA is an auxiliary function to perform cross-validation with ROBPCA, 
% RPCR, RSIMPLS, RSIMCA (see cvRobpca.m, cvRpcr.m, cvRsimpls.m).
%
% Input: 
%    data : the data set
%       i : the observation that is removed, index with respect to the whole data set.
%       k : the number of principal components that has to be calculated.
%  Hsets_min_i : contains H0_min_i, H1_min_i and Hfreq_min_i as first, second and third row respectively.
%                The h-subsets are implemented by means of indices of the observations in data_min_i, which is
%                is the original data set minus sample i. 
%  same : structure : 
%           same.value : indicates whether some part of this algorithm can be skipped (= 1) or not ( = 0). 
%           same.res : if same.value = 1, then some additional information is needed. 
%  factor : optional. default  = 0, then the original consistency factor is used.
%             Else factor = 1, the consistency factor is adapted to the kmax approach.
%  csteps : optional structure: 
%            csteps.value : 1 (default, then csteps are performed within robpca), 0 (no csteps are performed within robpca)
%            csteps.number : the number of csteps that need to be performed (default = 2).
%
% Output:
% res is the result structure. It contains:
%   Pk_min_i  : update of the loadingmatrix for a certain k when observation i is deleted.
%   Lk_min_i  : update of the eigenvalues for a certain k when observation i is deleted.
%   muk_min_i : update of the center for a certain k when observation i is deleted.
%
% This function is part of LIBRA: the Matlab Library for Robust Analysis,
% available at:
%              http://wis.kuleuven.be/stat/robust.html
%
% Written by S. Engelen

rot = [];
center = [];
P1 = [];

n = size(data,1);
p = size(data,2);
h = size(Hsets_min_i,2) + 1;

if nargin < 6    
    factor_ind = 0;    
    csteps.number = 2;   
    csteps.value = 1;
elseif nargin == 6    
    csteps.number = 2;    
    csteps.value = 1;
end

data_min_i= removal(data,i,0);
mu0 = mean(data);
center = mu0;

H0_min_j = Hsets_min_i(1,:);
H1_min_t = Hsets_min_i(2,:);

if ~same.value   
    [PH0_min_i,TH0,LH0_min_i,rH0_min_i,centerX,mu1trafo]=kernelEVD(data_min_i(H0_min_j,:));  
    LH0_min_i = diag(LH0_min_i);   
    
    % calculation of the projection
    % res.T2tilde = T2tilde;   
    center = mu1trafo;    
    rot = PH0_min_i;    
    T2tilde = (data_min_i - repmat(mu1trafo,n-1,1))*PH0_min_i;  
    T2tilde = T2tilde(:,1:k);  
    rot = rot(:,1:k);       
    
    % defining the outputstructure res:  
    res.PH0_min_i = PH0_min_i; 
    res.LH0_min_i = LH0_min_i;   
    res.mu1trafo = mu1trafo;
else
    % defining the input structure input
    res = same.res;  
    PH0_min_i = res.PH0_min_i;  
    LH0_min_i = res.LH0_min_i;   
    mu1trafo = res.mu1trafo;    
    T2tilde = (data_min_i - repmat(mu1trafo,n-1,1))*PH0_min_i;  
    center = mu1trafo;    
    rot = PH0_min_i;  
    rot = rot(:,1:k);   
    T2tilde = T2tilde(:,1:k);
end

mah = libra_mahalanobis(T2tilde,zeros(1,k),'invcov',1./diag(LH0_min_i(1:k,1:k)));
oldobj = prod(diag(LH0_min_i(1:k,1:k)));P4 = eye(k);

if csteps.value    
    oldobj = prod(diag(LH0_min_i(1:k,1:k)));  
    for j = 1:csteps.number     
        [mahsort, indsort] = sort(mah);      
        dataH1 = T2tilde(indsort(1:(h-1)),:);    
        [P,T,L,r3,Xm,clmX] = classSVD(dataH1);  
        obj = prod(L);      
        T2tilde = (T2tilde - repmat(clmX,n-1,1))*P;    
        center = center + clmX*rot';   
        rot = rot*P;      
        mah = libra_mahalanobis(T2tilde,zeros(1,size(T2tilde,2)),'invcov',1./L);      
        P4 = P4*P;    
        if abs(oldobj - obj) > 1.e-12      
            oldobj = obj;    
        else
            break   
        end
    end
else
    obj = oldobj;
end

% extra reweighting:
if k~=r3  
    XRc= data_min_i-repmat(center,n-1,1);   
    Xtilde = T2tilde*rot'; 
    Rdiff = XRc-Xtilde;   
    for i=1:n     
        odh(i,1)=norm(Rdiff(i,:));   
    end
    [m,s]=unimcd(odh.^(2/3),h);    
    cutoffodh = sqrt(norminv(0.975,m,s).^3);     
    indexset = find(odh<=cutoffodh)';
    [P,Threw,Lrew,rrew,Xmrew,clmX]=kernelEVD(data_min_i(indexset,:));   
    center = clmX;   
    rot = P(:,1:k);
end
T2tilde = (data_min_i - repmat(center,n-1,1))*rot;

% Perform mcdcov on some H-subsets:
[res_min_i,raw_min_i] = mcdcov(T2tilde,'Hsets',Hsets_min_i,'h',h-1,'ntrial',250,'plots',0,'factor',1);
% perform the last part of ROBPCA:
if raw_min_i.objective < obj  
    z = res_min_i;
else
    sortmah = sort(mah);  
    if h==n      
        factor=1;  
    else
        if factor_ind == 1        
            factor = sortmah(h-1)/chi2inv((h-1)/size(T2tilde,1),size(T2tilde,2)/2); % adjusted factor.       
        else
            factor = sortmah(h-1)/chi2inv((h-1)/size(T2tilde,1),size(T2tilde,2)); % non adjusted factor    
        end
    end
    mah = mah/factor;  
    weights = mah <= chi2inv(0.975,size(T2tilde,2)); 
    [center_noMCD,cov_noMCD] = weightmecov(T2tilde,weights);  
    mah = libra_mahalanobis(T2tilde,center_noMCD,'cov',cov_noMCD);  
    z.flag = (mah <= chi2inv(0.975,size(T2tilde,2)));   
    z.center = center_noMCD;  
    z.cov = cov_noMCD;
end

covf=z.cov;
centerf=z.center;

% The final PC:
[P6tilde,L6]=eig(covf);
[L6,I]=greatsort(diag(L6));
P6tilde=P6tilde(:,I);
T_min_i=(T2tilde-repmat(centerf,n-1,1))*P6tilde;

Pk_min_i=rot(:,1:k)*P6tilde;
Lk_min_i = L6;

res.Pk_min_i = Pk_min_i;
res.Lk_min_i = Lk_min_i;
res.muk_min_i = center + centerf*rot';