ECG-Kit 1.0
(4,461 bytes)
function result = cvMcd(data,kmax,resMCD,h)
%CVMCD calculates the robust cross-validated PRESS (predicted residual error sum of squares)
% curve for the MCD method in a fast way.
%
% Input arguments:
% data : the full data set
% kmax : the maximal number of components to be considered (mostly kmax = p).
% resMCD : the result of mcdcov(data,'plots',0,'factor',1)
% h : the quantile used in MCD.
%
% output:
% result.press : vector of length kmax with the press values
% result.weights : the weights for all observations
%
% This function is part of LIBRA: the Matlab Library for Robust Analysis,
% available at:
% http://wis.kuleuven.be/stat/robust.html
%
% Written by Sanne Engelen
% Last Update: 01/07/2004
% Some initialisations:
n = size(data,1);
p = size(data,2);
r = rank(data);
Pk = [];
Lk = [];
teller_if_lus = 0;
if nargin < 4
alfa = 0.75;
h=floor(2*floor((n+p+1)/2)-n+2*(n-floor((n+p+1)/2))*alfa);
end
outWeights = weightscvMcd(data,r,kmax,resMCD,h);
w_min = outWeights.w_min;
Hopt = resMCD.Hsubsets.Hopt;
inputH0.H0 = Hopt;
Tfull = mean(data(Hopt,:));
Sfull = cov(data(Hopt,:));
for i = 1:n
% deciding which index should be removed from H0.
inputH0.same = 0;
if isempty(find(inputH0.H0 == i))
inputH0.j = h;
if teller_if_lus >= 1
inputH0.same = 1;
end
teller_if_lus = teller_if_lus + 1;
else
inputH0.j = find(inputH0.H0 == i);
end
% assigning the input variables:
inputFull.T = Tfull;
inputFull.S = Sfull;
if ~inputH0.same
res = removeObsMcd(data,i,inputH0,inputFull);
end
if (isempty(find(inputH0.H0 == i))) & (teller_if_lus == 1)
resfixed = res;
end
if isempty(find(inputH0.H0 == i)) & (teller_if_lus ~= 1)
res = resfixed;
end
P_min_i = res.P_min_i;
L_min_i = res.L_min_i;
mu_min_i = res.mu_min_i;
for k = 1:kmax
clear Pk Lk;
Pk = P_min_i(:,1:k);
Lk = L_min_i(1:k,1:k);
Xhoedk_min_i(i,(k-1)*p + 1:k*p) = (data(i,:) - mu_min_i)*Pk*Pk' + mu_min_i;
if k~=r
odk(i,k) = norm(data(i,:) - Xhoedk_min_i(i,(k-1)*p + 1:k*p));
else
odk(i,k) = 0;
end
end
end
for k = 1:kmax
press_min(k) = 1/sum(w_min)*w_min*odk(:,k).^2;
end
result.press = press_min;
result.weights = outWeights;
%----------------------------------------------------------------------------------
function out = weightscvMcd(data,r,kmax,resMCD,h)
% computes the weights used to calculate the robust PRESS values.
%
% input:
% data : the whole data
% r : the rank of the data
% kmax : the maximal number of components to be considered
% resMCD : the result of mcdcov(data,'plots',0,'factor',1)
% h : the number of observations on which the computations are based.
%
% output:
% out.w_min : the weights computed by taken the minimum over all k
% Some initialisations:
n = size(data,1);
p = size(data,2);
Pk = [];
Lk = [];
Tik = [];
[P,L] = eig(resMCD.cov);
[L,I] = greatsort(diag(L));
P = P(:,I);
for k = 1:kmax
Pk = P(:,1:k);
if h==n
Lk=L(1:k);
else
Lk = chi2inv(h/n,k)/chi2inv(h/n,kmax/2)*L(1:k);% with correction for the factor
end
muk = resMCD.center;
Xhoedk(:,(k-1)*p + 1:k*p) = (data - repmat(muk,n,1))*Pk*Pk' + repmat(muk,n,1);
Tk = (data - repmat(muk,n,1))*Pk;
for i =1:n
% defining the sd for the observation that is left:
sdk(i,k) = sqrt(libra_mahalanobis(Tk(i,:),zeros(1,k),'cov',diag(Lk)));
% defining the od for the observation that is left:
if k~=r
odk(i,k) = norm(data(i,:) - Xhoedk(i,(k-1)*p + 1:k*p));
else
odk(i,k) = 1;
end
end
% defining weights for odk and sdk:
if k~=r
[m,s]=unimcd(odk(:,k).^(2/3),h);
cutoff(k)=sqrt(norminv(0.975,m,s).^3);
wod(:,k) = (odk(:,k) <= cutoff(k));
else
cutoff(k)= 0;
wod(:,k) = 1;
end
wsd(:,k) = (sdk(:,k) <= sqrt(chi2inv(0.975,k)));
end
% determine the weights for every observation:
wk = wsd & wod;
if size(wk,1) == 1 | size(wk,2) == 1
w_min = wk';
else
w_min = min(wk');
end
out.w_min = w_min;