ECG-Kit 1.0
(12,132 bytes)
function result=mcdregres(x,y,varargin)
%MCDREGRES is a robust multivariate regression method. It can handle multiple
% response variables. The estimates are based on the robust MCD estimator of
% location and scatter (see mcdcov.m). The explanatory variables should be
% low-dimensional, otherwise robust principal component regression (rpcr.m)
% or robust partial least squares (rsimpls.m) should be applied.
%
% The MCD regression method is described in
% Rousseeuw, P.J., Van Aelst, S., Van Driessen, K, Agullo, J. (2004),
% "Robust multivariate regression", Technometrics, 46, pp 293-305.
%
% Required input arguments:
% x : Data matrix of the explanatory variables
% (n observations in rows, p variables in columns)
% y : Data matrix of the response variables
% (n observations in rows, q variables in columns)
%
% Optional input arguments:
% alpha : (1-alpha) measures the amount of contamination the algorithm should
% resist. Any value between 0.5 and 1 may be specified. (default = 0.75)
% h : The quantile of observations whose covariance determinant will
% be minimized. Any value between n/2 and n may be specified.
% The default value is 0.75*n.
% ntrial : The number of random trial subsamples that are drawn for
% large datasets. (default = 500)
% plots : If equal to one, a menu is shown which allows to draw a regression
% outlier map. (default)
% If the input argument 'classic' is equal to one, the classical
% plot is drawn as well.
% If 'plots' is equal to zero, all plots are suppressed.
% See also makeplot.m
% classic : If equal to one, classical multivariate linear regression
% is performed as well, see mlr.m. (default = 0)
%
% Input arguments for advanced users:
% Hsets : Instead of random trial h-subsets (default, Hsets = []), Hsets makes it possible to give certain
% h-subsets as input. Hsets is a matrix that contains the indices of the observations of one
% h-subset as a row.
%
% I/O: result=mcdregres(x,y,'alpha',0.75,'ntrial',500,'plots',1,'classic',0);
% The user should only give the input arguments that have to change their default value.
% The name of the input arguments needs to be followed by their value.
% The order of the input arguments is of no importance.
%
% Example: result=mcdregres(x,y,'plots',0,'alpha',0.70)
%
% The output is a structure which contains
% result.slope : Robust slope (matrix)
% result.int : Robust intercept (vector)
% result.fitted : Robust prediction matrix
% result.res : Robust residuals
% result.cov : Estimated variance-covariance matrix of the residuals
% result.rsquared : Robust R-squared value
% result.h : The quantile h used throughout the algorithm
% result.Hsubsets : A structure that contains Hopt and Hfreq:
% Hopt : The subset of h points whose covariance matrix has minimal determinant,
% ordered following increasing robust distances.
% Hfreq : The subset of h points which are the most frequently selected during the whole
% algorithm.
% result.rd : Robust scores distances in x-space
% result.resd : Residual distances (when there are several response variables).
% If univariate regression is performed, it contains the standardized residuals.
% result.cutoff : Cutoff values for the score and residual distances
% result.weights : The observations with weight one are used in the reweighting,
% the other observations have zero weight.
% result.flag : The observations whose residual distance is larger than result.cutoff.resd
% (bad leverage points/vertical outliers) can be considered as outliers and receive
% a flag equal to zero.
% The regular observations, including the good leverage points,
% receive a flag 1.
% result.class : 'MCDREG'
% result.classic : If the input argument 'classic' is equal to one, this structure
% contains results of classical multivariate regression (see also mlr.m).
%
% This function is part of LIBRA: the Matlab Library for Robust Analysis,
% available at:
% http://wis.kuleuven.be/stat/robust.html
%
% Original S-PLUS code by Katrien Vandriessen, implemented in MATLAB by Sabine Verboven
% Version date : 12/02/2004
% Last update: 04/08/2006
%%%%%%%%%%%%%%%%
% INITIALIZATION
%
intercept=ones(length(x),1);
geg=[x,y];
[n,m]=size(geg);
alfa=0.75;
hdefault=min(floor(2*floor((n+m+1)/2)-n+2*(n-floor((n+m+1)/2))*alfa),n);
if nargin < 3
options.alpha=alfa;
options.h=hdefault;
options.ntrial=500;
options.plots=1;
options.classic=0;
options.Hsets=[];
else
default=struct('alpha',alfa,'h',hdefault,'ntrial',500,'plots',1,'classic',0,'Hsets',[]);
list=fieldnames(default);
options=default;
IN=length(list);
i=1;
counter=1;
%
if nargin>2
%
%placing inputfields in array of strings
%
for j=1:nargin-2
if rem(j,2)~=0
chklist{i}=varargin{j};
i=i+1;
end
end
dummy=sum(strcmp(chklist,'h')+2*strcmp(chklist,'alpha'));
switch dummy
case 0 %no input for alpha or h so take on the default values
options.alpha=0.75;
options.h=floor(options.alpha*n);
case 3
error('Both input arguments alpha and h are provided. Only one is required.')
end
%
%Checking which default parameters have to be changed
% and keep them in the structure 'options'.
%
while counter<=IN
index=strmatch(list(counter,:),chklist,'exact');
if ~isempty(index) %in case of similarity
for j=1:nargin-2 %searching the index of the accompanying field
if rem(j,2)~=0 %fieldnames are placed on odd index
if strcmp(chklist{index},varargin{j})
I=j;
end
end
end
options=setfield(options,chklist{index},varargin{I+1});
index=[];
end
counter=counter+1;
end
if dummy==1
options.alpha=options.h/n;
elseif dummy==2
options.h=floor(options.alpha*n);
end
Hsets = options.Hsets;
end
end
%%%%%%%%
% MAIN %
[mcd_res, mcd_raw]=mcdcov(geg,'h',options.h,'plots',0,'Hsets',options.Hsets);
options.h = mcd_res.h;
%in case of an exact fit, the calculations stop at this point.
if ~isempty(mcd_res.plane)
disp('Warning (mcdregres): The MCD covariance matrix is singular. See also mcdcov.m.')
result=mcd_res;
return
end
mcdreg.Hsubsets.Hopt = mcd_res.Hsubsets.Hopt;
mcdreg.Hsubsets.Hfreq = mcd_res.Hsubsets.Hfreq;
rewcovmcd=mcd_res.cov; %one-step reweighted covariance matrix
rewcenmcd=mcd_res.center; %one-step reweighted location
q=size(y,2);
p=size(x,2);
%initializing reweighted location and reweighted scatter (paragraph 5.1 of paper)
rewtmcdx=rewcenmcd(1:p)'; %column vectors!!!
rewtmcdy=rewcenmcd((p+1):m)';
rewsmcdx=rewcovmcd(1:p,1:p);
rewsmcdxy=rewcovmcd(1:p,(p+1):m);
rewsmcdyx=rewsmcdxy';
rewsmcdy=rewcovmcd((p+1):m,(p+1):m);
covyy=rewsmcdy;
covxx = rewsmcdx;
covxy = rewsmcdxy;
covyx = rewsmcdyx;
mcdreg.Sigma = [covxx, covxy ; covyx, covyy];
mcdreg.Mu = [rewtmcdx;rewtmcdy];
%reweighted beta and alpha (the columnvector [\beta^L ; \alpha^L])
rewbetamcd=[inv(rewsmcdx)*rewsmcdxy; (rewtmcdy-(rewsmcdyx*inv(rewsmcdx)*rewtmcdx))'];
%calculation of the reweighted weights based on
%the residuals calculated with the reweighted beta-coefficients
rewweights=zeros(n,1);
weights=zeros(n,1);
rewfitted=[x,intercept]*rewbetamcd(1:(p+1),:);
rewresid=y-rewfitted; %(r_i^L)
rewE=rewsmcdy-rewbetamcd(1:p,1:q)'*rewsmcdx*rewbetamcd(1:p,1:q); %reweighted scatter (\Sigma_eps^L)
for j=1:n
if (sqrt(rewresid(j,1:q)*inv(rewE)*rewresid(j,1:q)')) <= sqrt(chi2inv(0.99,q))
rewweights(j)=1;
end
end
%regression reweighting part based on the reweighted observations. (paragraph 5.3 of paper)
rewclasscov=cov(geg(rewweights==1,:));
rewclasscenter=mean(geg(rewweights==1,:));
rewtmcdx=rewclasscenter(1:p)';
rewtmcdy=rewclasscenter((p+1):m)';
rewsmcdx=rewclasscov(1:p,1:p);
rewsmcdxy=rewclasscov(1:p,(p+1):m);
rewsmcdyx=rewsmcdxy';
rewsmcdy=rewclasscov((p+1):m,(p+1):m);
%regression reweighted coefficients beta and alpha ([\beta^{RL} \alpha^{RL}])
rewbetamcdrew=[inv(rewsmcdx)*rewsmcdxy; (rewtmcdy-(rewsmcdyx*inv(rewsmcdx)*rewtmcdx))'];
%regression reweighted scatter (\Sigma_eps^{RL}])
rewE2=rewsmcdy-rewbetamcdrew(1:p,1:q)'*rewsmcdx*rewbetamcdrew(1:p,1:q);
rewfittedrew=[x,intercept]*rewbetamcdrew(1:(p+1),:);
rewresidrew=y-rewfittedrew;
%%%%%%%%%%%%%%%%%
%OUTPUT STRUCTURE
%
mcdreg.covyy=rewsmcdy; %regression and location reweighted covariance matrix of responses
mcdreg.covxx = rewsmcdx;
mcdreg.covxy = rewsmcdxy;
mcdreg.covyx = rewsmcdyx;
mcdreg.Sigmarew = [mcdreg.covxx, mcdreg.covxy ; mcdreg.covyx, mcdreg.covyy];
mcdreg.Murew = [rewtmcdx;rewtmcdy];
mcdreg.x=x;
mcdreg.y=y;
mcdreg.coeffs=rewbetamcdrew; %regression and location reweighted coefficients
mcdreg.cov=rewE2; %scatter matrix based on location and regression reweighting
mcdreg.fitted=rewfittedrew; %estimated respons(es);
mcdreg.res=rewresidrew; %regression and location reweighted residuals
if(intercept)
mcdreg.interc=1;
else
mcdreg.interc=0;
end
% Robust distances in x-space = x-distances (rd) needed in diagnostic regression plot
if (-log(det(mcd_res.cov))/m) > 50
mcdreg.rd='singularity';
else
mcdreg.rd=sqrt(libra_mahalanobis(mcdreg.x,rewcenmcd(1:p),'cov',rewcovmcd(1:p,1:p)))';
end
% Robust residual distances (resd) needed in diagnostic regression plot
if q>1
if (-log(det(mcd_res.cov))/m)>50
mcdreg.resd='singularity';
disp('Warning (mcdregres): A singularity ')
else
cen=zeros(q,1)';
[nn,pp]=size(rewresidrew);
mcdreg.resd=sqrt(libra_mahalanobis(rewresidrew,cen,'cov',mcdreg.cov))'; %robust distances of residuals
end
else
mcdreg.covarRes=sqrt(mcdreg.cov);
mcdreg.resd=rewresidrew(:,1)/mcdreg.covarRes; %standardized residuals
end
% cutoff values
mcdreg.cutoff.rd=sqrt(chi2inv(0.975,p));
mcdreg.cutoff.resd=sqrt(chi2inv(0.975,q));
mcdreg.flag=(abs(mcdreg.resd)<=mcdreg.cutoff.resd);
% robust multivariate Rsquared
mcdreg.weights=rewweights;
Yw=y(mcdreg.weights==1,:);
cYw=mcenter(Yw);
res=rewresidrew(mcdreg.weights==1,:);
mcdreg.rsquared=1-(det(res'*res)/det(cYw'*cYw));
mcdreg.class='MCDREG';
if options.classic
mcdreg.classic=mlr(x,y,'plots',0);
else
mcdreg.classic=0;
end
result=struct('slope',{mcdreg.coeffs(1:p,:)}, 'int',{mcdreg.coeffs(p+1,:)}, ...
'fitted',{mcdreg.fitted},'res',{mcdreg.res},'cov',{mcdreg.cov},'rsquared',{mcdreg.rsquared},...
'h',{options.h},'Hsubsets',{mcdreg.Hsubsets},'rd', {mcdreg.rd},'resd',{mcdreg.resd},'cutoff',{mcdreg.cutoff},...
'weights',{mcdreg.weights},'flag',{mcdreg.flag'},'class',{mcdreg.class},'classic',{mcdreg.classic},...
'Mu',{mcdreg.Mu},'Sigma',{mcdreg.Sigma},'Murew',{mcdreg.Murew},'Sigmarew',{mcdreg.Sigmarew});
try
if options.plots & options.classic
makeplot(result,'classic',1)
elseif options.plots
makeplot(result)
end
catch %output must be given even if plots are interrupted
%> delete(gcf) to get rid of the menu
end