ECG-Kit 1.0
(5,925 bytes)
function result=cpcr(x,y,varargin)
%CPCR performs a classical principal components regression.
% First, classical PCA is applied to the predictor variables x (see cpca.m) and
% k components are retained. Then a multiple linear regression method (see mlr.m)
% is performed of the response variable y on the k principal components.
%
% I/O: result=cpcr(x,y,'k',2);
%
% Required input arguments:
% x : Data matrix of the explanatory variables
% (n observations in rows, p variables in columns)
% y : Data matrix of the response variables
% (n observations in rows, q variables in columns)
%
% Optional input argument:
% k : Number of principal components to compute. If k is missing,
% a scree plot is drawn which allows to select
% the number of principal components.
% plots : If equal to one (default), a menu is shown which allows to draw several plots,
% such as a score outlier map and a regression outlier map.
% If 'plots' is equal to zero, all plots are suppressed.
% See also makeplot.m
%
% The output is a structure containing
%
% result.slope : Classical slope
% result.int : Classical intercept
% result.fitted : Classcial prediction vector
% result.res : Classical residuals
% result.sigma : Estimated variance-covariance matrix of the residuals
% result.rsquared : R-squared value
% result.k : Number of components used in the regression
% result.sd : Classical score distances
% result.od : Classical orthogonal distances
% result.resd : Residual distances (when there are several response variables).
% If univariate regression is performed, it contains the standardized residuals.
% result.cutoff : Cutoff values for the score, orthogonal and residual distances.
% result.flag : The observations whose orthogonal distance is larger than result.cutoff.od
% (orthogonal outliers => result.flag.od) and/or whose residual distance is
% larger than result.cutoff.resd (bad leverage points/vertical outliers => result.flag.resd)
% can be considered as outliers and receive a flag equal to zero (=> result.flag.all).
% The regular observations, including the good leverage points, receive a flag 1.
% result.class : 'CPCR'
% result.cpca : Full output of the classical PCA part (see cpca.m)
%
% This function is part of LIBRA: the Matlab Library for Robust Analysis,
% available at:
% http://wis.kuleuven.be/stat/robust.html
%
% Written by S. Verboven
% Last Update: 05/04/2003
default=struct('plots',1,'k',0);
list=fieldnames(default);
options=default;
IN=length(list);
i=1;
counter=1;
if nargin>2
%
% placing inputfields in array of strings
%
for j=1:nargin-2
if rem(j,2)~=0
chklist{i}=varargin{j};
i=i+1;
end
end
% Checking which default parameters have to be changed
% and keep them in the structure 'options'.
while counter<=IN
index=strmatch(list(counter,:),chklist,'exact');
if ~isempty(index) % in case of similarity
for j=1:nargin-2 % searching the index of the accompanying field
if rem(j,2)~=0 % fieldnames are placed on odd index
if strcmp(chklist{index},varargin{j})
I=j;
end
end
end
options=setfield(options,chklist{index},varargin{I+1});
index=[];
end
counter=counter+1;
end
end
%classical PCA
if options.k==0
out.cpca=cpca(x,'plots',0);
else
out.cpca=cpca(x,'plots',0,'k',options.k);
end
%model with intercept
T=[out.cpca.T(:,1:out.cpca.k) ones(size(out.cpca.T,1),1)];
%Multivariate linear regression
out.a=inv(T'*T)*T'*y;
out.fitted=T*out.a;
len=size(out.a,1);
p=size(T,2);
q=size(y,2);
geg=[T,y];
[n,m]=size(geg);
S=cov(geg);
Sx=S(1:p-1,1:p-1);
Sxy=S(1:p-1,p+1:m);
Syx=Sxy';
Sy=S(p+1:m,p+1:m);
Se=Sy-out.a(1:p-1,1:q)'*Sx*out.a(1:p-1,1:q); %variance of errors
%regression coefficients slope and intercept ([\beta \alpha])
out.coeffs=[out.cpca.P(:,1:out.cpca.k)*out.a(1:len-1,:); out.a(len,:)-out.cpca.M*out.cpca.P(:,1:out.cpca.k)*out.a(1:len-1,:)]; %%coefficients in the original space;
out.slope=out.cpca.P(:,1:out.cpca.k)*out.a(1:len-1,:);
out.int=out.a(len,:)-out.cpca.M*out.cpca.P(:,1:out.cpca.k)*out.a(1:len-1,:);
out.res=y-out.fitted;
out.sigma=Se;
if q==1
out.stdres=out.res./sqrt(diag(out.sigma));
end
out.k=out.cpca.k;
STTm=sum((y-repmat(mean(y),length(y),1)).^2);
SSE=sum(out.res.^2);
out.rsquared=1-SSE/STTm;
out.class='CPCR';
%calculating residual distances
if q>1
if (-log(det(Se)/(m-1)))>50
out.resd='singularity';
else
cen=zeros(q,1);
out.resd=sqrt(libra_mahalanobis(out.res,cen','cov',Se))';
end
else % q==1
out.resd=out.stdres; %standardized residuals
end
out.cutoff=out.cpca.cutoff;
out.cutoff.resd=sqrt(chi2inv(0.975,size(y,2)));
%computing flags
out.flag.od=out.cpca.flag.od;
out.flag.sd=out.cpca.flag.sd;
out.flag.resd=abs(out.resd)<=out.cutoff.resd;
out.flag.all=(out.flag.od & out.flag.resd);
result=struct( 'slope',{out.slope}, 'int',{out.int},'fitted',{out.fitted},'res',{out.res},...
'cov',{out.sigma},'rsquared',{out.rsquared},...
'k',{out.k},'sd', {out.cpca.sd},'od',{out.cpca.od},'resd',{out.resd},...
'cutoff',{out.cutoff},'flag',{out.flag},'class',{out.class},...
'cpca',{out.cpca});
try
if options.plots
makeplot(result)
end
catch %output must be given even if plots are interrupted
%> delete(gcf) to get rid of the menu
end