ECG-Kit 1.0
(7,003 bytes)
function result=csimpls(x,y,varargin)
%CSIMPLS performs Partial Least Squares regression using the SIMPLS
% algorithm of de Jong (1993).
%
% Reference:
% de Jong, S. (1993),
% "SIMPLS: an alternative approach to Partial Least Squares Regression",
% Chemometrics and Intelligent Laboratory Systems, 18, 251-263.
%
% Required input arguments:
% x : Data matrix of the explanatory variables
% (n observations in rows, p variables in columns)
% y : Data matrix of the response variables
% (n observations in rows, q variables in columns)
%
% Optional input arguments:
% k : Number of components to be used.
% (default = min([9,rank([x,y]),floor(n/2),p])).
% plots : If equal to one, a menu is shown which allows to draw several plots,
% such as a score outlier map and a regression outlier map. (default)
% If 'plots' is equal to zero, all plots are suppressed.
% See also makeplot.m
%
% I/O: result=csimpls(x,y,'k',10);
%
% The output of CSIMPLS is a structure containing:
%
% result.slope : Classical slope estimate
% result.int : Classical intercept estimate
% result.fitted : Classical prediction vector
% result.res : Classical residuals
% result.cov : Estimated variance-covariance matrix of the residuals
% result.M : Classical center of the matrix [X;Y]
% result.T : Classical scores
% result.weights.r : Classical simpls weights
% result.weights.p : Classical simpls weights
% result.Tcov : Classical covariance matrix of the scores
% result.k : Number of components used in the regression
% result.sd : Classical score distances
% result.od : Classical orthogonal distances
% result.resd : Residual distances (when there are several response variables).
% If univariate regression is performed, it contains the standardized residuals.
% result.cutoff : Cutoff values for the score, orthogonal and residual distances.
% result.flag : The observations whose orthogonal distance is larger than result.cutoff.od
% (orthogonal outliers => result.flag.od) and/or whose residual distance is
% larger than result.cutoff.resd (bad leverage points/vertical outliers => result.flag.resd)
% can be considered as outliers and receive a flag equal to zero (=> result.flag.all).
% The regular observations, including the good leverage points, receive a flag 1.
% result.class : 'CSIMPLS'
%
% This function is part of LIBRA: the Matlab library for Robust Analysis,
% available at:
% http://wis.kuleuven.be/stat/robust.html
%
%Written by Karlien Vanden Branden
%Last Update: 05/04/2003
%Last revision: 20/05/2008
[n,p1]=size(x);
[n2,q1]=size(y);
if n~=n2
error('The response variables and the predictor variables have a different number of observations.')
end
kmin=min([9,rank(x),floor(n/2),p1]);
default=struct('plots',1,'k',kmin);
list=fieldnames(default);
options=default;
IN=length(list);
i=1;
counter=1;
if nargin>2
%
% placing inputfields in array of strings
%
for j=1:nargin-2
if rem(j,2)~=0
chklist{i}=varargin{j};
i=i+1;
end
end
% Checking which default parameters have to be changed
% and keep them in the structure 'options'.
while counter<=IN
index=strmatch(list(counter,:),chklist,'exact');
if ~isempty(index) % in case of similarity
for j=1:nargin-2 % searching the index of the accompanying field
if rem(j,2)~=0 % fieldnames are placed on odd index
if strcmp(chklist{index},varargin{j})
I=j;
end
end
end
options=setfield(options,chklist{index},varargin{I+1});
index=[];
end
counter=counter+1;
end
end
[xcentr,cx]=mcenter(x);
[ycentr,cy]=mcenter(y);
out.M=[cx cy];%[xcentr ycentr];
sigmaxy=xcentr'*ycentr;
for i=1:options.k
sigmayx=sigmaxy';
if q1>p1
[RR,LL]=eig(sigmaxy*sigmayx);
[LL,I]=greatsort(diag(LL));
rr=RR(:,I(1));
qq=sigmayx*rr;
qq=qq/norm(qq);
else
if q1==1
qq = 1;
rr = sigmaxy;
else
[QQ,LL]=eig(sigmayx*sigmaxy);
[LL,I]=greatsort(diag(LL));
qq=QQ(:,I(1));
rr=sigmaxy*qq;
end
end
tt=xcentr*rr;
nttc=norm(tt);
rr=rr/nttc;
tt=tt/nttc;
qq=ycentr'*tt;
uu=ycentr*qq;
pp=xcentr'*tt;
vv=pp;
if i>1
vv = vv -v*(v'*pp);
end
vv = vv/norm(vv);
sigmaxy = sigmaxy - vv*(vv'*sigmaxy);
v(:,i)=vv;
q(:,i)=qq;
t(:,i)=tt;
u(:,i)=uu;
p(:,i)=pp;
r(:,i)=rr;
end
%Second Stage : Classical Regression and transformation
b=r*q';
int=cy-cx*b;
%classical output
out.T=t;
out.weights.p=p;
out.weights.r=r;
out.coef=[b; int];
out.b=b;
out.int=int;
out.yhat=x*b+repmat(int,n,1);
out.res=y-out.yhat;
out.covar=cov(out.res);
if q1==1
out.stdres=out.res./sqrt(out.covar);
end
out.k=options.k;
STTm=sum((y-repmat(mean(y),length(y),1)).^2);
SSE=sum(out.res.^2);
out.rsquare=1-SSE/STTm;
out.class='CSIMPLS';
%calculating classical distances in x-space
out.Tcov=cov(t);
out.sd=sqrt(libra_mahalanobis(t,zeros(1,out.k),'cov',out.Tcov))';
out.cutoff.sd=sqrt(chi2inv(0.975,out.k));
%calculating classical orthogonal distances
xt=t*p';
tempo=xcentr-xt;
for i=1:n
out.od(i,1)=norm(tempo(i,:));
end
r=rank(x);
if out.k~=r
m=mean(out.od.^(2/3));
s=sqrt(var(out.od.^(2/3)));
out.cutoff.od = sqrt(norminv(0.975,m,s)^3);
else
out.cutoff.od=0;
end
%calculating residual distances
if q1>1
if (-log(det(out.covar)/(p1+q1-1)))>50
out.resd='singularity';
else
cen=zeros(q1,1);
out.resd=sqrt(libra_mahalanobis(out.res,cen','cov',out.covar))';
end
else %here q==1
out.resd=out.stdres; %standardized residuals
end
out.cutoff.resd=sqrt(chi2inv(0.975,q1));
%Computing flags
out.flag.od=out.od<=out.cutoff.od;
out.flag.resd=abs(out.resd)<=out.cutoff.resd;
out.flag.all=(out.flag.od & out.flag.resd);
result=struct('slope',{out.b}, 'int',{out.int},'fitted',{out.yhat},'res',{out.res}, 'cov',{out.covar},...
'M', {out.M},'T',{out.T} ,'weights', {out.weights},'Tcov',{out.Tcov},'k',{out.k},'sd', {out.sd},'od',{out.od},'resd',{out.resd},...
'cutoff',{out.cutoff},'flag',{out.flag},'class',{out.class});
try
if options.plots
makeplot(result)
end
catch %output must be given even if plots are interrupted
end