ECG-Kit 1.0

File: <base>/common/LIBRA/csimpls.m (7,003 bytes)
function result=csimpls(x,y,varargin)

%CSIMPLS performs Partial Least Squares regression using the SIMPLS 
% algorithm of de Jong (1993).
%
% Reference: 
%    de Jong, S. (1993),
%    "SIMPLS: an alternative approach to Partial Least Squares Regression", 
%    Chemometrics and Intelligent Laboratory Systems, 18, 251-263.
%
% Required input arguments:
%            x : Data matrix of the explanatory variables
%                (n observations in rows, p variables in columns)
%            y : Data matrix of the response variables
%                (n observations in rows, q variables in columns)
%     
% Optional input arguments:
%            k : Number of components to be used.  
%                (default = min([9,rank([x,y]),floor(n/2),p])). 
%        plots : If equal to one, a menu is shown which allows to draw several plots,
%                such as a score outlier map and a regression outlier map. (default)
%                If 'plots' is equal to zero, all plots are suppressed.
%                See also makeplot.m
%
% I/O: result=csimpls(x,y,'k',10);
%
% The output of CSIMPLS is a structure containing:
%
%   result.slope     : Classical slope estimate
%   result.int       : Classical intercept estimate
%   result.fitted    : Classical prediction vector
%   result.res       : Classical residuals
%   result.cov       : Estimated variance-covariance matrix of the residuals 
%   result.M         : Classical center of the matrix [X;Y]
%   result.T         : Classical scores
%   result.weights.r : Classical simpls weights
%   result.weights.p : Classical simpls weights
%   result.Tcov      : Classical covariance matrix of the scores
%   result.k         : Number of components used in the regression
%   result.sd        : Classical score distances
%   result.od        : Classical orthogonal distances
%   result.resd        : Residual distances (when there are several response variables).
%                      If univariate regression is performed, it contains the standardized residuals.
%   result.cutoff    : Cutoff values for the score, orthogonal and residual distances.
%   result.flag      : The observations whose orthogonal distance is larger than result.cutoff.od
%                      (orthogonal outliers => result.flag.od) and/or whose residual distance is 
%                      larger than result.cutoff.resd (bad leverage points/vertical outliers => result.flag.resd)
%                      can be considered as outliers and receive a flag equal to zero (=> result.flag.all).
%                      The regular observations, including the good leverage points, receive a flag 1.
%   result.class     : 'CSIMPLS'
%
% This function is part of LIBRA: the Matlab library for Robust Analysis,
% available at: 
%              http://wis.kuleuven.be/stat/robust.html
%
%Written by Karlien Vanden Branden
%Last Update: 05/04/2003 
%Last revision: 20/05/2008

[n,p1]=size(x);
[n2,q1]=size(y);
if n~=n2
    error('The response variables and the predictor variables have a different number of observations.')
end
kmin=min([9,rank(x),floor(n/2),p1]);
default=struct('plots',1,'k',kmin);
list=fieldnames(default);
options=default;
IN=length(list);
i=1;   
counter=1;
if nargin>2
    %
    % placing inputfields in array of strings
    %
    for j=1:nargin-2
        if rem(j,2)~=0
            chklist{i}=varargin{j};
            i=i+1;
        end
    end 
    % Checking which default parameters have to be changed
    % and keep them in the structure 'options'.
    while counter<=IN 
        index=strmatch(list(counter,:),chklist,'exact');
        if ~isempty(index) % in case of similarity
            for j=1:nargin-2 % searching the index of the accompanying field
                if rem(j,2)~=0 % fieldnames are placed on odd index
                    if strcmp(chklist{index},varargin{j})
                        I=j;
                    end
                end
            end
            options=setfield(options,chklist{index},varargin{I+1});
            index=[];
        end
        counter=counter+1;
    end
end

[xcentr,cx]=mcenter(x);
[ycentr,cy]=mcenter(y);
out.M=[cx cy];%[xcentr ycentr];
sigmaxy=xcentr'*ycentr;
for i=1:options.k
    sigmayx=sigmaxy';
    if q1>p1        
        [RR,LL]=eig(sigmaxy*sigmayx);  
        [LL,I]=greatsort(diag(LL));
        rr=RR(:,I(1));
        qq=sigmayx*rr; 
        qq=qq/norm(qq);
    else
        if q1==1
            qq = 1;
            rr = sigmaxy;
        else
            [QQ,LL]=eig(sigmayx*sigmaxy);
            [LL,I]=greatsort(diag(LL));
            qq=QQ(:,I(1));
            rr=sigmaxy*qq;
        end
    end
    tt=xcentr*rr;
    nttc=norm(tt);
    rr=rr/nttc;
    tt=tt/nttc;
    qq=ycentr'*tt;
    uu=ycentr*qq;
    pp=xcentr'*tt;
    vv=pp;
    if i>1 												
        vv = vv -v*(v'*pp);
    end
    vv = vv/norm(vv);
    sigmaxy = sigmaxy - vv*(vv'*sigmaxy);
    v(:,i)=vv;
    q(:,i)=qq;
    t(:,i)=tt;
    u(:,i)=uu;
    p(:,i)=pp;
    r(:,i)=rr;
end

%Second Stage : Classical Regression and transformation

b=r*q';
int=cy-cx*b;

%classical output
out.T=t;
out.weights.p=p;
out.weights.r=r;
out.coef=[b; int];
out.b=b;
out.int=int;
out.yhat=x*b+repmat(int,n,1);
out.res=y-out.yhat;
out.covar=cov(out.res);
if q1==1
    out.stdres=out.res./sqrt(out.covar);
end
out.k=options.k;
STTm=sum((y-repmat(mean(y),length(y),1)).^2);
SSE=sum(out.res.^2);
out.rsquare=1-SSE/STTm;
out.class='CSIMPLS';

%calculating classical distances in x-space
out.Tcov=cov(t);
out.sd=sqrt(libra_mahalanobis(t,zeros(1,out.k),'cov',out.Tcov))';
out.cutoff.sd=sqrt(chi2inv(0.975,out.k));

%calculating classical orthogonal distances
xt=t*p';
tempo=xcentr-xt;
for i=1:n
    out.od(i,1)=norm(tempo(i,:));
end
r=rank(x);
if out.k~=r
    m=mean(out.od.^(2/3));
    s=sqrt(var(out.od.^(2/3)));
    out.cutoff.od = sqrt(norminv(0.975,m,s)^3); 
else
    out.cutoff.od=0;
end

%calculating residual distances
if q1>1
    if (-log(det(out.covar)/(p1+q1-1)))>50
        out.resd='singularity';
    else
       cen=zeros(q1,1);
       out.resd=sqrt(libra_mahalanobis(out.res,cen','cov',out.covar))';
   end
else    %here q==1
    out.resd=out.stdres; %standardized residuals
end
out.cutoff.resd=sqrt(chi2inv(0.975,q1));

%Computing flags
out.flag.od=out.od<=out.cutoff.od;
out.flag.resd=abs(out.resd)<=out.cutoff.resd;
out.flag.all=(out.flag.od & out.flag.resd);


result=struct('slope',{out.b}, 'int',{out.int},'fitted',{out.yhat},'res',{out.res}, 'cov',{out.covar},...
    'M', {out.M},'T',{out.T} ,'weights', {out.weights},'Tcov',{out.Tcov},'k',{out.k},'sd', {out.sd},'od',{out.od},'resd',{out.resd},...
    'cutoff',{out.cutoff},'flag',{out.flag},'class',{out.class});

try
    if options.plots
        makeplot(result)
    end
catch %output must be given even if plots are interrupted 
end