ECG-Kit 1.0

File: <base>/common/LIBRA/cpca.m (4,568 bytes)
function result=cpca(x,varargin)

%CPCA performs a classical principal components analysis 
% on the data set x. The loadings correspond with the eigenvectors of the
% classical covariance matrix of x.
% 
% Required input argument:
%      x : Data matrix (observations in the rows, variables in the
%          columns)
%
% Optional input argument: 
%      k : Number of principal components to compute. If k is missing, 
%          a screeplot is plotted allowing the selection of
%          the number of principal components.
%  plots : If equal to one (default), a menu is shown which allows to draw several plots,
%          such as a score outlier map. 
%          If 'plots' is equal to zero, all plots are suppressed.
%
% I/O: result=cpca(x,'k',2)
%
% The output is a structure containing 
%
%    result.P       : Classical loadings (eigenvectors of covariance matrix)
%    result.L       : Classical eigenvalues (eigenvalues of covariance matrix)
%    result.M       : Classical mean of the columns of x
%    result.T       : Classical scores 
%    result.k       : Number of (chosen) principal components 
%    result.sd      : Score (mahalanobis) distances within the classical PCA subspace
%    result.od      : Orthogonal distances to the classical PCA subspace 
%    result.cutoff  : Cutoff values for the score and orthogonal distances
%    result.flag     : The observations whose score distance is larger than result.cutoff.sd (==> result.flag.sd)
%                      or whose orthogonal distance is larger than result.cutoff.od (==> result.flag.od)
%                      can be considered as outliers and receive a flag equal to zero (result.flag.all).
%                      The regular observations receive a flag 1.
%    result.class   : 'CPCA' 
%
% This function is part of LIBRA: the Matlab Library for Robust Analysis,
% available at: 
%              http://wis.kuleuven.be/stat/robust.html
%
% Written by Sabine Verboven 
% Last Update: 05/04/2003

default=struct('plots',1,'k',0);
list=fieldnames(default);
options=default;
IN=length(list);
i=1;   
counter=1;
if nargin>1
    %
    % placing inputfields in array of strings
    %
    for j=1:nargin-1
        if rem(j,2)~=0
            chklist{i}=varargin{j};
            i=i+1;
        end
    end 
    % Checking which default parameters have to be changed
    % and keep them in the structure 'options'.
    while counter<=IN 
        index=strmatch(list(counter,:),chklist,'exact');
        if ~isempty(index) % in case of similarity
            for j=1:nargin-2 % searching the index of the accompanying field
                if rem(j,2)~=0 % fieldnames are placed on odd index
                    if strcmp(chklist{index},varargin{j})
                        I=j;
                    end
                end
            end
            options=setfield(options,chklist{index},varargin{I+1});
            index=[];
        end
        counter=counter+1;
    end
end
[n,p]=size(x);
% using the optimal (least time consuming) algorithm
if n<p
	[P,T,L,r,Xc,out.M]=kernelEVD(x);
else
	[P,T,L,r,Xc,out.M]=classSVD(x);
end

if options.k==0
    screeplot(L,'CPCA');
    k=input('How many principal components would you like to retain? ');
else
    k=options.k;
end
out.k=k;
out.P=P(:,1:out.k);
out.T=T(:,1:out.k);
out.L=L(1:out.k)';

% Mahalanobis distance in classical PCA subspace
Tclas=Xc*out.P;
out.sd=sqrt(libra_mahalanobis(Tclas,zeros(size(Tclas,2),1),'cov',out.L'));
out.cutoff.sd=sqrt(chi2inv(0.975,out.k));

% Orthogonal distances to classical PCA subspace
Xtilde=Tclas*out.P';
Cdiff=Xc-Xtilde;
for i=1:n
    out.od(i)=norm(Cdiff(i,:));
end
r=rank(x);
if k~=r
    m=mean(out.od.^(2/3));
    s=sqrt(var(out.od.^(2/3)));
    out.cutoff.od = sqrt(norminv(0.975,m,s)^3); 
else
    out.cutoff.od=0;
end

%Computing flags
if k~=p
    out.flag.od=(out.od<=out.cutoff.od)';
    out.flag.sd=(out.sd<=out.cutoff.sd)';
    out.flag.all=(out.flag.od)&(out.flag.sd);
else
    out.flag.od=(out.od<=out.cutoff.od)';
    out.flag.sd=(out.sd<=out.cutoff.sd)';
    out.flag.all=out.flag.sd;
end
out.class='CPCA';

result=struct('P',{out.P},'L',{out.L},'M',{out.M},'T',{out.T},'k',{out.k},...
    'sd', {out.sd'},'od',{out.od'},'cutoff',{out.cutoff},'flag',{out.flag},...
    'class',{out.class});

try
    if options.plots 
        makeplot(result)
    end
catch %output must be given even if plots are interrupted 
    %> delete(gcf) to get rid of the menu 
end