ECG-Kit 1.0
(7,097 bytes)
function result = adjustedoutlyingness(x,varargin)
%ADJUSTEDOUTLYINGNESS calculates the 'Skewness-Adjusted Outlyingness'.
% The method searches for outliers in multivariate skewed data
% (thus without assuming elliptical symmetric).
% It is based on the outlyingness measure of Stahel and Donoho (1981, 1982)
% and on the skewness-adjusted boxplot of Hubert and Vandervieren (2007).
%
% The Skewness-Adjusted Outlyingness is described in:
% Hubert, M., and Van der Veeken, S. (2008),
% "Outlier detection for skewed data",
% Journal of Chemometrics, 22, 235-246.
%
% The method can be useful as preprocessing in
% FASTICA (www.cis.hut.fi/projects/ica/fastica/), see
%
% Brys, G., Hubert, M., and Rousseeuw, P.J. (2005),
% "A Robustification of Independent Component Analysis",
% Journal of Chemometrics, 19, 364-375.
%
%
% Required input arguments:
% x : Data matrix (rows=observations, columns=variables)
%
% Optional input arguments:
% ndir : Number of directions in which de outlyingness will be computed
% (default = 250*number of variables)
% classic : If equal to one, the classical Stahel Donoho outlyingness is
% calculated as well (default = 0)
%
% When the number of observations n is sufficiently large compared to the
% dimension p, i.e. when n > 5*p, directions are generated orthogonal to the
% hyperplane through p random observations. The adjusted outlyingness is
% then affine invariant. When n is small compared to p, we take random
% directions through two data points. This procedure is orthogonal
% invariant.
%
% I/O:
% result=adjustedoutlyingness(x,'ndir',250);
%
% Example:
% x = [chi2rnd(2,1000,1) trnd(3,1000,1)];
% x(1:10,:) = mvnrnd([-2 -3],eye(2)/10,10);
% result = adjustedoutlyingness(x);
%
% The output of ADJUSTEDOUTLYINGNESS is a structure containing:
% result.adjout : skewness-adjusted outlyingness values for all observations
% result.cutoff : cutoff value for the AO-values
% result.flag : The observations whose AO-value exceeds the cutoff value can be
% considered as outliers and receive a flag equal to zero. The regular observations
% receive a flag 1.
% result.classic : If the input argument 'classic' is equal to one, this structure
% contains results of the classical analysis:
% .outl: Stahel-Donoho outlyingness for all observations
% .cutoff: cutoff value for the outlyingness values.
% .flag: The observations whose outlyingness exceeds the cutoff receive flag zero, the others receive flag 1.
%
% This function is part of LIBRA: the Matlab Library for Robust Analysis,
% available at:
% http://wis.kuleuven.be/stat/robust
%
% Written by Guy Brys, Mia Hubert, Stephan Van der Veeken, Tim Verdonck
% Last Update: 09/06/2008
if (nargin<1)
error('Input matrix x is required');
end
[n,p]=size(x);
counter=1;
default=struct('ndir',250*p,'a',-4,'b',3,'classic',0);
list=fieldnames(default);
result=default;
IN=length(list);
i=1;
%reading the user's input
if nargin>1
%
%placing inputfields in array of strings
%
for j=1:nargin-1
if rem(j,2)~=0
chklist{i}=varargin{j};
i=i+1;
end
end
%
%Checking which default parameters have to be changed
% and keep them in the structure 'result'.
%
while counter<=IN
index=strmatch(list(counter,:),chklist,'exact');
if ~isempty(index) %in case of similarity
for j=1:nargin-1 %searching the index of the accompanying field
if rem(j,2)~=0 %fieldnames are placed on odd index
if strcmp(chklist{index},varargin{j})
I=j;
end
end
end
result=setfield(result,chklist{index},varargin{I+1});
index=[];
end
counter=counter+1;
end
end
ndir=result.ndir;
a=result.a;
b=result.b;
classic=result.classic;
if (ndir<=0)
error('The number of directions should be positive.');
end
if isempty(ndir)
ndir=250*p;
end
% a and b must be numeric scalars, classic should be a binary variable
if isempty(a)
a = -4;
elseif ~isscalar(a) || ~isnumeric(a)
error('The ''a'' parameter value must be a numeric scalar.');
end
if isempty(b)
b = 3;
elseif ~isscalar(b) || ~isnumeric(b)
error('The ''b'' parameter value must be a numeric scalar.');
end
if isempty(classic)
classic = 0;
elseif ~isscalar(classic) || ~ismember(classic,0:1)
error('Invalid value for ''classic'' parameter.');
end
if n>5*p
B=[];
for i =1:ndir
xx = randperm(n);
P = x(xx(1:p),:);
if (rank(P)==p)
B = [B ; (P\ones(p,1))'];
end
end
else
B=twopoints(x,ndir,0);
end
for i=1:size(B,1)
Bnorm(i)=norm(B(i,:),2);
end
Bnormr=Bnorm(Bnorm > 1.e-12);
B=B(Bnorm > 1.e-12,:);
A=diag(1./Bnormr)*B;
%Looking in ndir directions for skewness-adjusted outlyingness
Y=x*A';
[n,p]=size(Y);
YZ=zeros(n,p);
tmc = mc(Y);
h=find(abs(tmc)==1);
if(sum(h)>1)
error('There are too many ties in the data. The adjusted outlyingness can not be computed.')
end
tme = median(Y);
tiq = iqr(Y);
s=find(tiq==0);
if(sum(s)>1)
error('There are too many ties in the data. The adjusted outlyingness can not be computed.')
end
tp1 = prctile(Y,25);
tp3 = prctile(Y,75);
if (tmc>=0)
tup = (tp3+1.5*exp(b*tmc).*tiq)-tme;%3
tlo = tme-(tp1-1.5*exp(a*tmc).*tiq);%-4
else
tup = (tp3+1.5*exp(-a*tmc).*tiq)-tme;%4
tlo = tme-(tp1-1.5*exp(-b*tmc).*tiq);%-3
end
for k = 1:p
ttup = sort(-Y(Y(:,k)<(tup(k)+tme(k)),k));
tup(k) = -ttup(1)-tme(k);
ttlo = sort(Y(Y(:,k)>(tme(k)-tlo(k)),k));
tlo(k) = tme(k)-ttlo(1);
end
tmp = ((Y>=repmat(tme,n,1)).*(repmat(tup,n,1)) + (Y<repmat(tme,n,1)).*(repmat(tlo,n,1)));
YZ = abs((Y-repmat(tme,n,1)))./tmp;
adjout = max(YZ,[],2);
mcadjout=mc(adjout);
if mcadjout>0
cutoff = prctile(adjout,75)+1.5*exp(b*mcadjout)*iqr(adjout);
else
cutoff = prctile(adjout,75)+1.5*iqr(adjout);
end
ttup=sort(-adjout(adjout<cutoff));
cutoff=-ttup(1);
flag=(adjout<=cutoff);
if classic==1
% The classical estimates are computed.
umad=madc(Y);
g=find(umad==0);
if(sum(g)>1)
error('There are too many ties in the data. The Stahel-Donoho outlyingness can not be computed.')
end
clYZ=abs((Y-repmat(tme,n,1)))./repmat(umad,n,1);
classout=max(clYZ,[],2);
%classical chi-square cutoff and flag
quant=chi2inv(0.99,size(x,2));
classcutoff=sqrt(quant);
classflag=(classout<=classcutoff);
classic=struct('outl',classout,'cutoff',classcutoff,'flag',classflag);
end
%Putting things together
result = struct('adjout',adjout,'cutoff',cutoff,'flag',flag,'classic',classic);