ECG-Kit 1.0

File: <base>/common/LIBRA/mona.m (5,938 bytes)
function result=mona(x,banner)

%MONA returns clusters according to mona (monothetic
%clustering)
%
%The algorithm is fully described in:
%   Kaufman, L. and Rousseeuw, P.J. (1990),
%   "Finding groups in data: An introduction to cluster analysis",
%   Wiley-Interscience: New York (Series in Applied Probability and
%   Statistics), ISBN 0-471-87876-6.
%
% Required input argument:
%  x : Data matrix (rows = observations, columns = variables)
%      containing only binary values.
%      The missing values are indicated by the value 2
%
% Optional input argument:
%  banner : draws picture
%       Possible values are 0 : do not create a banner
%                           1 : create a banner
%       (in case banner is not given, banner = 0)
% I/O:
%   result=mona(x,1)
%
% Example (taken from the referenced book)
%   load animal.mat
%   result=mona(animal,1);
%
% The output of MONA is a structure containing
%   result.Matrix         : revised matrix xx (contains only 0 and 1;
%                           missing values are estimated)
%   result.Observations   : number of observations
%   result.Variables      : number of variables
%   result.Objectorder    : order of objects
%   result.UsedVariable   : variable used for separation
%   result.SeperationStep : separation step
%                           (The value at index i is the step number of
%                           the separation of the elements with index 1 to i
%                           from the elements with index i+1 to
%                           result.Observations.)
%                           If it equals zero, then there was no separation.
%                           (The field name keeps its historical spelling so
%                           that existing callers continue to work.)
%
% Estimation of missing values:
%   For a missing cell (i,j) every other variable k that is observed in row
%   i is a candidate donor. Its association with variable j is a*d-b*c,
%   where a, b, c, d are the counts of rows with (x(:,j),x(:,k)) equal to
%   (1,1), (1,0), (0,1) and (0,0). The donor with the largest absolute
%   association is used: its value is copied when the association is
%   positive and inverted when it is negative. If every candidate has zero
%   association the cell is set to 0.
%
% Errors are raised when x contains values other than 0, 1, 2, when every
% variable has at least one missing value, when a variable has missing
% values in at least half of the objects, when a variable is constant, or
% when banner is given and is neither 0 nor 1.
%
% And MONA will create the plot banner if banner equals 1.
%
% This function is part of LIBRA: the Matlab Library for Robust Analysis,
% available at:
%              http://wis.kuleuven.be/stat/robust.html
%
% Written by Wai Yan Kong (May 2006)

% Check whether the number of input is correct

if (nargin<1)
    error('One input argument required (datamatrix)')
elseif (nargin<2)
    banner=0;
elseif (nargin>2)
    error('Too many input arguments')
end

% Define number of observations and variables

S=size(x);
S1=S(1);
S2=S(2);

% Check whether x is a matrix containing only the values 0, 1 and 2

if ~all(x(:)==0 | x(:)==1 | x(:)==2)
    error('inputmatrix must have binary values or value 2 for missing values')
end

% Build the revised matrix xx: a copy of x in which every missing value
% (coded as 2) is replaced by an estimate.

xx=x;
missing=zeros(1,S2);

for j=1:S2
    missingRows=find(x(:,j)==2);
    missing(j)=numel(missingRows);

    if missing(j)>0
        % The contingency counts only depend on the pair of columns (j,k),
        % so they are computed once per column and reused for every
        % missing cell of that column.
        isOne=(x(:,j)==1);
        isZero=(x(:,j)==0);
        signedAssociation=zeros(1,S2);
        for k=1:S2
            if k~=j
                a=sum(isOne  & (x(:,k)==1));
                b=sum(isOne  & (x(:,k)==0));
                c=sum(isZero & (x(:,k)==1));
                d=sum(isZero & (x(:,k)==0));
                signedAssociation(k)=a*d-b*c;
            end
        end

        for r=1:missing(j)
            i=missingRows(r);

            % Start from a fresh candidate list for every cell: a variable
            % that is itself missing in row i cannot act as donor. (This
            % mask also covers k==j, because x(i,j) is 2.)
            candidate=signedAssociation;
            candidate(x(i,:)==2)=0;

            [strength,donor]=max(abs(candidate));
            if strength==0
                % No informative donor available for this cell.
                xx(i,j)=0;
            elseif candidate(donor)>0
                xx(i,j)=x(i,donor);
            else
                xx(i,j)=1-x(i,donor);
            end
        end
    end
end

TotalMissing=sum(missing);
fprintf(1,'This inputmatrix has %d missing values\n',TotalMissing)

% Check situations where Mona is not applicable

if all(missing>=1)
    error('each variable has at least one missing value')
end

for j=1:S2

    if (missing(j)>= (S1/2))
        fprintf(1,'Variable %d has %d missing values\n',j,missing(j))
        error('The number of missing values for some variable equals or is more than half of the number of objects')
    end

    % A variable whose consecutive values are all equal is constant and
    % cannot be used to split the objects.
    if all(xx(1:S1-1,j)==xx(2:S1,j))
        fprintf(1,'Variable %d has identical values\n',j)
        error('all values are identical for some variable')
    end

end

% We make the rowvector kx by reading the revised matrix xx
% row by row

kx=reshape(full(double(xx)).',1,[]);

% Actual calculations
% (monac is not defined in this file; presumably the compiled MONA routine
% shipped with LIBRA - it must be on the path.)

[ner,lava,nban]=monac(S1,S2,kx);

% We want lava and nban to be row vectors of length S1-1
% (the first element of each is dropped)

lava2=reshape(double(lava(2:S1)),1,[]);
nban2=reshape(double(nban(2:S1)),1,[]);

% Create a banner

if (banner==1)
    whitebg([1 1 1]);
    stepmax=max(nban2)+1;
    Y=nban2;
    % Positions without a separation are drawn as full-length bars
    Y(Y==0)=stepmax;
    Y1=fliplr(Y);
    barh(Y1,1)
    title 'Banner of Mona'
    xlabel('Separation Step')
    XT=0:stepmax;
    set(gca,'XTick',XT);
    set(gca,'XTickLabel',XT);
    axis([0,stepmax,0.5,S1-0.5])
    ylabel('Objects')
    % Label every real separation with the variable that was used.
    % The labels are taken from the numeric vector (not from characters of
    % a formatted string) so that variable numbers of 10 and above are
    % printed correctly.
    usedVariable=fliplr(lava2);
    for i=1:S1-1
        if(Y1(i)<stepmax)
            text(Y1(i)+0.3,i,num2str(usedVariable(i)))
        end
    end
    YT=0.5:S1;
    set(gca,'YTick',YT);
    set(gca,'YTickLabel',fliplr(ner));

elseif ((banner~=0) & (banner~=1) & (nargin==2))
    error('banner must equals 0 or 1')
end


% Putting things together

result = struct('Matrix',xx,'Observations',S1,'Variables',S2,...
    'Objectorder',ner,'UsedVariable',lava2,'SeperationStep',nban2);