ECG-Kit 1.0

File: <base>/common/LIBRA/diana.m (7,009 bytes)
function result = diana(x,vtype,stdize,metric,banner)

%DIANA is a divisive clustering algorithm. It returns a hierarchy of clusters 
%
% The algorithm is fully described in:
%   Kaufman, L. and Rousseeuw, P.J. (1990),
%   "Finding groups in data: An introduction to cluster analysis",
%   Wiley-Interscience: New York (Series in Applied Probability and
%   Statistics), ISBN 0-471-87876-6.
%
% Required input arguments:
%       x : Data matrix (rows = observations, columns = variables)
%           or dissimilarity vector (recognised by having exactly 1 row).
%           A dissimilarity vector must contain n*(n-1)/2 values for some
%           integer number of observations n, and no missing values (NaN).
%   vtype : Variable type vector (length equals number of variables)
%           Possible values are 1  Asymmetric binary variable (0/1)
%                               2  Nominal variable (includes symmetric binary)
%                               3  Ordinal variable
%                               4  Interval variable
%          (if x is a dissimilarity matrix, vtype is not required)
%   
% Optional input arguments:
%   stdize : standardise the variables given by the x-matrix
%            Possible values are 0 : no standardisation (default)
%                                1 : standardisation by the mean
%                                2 : standardisation by the median
%            (standardisation is only applied to a data matrix with the
%             Euclidean or Manhattan metric; otherwise it is skipped and
%             result.stdize reports that no standardisation was done)
%   metric : Metric used to calculate the dissimilarity matrix
%            Possible values are 0 : Euclidean (all interval variables, default)
%                                1 : Manhattan
%                                2 : Mixed (not all interval variables, default)
%            (if x is a dissimilarity matrix, no dissimilarities are
%             computed from it; the value is only reported in result.metric)
%   banner : draws picture
%            Possible values are 0 : do not create a banner (default)
%                                1 : create a banner
% I/O:
%   result=diana(x,vtype,stdize,metric,banner)
%
% Example (taken from the referenced book)
%   load agricul.mat
%   result=diana(agricul,[4 4],0,0,1);
%
% The output of DIANA is a structure containing:
%   result.x          : inputmatrix x, after standardisation if that was
%                       applied (field is removed when x is a
%                       dissimilarity matrix)
%   result.diss       : whether the inputmatrix x is a dissimilarity matrix
%                       or not
%   result.dys        : calculated dissimilarities (read row by row from the
%                       lower dissimilarity matrix, without the elements of
%                       the diagonal)
%   result.metric     : metric used
%   result.stdize     : standardisation used
%   result.number     : number of observations
%   result.objectorder: order of objects
%   result.heights    : diameter of cluster before dividing it
%                       (=length of banner)
%   result.dc         : divisive coefficient
%   result.merge      : a (n-1) by 2 matrix related to the merge
%
%
% And DIANA will create the plot banner if banner equals 1.
%
% This function is part of LIBRA: the Matlab Library for Robust Analysis,
% available at:
%              http://wis.kuleuven.be/stat/robust.html
%
% Written by Wai Yan Kong 
% Created on 05/2006
% Last Revision: 19/09/2006  

%Checking the number of inputs
if (nargin<1)
    error('One input argument required (data or dissimilarity matrix)')
elseif (nargin>5)
    error('Too many input arguments')
end

% An input with exactly one row is interpreted as a dissimilarity vector
diss = (size(x,1)==1);
if diss
    dissi='x is a dissimilarity matrix';
else
    dissi='x is no dissimilarity matrix';
end

% A data matrix cannot be processed without the variable types
if ((nargin<2) && ~diss)
    error('Two input arguments required (datamatrix x and vtype)')
end

% Filling in the omitted optional inputs
if (nargin<3)
    stdize = 0;
end
if (nargin<5)
    banner = 0;
end

% Defining metric: metr is the short code handed to daisy, metri is the
% readable name stored in the result
if (nargin>=4)
    if (metric==0)
        metr='eucli';
        metri='euclidean';
    elseif (metric==1)
        metr='manha';
        metri='manhattan';
    elseif (metric==2)
        metr='mixed';
        metri='mixed';
    else
        error('metric must be 0,1 or 2')
    end
elseif (nargin<2)
    % only a dissimilarity vector was given
    metr='unknown';
    metri='unknown';
elseif (sum(vtype)~=4*size(x,2))
    % not every variable is of interval type (4)
    metr ='mixed';
    metri='mixed';
else
    metr ='eucli';
    metri='euclidean';
end

%Standardization
if ~(isscalar(stdize) && any(stdize==[0 1 2]))
    error('stdize must be 0,1 or 2');
end
% strcmp is used because == on char arrays compares element by element and
% fails for names of different length (e.g. 'unknown' versus 'eucli')
canStandardise = ~diss && (strcmp(metr,'eucli') || strcmp(metr,'manha'));
if (stdize==0)
    standardisation='no standardisation';
elseif ~canStandardise
    standardisation='no standardisation (not enough num var or x is a diss matrix)';
elseif (stdize==1)
    x = ((x - repmat(mean(x),size(x,1),1))./(repmat(std(x),size(x,1),1)));
    standardisation='standardisation by mean';
else
    x = ((x - repmat(median(x),size(x,1),1))./(repmat(mad(x),size(x,1),1)));
    standardisation='standardisation by median';
end

% defining dissimilarity matrix and number
if diss
    disv=x;
    % n observations give n*(n-1)/2 dissimilarities; solve for n
    number=(1+sqrt(1+8*size(x,2)))/2; %number of observations
    % checking for missing values in the dissimilarity matrix
    if any(isnan(disv))
        error('There are missing value(s) in the dissimilarity matrix!')
    end
    % checking the dimensions of the dissimilarity matrix
    % (number must be an integer)
    if mod(number,fix(number))~=0
        error('The dimension of the dissimilarity matrix is not correct!')
    end
else
    resl=daisy(x,vtype,metr);
    disv=resl.disv;
    number=size(x,1);
end

%Actual calculations
[ner,ban,coef,merge,dys]=twinsc(number,[0 disv]',1,2);

% We want ban to be a vector of length n-1 (the first element is dropped)
ban2=zeros(1,(number-1));
for i = 1:(number-1)
    ban2(i) = ban(i+1);
end

% We want merge to be a (n-1) by 2 matrix (consecutive pairs become rows)
merge2=ones(number-1,2);
for i = 1:(number-1)
    merge2(i,:) = merge(2*i-1:2*i);
end

% Create a banner
if (banner==1)
    Y=ban2;
    Y1=fliplr(Y);
    whitebg([0.4 0.5 0.75]);
    set(gcf,'Color',[0.8 0.8 0.8]);
    b=barh(Y1,1,'w');
    title 'Banner of Diana' ;
    xlabel('Height');
    set(gca,'XDir','reverse');
    YT=0.5:number;
    set(gca,'YTick',YT);
    set(gca,'YTickLabel',fliplr(ner));
    axis([min([ban2 0]),max([ban2 0]),0.5,number-0.5]);
elseif ((banner~=0) & (banner~=1) & (nargin==5))
    error('banner must equals 0 or 1')
end


%Putting things together
result = struct('x',x,'diss',dissi,'dys',dys,'metric',metri,...
    'stdize',standardisation,'number',number,...
    'objectorder',ner,'heights',ban2,'dc',coef,'merge',merge2);
% a dissimilarity input is not echoed back in the result
if diss
    result=rmfield(result, 'x');
end