ECG-Kit 1.0

File: <base>/common/prtools/modeclust.m (4,119 bytes)
%MODECLUST KNN mode-seeking clustering
% 
% 	LAB = MODECLUST(A,K)
% 
% INPUT
%   A       Dataset
%   K       Number of neighbours to search for local mode
%
% OUTPUT
  %   LAB   Indices of modal samples
%
% DESCRIPTION
% A K-NN modeseeking method is used to assign objects to their nearest mode.
% Object densities are defined by one over the distance to the K-th nearest 
% neighbour. Clusters are defined by recursively jumping for every object 
% to object with the highest density in the local neighborhood.
%
% K can also be a vector of neighborhood sizes, which is much faster.
% Default K: a set of values determined by a geometric series.
%
% EXAMPLE
% delfigs
% a = gendatm(1000);      % generate 1000 objects in 8 
% lab = modeclust(a); 
% for j=1:size(lab,2)
%   nclust = numel(unique(lab(:,j)));
%   if nclust < 20 & nclust > 1
%     figure; scatterd(prdataset(a,lab(:,j)));
%     title(['Number of clusters: ' int2str(nclust)]);
%   end
% end
% showfigs
% 
% SEE ALSO (<a href="http://37steps.com/prtools">PRTools Guide</a>)
% MAPPINGS, DATASETS, PRKMEANS, HCLUST, KCENTRES, PROXM

% Copyright: R.P.W. Duin, r.p.w.duin@37steps.com
% Faculty EWI, Delft University of Technology
% P.O. Box 5031, 2600 GA Delft, The Netherlands

function labels = modeclust(a,K)

	% prepare data
  a = +a;                 % get rid of possible PRTools dataset structure
	m = size(a,1);          % number of objects
	if (nargin < 2  | isempty(K))
    K = intgeos(2,1.2,m); % default set of nb-sizes (neighborhood-sizes)
  end
  nk = numel(K);          % number of neighborhood sizes (clusterings)
	f = zeros(m,nk);        % space for storing densities
  batchsize = 100;        % batch sizes (avoid large distance matrices)
  nbatch = ceil(m/batchsize); % number of batches
  
  % first loop over the data: compute densities for every nb-size
	t = sprintf('Computing %i densities: ',m);
	prwaitbar(m,t);
  for j=1:nbatch
		prwaitbar(m,j*batchsize,[t num2str(j*batchsize)]);
    batch = (j-1)*batchsize+1:j*batchsize;  % take a subset of the objects
    if any(batch>m)              % arrange last batch
      batch = batch(batch <= m);
    end
		prwaitbar(m,(j-1)*batchsize,[t num2str((j-1)*batchsize)]);
    dj = distm(a(batch,:),a);    % distances between batch and all data
    [dd,L] = sort(dj,2);         % sort them per object
    f(batch,:) = 1./dd(:,K);     % density estimates for all nb-sizes
  end
  prwaitbar(0);
  
  % second loop: find indices of maximum density point in every nb
  NN = zeros(m,nk);              % space for storing these indices
	t = sprintf('Checking %i neighborhoods: ',m);
	prwaitbar(m,t);
  for j=1:nbatch
		prwaitbar(m,j*batchsize,[t num2str(j*batchsize)]);
    batch = (j-1)*batchsize+1:j*batchsize;   % take a subset of the objects
    if any(batch>m)              % arrange last batch
      batch = batch(batch <= m)';
    end
    mj = numel(batch);           % number of data points in batch
    dj = distm(a(batch,:),a);    % recomputes distances
    [dd,J] = sort(dj,2);         % sort them per object
    for n=1:nk                   % run over all nb's
      ff = f(:,n);               % densities for this nb-size
      [dummy,I] = max(reshape(ff(J(:,1:K(n))),mj,K(n)),[],2);
      NN(batch,n)  = J([1:mj]'+(I-1)*mj); % for all points the index of
    end                          %  point with maximum local density 
  end
  prwaitbar(0);
  
  % third loop: follow gradient to density modes
	t = sprintf('Computing %i clusterings: ',nk);
	prwaitbar(nk,t);
  for n=1:nk
		prwaitbar(nk,n,[t num2str(n)]);
		N = NN(:,n);                 % initial nb points with maximum density
		% Re-assign samples to the sample their nearest neighbour is assigned to.
		% Iterate until assignments don't change anymore. Samples that then point 
		% to themselves are modes; all other samples point to the closest mode.
		M = N(N);
    while (any(M~=N))
			N = M; M = N(N);
    end
    labels(:,n) = M;  % labels are the indices of the modes to which data 
		                  % points are assigned to. 
    % Data points assigned to the same mode constitute a cluster
	end
	prwaitbar(0);

return