ECG-Kit 1.0

File: <base>/common/prtools/parzen_map.m (3,780 bytes)
%PARZEN_MAP Map a dataset on a Parzen densities based classifier
% 
% 	F = PARZEN_MAP(A,W)
%
% INPUT
%   A   Dataset
%   W   Trained Parzen classifier mapping (default: PARZENC(A))
%
% OUTPUT
%   F   Mapped dataset
%
% DESCRIPTION 
% Maps the dataset A by the Parzen density based classifier W; F*SIGM gives
% the posterior probabilities. W should be trained by a classifier like PARZENC.
% This routine is called automatically to solve A*W, if W is trained by
% PARZENC.
% 
% The global PRMEMORY is read to limit the size of the distance matrix that is
% declared internally; the test set is processed in chunks accordingly
% (default: inf).
% 
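% EXAMPLE
% A brief usage sketch; GENDATB and TESTC are standard PRTools routines not
% used in this file, assumed here for illustration only.
% 
%   A = GENDATB([50 50]);    % two-class banana-shaped dataset
%   W = PARZENC(A);          % train a Parzen classifier on A
%   F = PARZEN_MAP(A,W);     % same result as A*W
%   E = TESTC(F);            % error of the resulting classification
% 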
% SEE ALSO (<a href="http://37steps.com/prtools">PRTools Guide</a>)
% MAPPINGS, DATASETS, PARZENC, TESTP

% Copyright: R.P.W. Duin, r.p.w.duin@37steps.com
% Faculty EWI, Delft University of Technology
% P.O. Box 5031, 2600 GA Delft, The Netherlands

% $Id: parzen_map.m,v 1.5 2008/08/18 21:16:26 duin Exp $

function f = parzen_map(b,w)

		% If no mapping is supplied, train one.
	if (nargin < 2)
		w = parzenc(b); 
	end

	pars = getdata(w);
	a = pars{1}; 								% Stored training dataset.
	h = pars{2}; 								% Smoothing parameter(s).

	[m,k,c] = getsize(a); nlab = getnlab(a); p = getprior(a);
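	% M, K and C are the numbers of stored objects, features and classes;
	% NLAB holds the numeric class index of each stored object, P the priors.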

	% Use the class priors stored in the mapping, if present.
	if length(pars) == 3
		p = pars{3};
	end
	
	% Prepare a matrix with smoothing parameters for each class and feature.
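	% H may be a scalar, a row (one value per feature) or a column (one value
	% per class); replicate it to a full C x K matrix.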

	if (size(h,1) == 1)
		h = repmat(h,c,1);
	end
	if (size(h,2) == 1)
		h = repmat(h,1,k);
	end
	
	
	if (any(size(h) ~= [c,k]))
		error('The size of the array with smoothing parameters does not match that of the training set of W.');
	end

	[mt,kt] = size(b);
	if (kt ~= k)
		error('The size of the set A does not match that of the training set of W.'); 
	end

	[num,n] = prmem(mt,m);
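	% PRMEM splits the MT test objects into NUM chunks of at most N objects
	% each, so that every N x M distance matrix stays within PRMEMORY.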
	f = ones(mt,c); 					% Prob. densities for each test sample and class.

	for j = 0:num-1						% Process each chunk of the test set separately.

		if (j == num-1)
			nn = mt - num*n + n;	% Last chunk may have smaller size.
		else
			nn = n;
		end
		range = [j*n+1:j*n+nn];

		% Estimate the class probabilities.
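		% For a crisp-labeled class i the Parzen estimate at a test point x is
		%   f_i(x) = p(i) * mean_j exp(-||(x-a_j)./h_i||^2 / 2) / ((2*pi)^(k/2)*prod(h_i)),
		% with j running over the stored objects of class i. The kernel part is
		% computed here; the normalization follows further below.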

		for i = 1:c
			
			if islabtype(a,'crisp')
				I = findnlab(a,i); hh = h(i,:);		% Parameters for this class.

				% Calculate squared distances to kernel centers.
				
				D = +distm(a(I,:)./repmat(hh,length(I),1), ...
				           +b(range,:)./repmat(hh,length(range),1));
				if (length(I) > 0)
					f(range,i) = mean(exp(-D*0.5),1)';			% Apply kernel.
				end
				% Log kernel weight (1/|I|) per stored object, used for the
				% log-density fallback below.
				const = repmat(-log(length(I)),length(I),1);
			else
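				% Soft labels: each stored object contributes a kernel weighted by
				% its target value for class i, normalized by the total weight.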
				hh = h(i,:);
				I = find(a.targets(:,i) > 0); % Avoid objects with zero weight
				v = a.targets(I,i);
				D = distm(+a(I,:)./repmat(hh,length(I),1), ...
				          +b(range,:)./repmat(hh,length(range),1));
				f(range,i) = exp(-D'*0.5) * v / sum(v);
				const = log(v/sum(v));
			end
			% Normalize and multiply by class prior to get a density. Add REALMIN
			% to prevent division-by-zero errors.
			f(range,i) = p(i)*f(range,i)/((sqrt(2*pi).^k)*prod(hh)+realmin);
			if (getout_conv(w) == 2) % take log of density to preserve tails
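				% Densities at or below 1e-303 are effectively zero; taking their
				% log directly would underflow, so they are handled separately via
				% the nearest kernel centre only.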
				J = find(f(range,i) <= 1e-303);
				N = find(f(range,i) >  1e-303);
				f(range(N),i) = log(f(range(N),i));
				[dm,R] = min(D(:,J)); % for zero densities use nearest neighbor only
				%f(range(J),i) = log(p(i)/((sqrt(2*pi).^k)*prod(hh)+realmin)) - dm'*0.5 + const(R);
				f(range(J),i) = log(p(i)) - k*log(sqrt(2*pi)) - sum(log(hh)) - dm'*0.5 + const(R);
				% In the soft-label case this is only an approximation: we rely on
				% the weight of the nearest neighbour being sufficiently large.
			end
		end
	end
	if (getout_conv(w) == 2) % scale to gain accuracy in the tails
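		% Subtracting the per-object maximum log-density before exponentiating
		% (a log-sum-exp style shift) avoids underflow; the common factor cancels
		% when the outputs are later normalized to posteriors.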
		fmax = max(f,[],2);
		f = f - repmat(fmax,1,c);
		f = exp(f);
	else
		f = f + realmin; % avoid division by 0 when computing posteriors later
	end

	if isdataset(b)
		% Return the result as a dataset, with the class labels of W attached
		% as feature labels.
		f = setdata(b,f,getlabels(w));
	end
return