ECG-Kit 1.0

File: <base>/common/prtools/parzenml.m (5,872 bytes)
%PARZENML Optimum smoothing parameter in Parzen density estimation.
% 
%   H = PARZENML(A)
% 
% INPUT	
%   A    Input dataset
%
% OUTPUT
%   H    Scalar smoothing parameter (in case of crisp labels)
%        Vector with smoothing parameters (in case of soft labels)
%
% DESCRIPTION
% Maximum likelihood estimation for the smoothing parameter H in the 
% Parzen denstity estimation of the data in A. A leave-one out 
% maximum likelihood estimation is used. 
%
% The dataset A can either be crisp or soft labeled. In case of crisp
% labeling the class information is not used and a single smoothing 
% parameter is estimated. In case of soft labels a smoothing parameter
% for every class is estimated and objects are weighted in relation to
% their class weigthts (soft label value). 
% It may be profitable to scale the data before calling it. eg. 
% WS = SCALEM(A,'variance'); A = A*WS.
% 
% SEE ALSO (<a href="http://37steps.com/prtools">PRTools Guide</a>)
% DATASETS, MAPPINGS, SCALEM, SELDAT, PARZENM, PARZENDC, PRPROGRESS

% Copyright: R.P.W. Duin, r.p.w.duin@37steps.com
% Faculty EWI, Delft University of Technology
% P.O. Box 5031, 2600 GA Delft, The Netherlands

% $Id: parzenml.m,v 1.11 2010/03/25 15:39:46 duin Exp $

function h = parzenml(A,fid)

		
	if nargin < 2, fid = []; end

	if isdouble(A), A = prdataset(A); end
	
	A = testdatasize(A);
	A = testdatasize(A,'objects');

	if islabtype(A,'crisp')
		h = parzenmlc(A,fid);
	elseif islabtype(A,'soft')
		h = parzenmls(A,fid);
	else
		error('Label type should be either ''crisp'' or ''soft''')
	end
	
	return
	
function h = parzenmlc(A,fid) %crisp version

	[m,k] = size(A);
	DD= distm(+A) + diag(1e70*ones(1,m));
	E = min(DD);
	
	h1 = sqrt(max(E));    % initial estimate of h
	F1 = derlc(DD,E,h1,k); % derivative

	prprogress(fid,'parzenml:\n');
	prprogress(fid,' %6.4f   %6.3e\n',h1,F1);
	if abs(F1) < 1e-70 
		h = h1;
		prwarning(4,'jump out\n');
		return;
	end
	
	a1 = (F1+m*k)*h1*h1;
	h2 = sqrt(a1/(m*k));  % second guess
	F2 = derlc(DD,E,h2,k); % derivative

	prprogress(fid,' %6.4f   %6.3e\n',h2,F2);
	if (abs(F2) < 1e-70) | (abs(1e0-h1/h2) < 1e-6) 
		h = h2;
		prwarning(4,'jump out\n');
		return
	end
	
	% find zero-point of derivative to optimize h^2
	% stop if improvement is small, or h does not change significantly
	
	alf = 1;
	prwaitbar(100,'parzenml: Optimizing smoothing parameter',m > 100)
	iter = 0;
	while abs(1e0-F2/F1) > 1e-4 & abs(1e0-h2/h1) > 1e-3 & abs(F2) > 1e-70
		iter = iter+1;
		h3 = (h1*h1*h2*h2)*(F2-F1)/(F2*h2*h2-F1*h1*h1);
		if h3 < 0 % this should not happen
			h3 = sqrt((F2+m*k)*h2*h2/(m*k));
		else
			h3 = sqrt(h3);
		end
		prwaitbar(100,100-100*exp(-iter/10));
		h3 = h2 +alf*(h3-h2);
		F3 = derlc(DD,E,h3,k);
		prprogress(fid,' %6.4f   %6.3e\n',h3,F3);
		F1 = F2; F2 = F3;
		h1 = h2; h2 = h3;
		alf = alf*0.99; % decrease step size
	end
	h = h2;
	prwaitbar(0);

return

function F = derlc(DD,E,h,k) % crisp version
	% computation of the likelihood derivative for Parzen density
	% given distances D and their object minima E (for increased accuracy)
	m = size(DD,1);
	warning off MATLAB:divideByZero;
		Y = (DD-repmat(E,m,1))/(2*h*h); % correct for minimum distance to save accuracy
	warning on MATLAB:divideByZero;
	IY = find(Y<20);                % take small distance only, others don't contribute
	P = zeros(m,m);
	P(IY) = exp(-Y(IY));
	PP = sum(P,2)';
	FU = repmat(realmax,1,m);
	J = find(PP~=0); 
	FU(J) = 1./PP(J);
	FF = sum(DD.*P,2);
	warning off MATLAB:divideByZero;
		F = (FU*FF)./(h*h) - m*k;
	warning on MATLAB:divideByZero;
return

function h = parzenmls(A,fid) %soft version

	SS = gettargets(setlabtype(A,'soft'));
	[m,k,c] = getsize(A);
	DD= distm(+A) + diag(1e70*ones(1,m));
	E = min(DD);
	h = zeros(c,1);
	h0 = sqrt(max(E));    % initial estimate of h
	
	
	s = sprintf('parzenml: runover classes');
	prwaitbar(c,s,m > 100);
	iter = 0;
	
	for j=1:c
		prwaitbar(c,j)
		S = SS(:,j);
		h1 = h0;
		F1 = derls(DD,E,h1,k,S); % derivative

	  prprogress(fid,'parzenml: class %i : \n',j);
		prprogress(fid,' %6.4f   %6.3e\n',h1,F1);
		if abs(F1) < 1e-70 
			h(j) = h1;
			prwarning(4,'jump out\n');
			break;
		end
	
		a1 = (F1+m*k)*h1*h1;
		h2 = sqrt(a1/(m*k));  % second guess
		F2 = derls(DD,E,h2,k,S); % derivative

		prprogress(fid,' %6.4f   %6.3e\n',h2,F2);
		if (abs(F2) < 1e-70) | (abs(1e0-h1/h2) < 1e-6) 
			h(j) = h2;
			prwarning(4,'jump out\n');
			break;
		end
	
		% find zero-point of derivative to optimize h^2
		% stop if improvement is small, or h does not change significantly
	
		
		prwaitbar(100,'parzenml: Optimizing smoothing parameter',m > 100)
		iter = 0;
		alf = 1;
		while abs(1e0-F2/F1) > 1e-4 & abs(1e0-h2/h1) > 1e-3 & abs(F2) > 1e-70
			iter = iter+1;
			prwaitbar(100,100-100*exp(-iter/10));
			h3 = (h1*h1*h2*h2)*(F2-F1)/(F2*h2*h2-F1*h1*h1);
			if h3 < 0 % this should not happen
				h3 = sqrt((F2+m*k)*h2*h2/(m*k));
			else
				h3 = sqrt(h3);
			end
			h3 = h2 +alf*(h3-h2);
			F3 = derls(DD,E,h3,k,S);
			prprogress(fid,' %6.4f   %6.3e\n',h3,F3);
			F1 = F2; F2 = F3;
			h1 = h2; h2 = h3;
			alf = alf*0.99; % decrease step size
		end
		prwaitbar(0)
		h(j) = h2;
	end
  prwaitbar(0)
return

function F = derls(DD,E,h,k,S) %soft version
	% computation of the likelihood derivative for Parzen density
	% given distances D and their object minima E (for increased accuracy)
	% S are the object weigths
	c = size(S,2);                  % number of classes
	m = size(DD,1);
	Y = (DD-repmat(E,m,1))/(2*h*h); % correct for minimum distance to save accuracy
	IY = find(Y<20);                % take small distance only, others don't contribute
	F = 0;
	for j=1:c
		P = zeros(m,m);
		P(IY) = exp(-Y(IY));
		PP = S(:,j)'*P';
		FU = repmat(realmax,1,m);
		J = find(PP~=0);  
		FU(J) = S(J,j)'./PP(J);
		K = find(S(:,j)==0);
		FU(K) = zeros(1,length(K));
		FF = (DD.*P)*S(:,j);
		F = F + (FU*FF)./(h*h);
	end
	F = F - sum(S(:))*k;
return