ECG-Kit 1.0

File: <base>/common/prtools/featsellr.m (9,319 bytes)
%FEATSELLR Plus-L-takeaway-R feature selection for classification
% 
%  [W,RES] = FEATSELLR(A,CRIT,K,L,R,T)
%
% INPUT	
%   A     Dataset
%   CRIT  String name of the criterion or untrained mapping 
%         (optional; default: 'NN', i.e. 1-Nearest Neighbor error)
%   K     Number of features to select
%         (optional; default: 0, return an optimally ordered set of all features)
%   L     Number of features to add per step (plus-L; default: 1), L ~= R
%   R     Number of features to remove per step (takeaway-R; default: 0)
%   T     Tuning dataset (optional)
%   N     Number of cross-validations, supplied in place of T (optional)
%
% OUTPUT
%   W     Output feature selection mapping
%   RES   Matrix with step-by-step results of the selection
%
% DESCRIPTION
% Plus-L-takeaway-R selection of K features using the dataset A, by
% iteratively adding the best subset of L features and removing the best
% subset of R features, as judged by the criterion. The search starts from
% the full set of features when L < R, otherwise from the empty set. CRIT
% sets the criterion used by the feature evaluation routine FEATEVAL. If the
% dataset T is given, it is used as a tuning set for FEATEVAL. Alternatively,
% a number of cross-validations N may be supplied. For K = 0, the optimal
% feature set (maximum value of FEATEVAL) is returned. The result W can be
% used for selecting features by B*W. In this case, features are ranked
% optimally.
% The selected features are stored in W.DATA and can be found by +W.
% In RES, the search is reported step by step as:
% 
% 	RES(:,1)            : number of features
% 	RES(:,2)            : criterion value
% 	RES(:,3:2+max(L,R)) : indices of added (positive) / deleted (negative) features
%
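% EXAMPLE
% A minimal usage sketch (assuming the standard PRTools data generator
% GENDATD is available; any labeled PRTools dataset can be used instead):
%
%  A = GENDATD([50 50],8);            % two-class dataset with 8 features
%  [W,RES] = FEATSELLR(A,'NN',3,2,1); % plus-2-takeaway-1, select 3 features
%  B = A*W;                           % dataset with the 3 selected features
%  +W                                 % indices of the selected features
%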
% SEE ALSO (<a href="http://37steps.com/prtools">PRTools Guide</a>)
% MAPPINGS, DATASETS, FEATEVAL, FEATSEL
% FEATSELO, FEATSELB, FEATSELF, FEATSELI, FEATSELP, FEATSELM, PRPROGRESS

% Copyright: D. de Ridder, D.deRidder@ewi.tudelft.nl
% Faculty of Electrical Engineering, Mathematics and Computer Science
% Delft University of Technology, Mekelweg 4, 2628 CD Delft, The Netherlands

% $Id: featsellr.m,v 1.6 2009/11/27 08:53:00 duin Exp $

function [w,res] = featsellr(a,crit,ksel,l,r,t,fid)

		
	if (nargin < 2) | isempty(crit)
		prwarning(2,'No criterion specified, assuming 1-NN.');
		crit = 'NN';
	end
	if (nargin < 3) | isempty(ksel)
		ksel = 0; 		% Consider all the features and sort them.
	end
	if (nargin < 4) | isempty(l) 
		l = 1;
	end;
	if (nargin < 5) | isempty(r)
		r = 0;
	end;
	if (nargin < 6)
		prwarning(3,'No tuning set supplied.');
		t = [];
	end
	if (nargin < 7)
		fid = [];
	end

	if (l==r)
		error('In ''Plus-L-takeaway-R feature selection'' L should be unequal to R')
	end
	
	% No input arguments, or an empty dataset: return an untrained mapping,
	% storing all parameters so they are available when the mapping is trained.
	if (nargin == 0) | (isempty(a))
		w = prmapping('featsellr',{crit,ksel,l,r,t});
		w = setname(w,'+L-R FeatSel');
		return
	end

	isvaldfile(a,1,2);               % At least 1 object per class, 2 classes.
	a = testdatasize(a);
	a = setprior(a,getprior(a));
	if ~is_scalar(t), iscomdset(a,t); end

	[m,k,c] = getsize(a);
	featlist = getfeatlab(a);

	% Initialise counters.
	state.l = l; state.r = r;
	if (state.l > state.r)                    % Start from the empty set.
		state.I = [];                           % Pool of selected feature indices.
		state.critval_opt = 0;                  % Maximum criterion value found so far.
		state.res = zeros(0,2+max(l,r));        % Report of the selection process.
	else                                      % Start from the full set.
		state.I = 1:k;                          % Pool of selected feature indices.
		state.critval_opt = feateval(a,crit,t);               % Maximum criterion value found so far.
		state.res = [k,state.critval_opt,zeros(1,max(l,r))];  % Report of the selection process.
	end
	state.I_opt = state.I; state.critval = [];

	% Report the type of search and initialise the progress bar.

	prprogress(fid,['\nfeatsellr: Plus-' num2str(l) '-takeaway-' num2str(r) ...
			' feature selection\n'])
	if ksel == 0
		if (l > r) 
			s = sprintf('Forward search of optimal feature set out of %i: ',k);
		else
			s = sprintf('Backward search of optimal feature set out of %i: ',k);
		end
		nsize = k;
	else
		s = sprintf('Selection of %i features: ',ksel);
		nsize = abs(length(state.I)-ksel);
	end
	prwaitbar(nsize,s);
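	% Keep adding/removing features until the target size is reached:
	% a forward search (L > R) grows I while fewer than KSEL features are
	% selected (or, for KSEL = 0, while at least L more can be added);
	% a backward search (L < R) shrinks I while more than KSEL features are
	% selected (or, for KSEL = 0, while more than R remain).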
	
	while (ksel == 0 & (((state.l > state.r) & (length(state.I) <= k-state.l)) | ...
                        ((state.l < state.r) & (length(state.I) > state.r)))) | ... 
          (ksel > 0  & state.l > state.r & length(state.I) < ksel) | ...
          (ksel > 0  & state.l < state.r & length(state.I) > ksel)

		% Depending on the search direction, add the best subset of L features
		% and/or remove the best subset of R features, as judged by CRIT.
		if (l > r)
			state = plusl(a,crit,t,state,fid);
			if (r > 0), state = takeawayr(a,crit,t,state,fid); end;
			prwaitbar(nsize,length(state.I),[s int2str(length(state.I))]);
		else
			state = takeawayr(a,crit,t,state,fid);
			if (l > 0), state = plusl(a,crit,t,state,fid); end;
			prwaitbar(nsize,k-length(state.I),[s int2str(length(state.I))]);
		end;    
        
	end;
	prwaitbar(0);
    
    % Make sure we end up with exactly KSEL features.

	if (ksel == 0) & (state.l > state.r) & (length(state.I) < k)
		state.l = k - length(state.I);
		state = plusl(a,crit,t,state,fid);
	elseif (ksel == 0) & (state.l < state.r) & (length(state.I) > 1)
		state.r = length(state.I) - 1;
		state = takeawayr(a,crit,t,state,fid);
	elseif (ksel > 0) & (state.l > state.r) & (length(state.I) > ksel)
		state.r = length(state.I) - ksel;
		state = takeawayr(a,crit,t,state,fid);
	elseif (ksel > 0) & (state.l < state.r) & (length(state.I) < ksel)
		state.l = ksel - length(state.I);
		state = plusl(a,crit,t,state,fid);
	end;
    
	if (ksel ~= 0), state.I_opt = state.I; end;

	w = featsel(k,state.I_opt);
	w = setmapping_type(w,'trained');
	w = setsize(w,[k length(state.I_opt)]);
	if ~isempty(featlist)
		w = setlabels(w,featlist(state.I_opt,:));
	end
	w = setname(w,'+L-R FeatSel');

	res = state.res;
	prprogress(fid,'featsellr finished\n')
	
return;

function state = plusl(a,crit,t,state,fid)
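% PLUSL: extend STATE.I with the subset of STATE.L currently unselected
% features that maximises the criterion CRIT; update STATE.I_OPT,
% STATE.CRITVAL_OPT and STATE.RES accordingly.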

    % J contains the indices of the nonselected features.
	[m,k,c] = getsize(a); J = setdiff(1:k,state.I);

	critval_opt = 0; Jsub_opt = [];

    % Find all possible choices of L indices out of J.    
	[Jsub,ind] = npickk(J,state.l);
	while (~isempty(Jsub))
        % Calculate the criterion when subset JSUB is added.
		if (isempty(t))
			critval = feateval(a(:,[state.I Jsub]),crit);
		elseif is_scalar(t)
			critval = feateval(a(:,[state.I Jsub]),crit,t);
		else
			critval = feateval(a(:,[state.I Jsub]),crit,t(:,[state.I Jsub]));
		end;
        % Store the best subset and its criterion value.
		if (critval > critval_opt)
			critval_opt = critval;
			Jsub_opt    = Jsub;
		end;
		[Jsub,ind] = npickk(J,state.l,ind);
	end;
    
	state.I = [state.I Jsub_opt];

	if (critval_opt > state.critval_opt) | ...
			(critval_opt == state.critval_opt & ...
			length(state.I) < length(state.I_opt)), 
		state.critval_opt = critval_opt; 
		state.I_opt = state.I;  
	end;
    
	line = [length(state.I),critval_opt,...
				[Jsub_opt zeros(1,size(state.res,2)-2-length(Jsub_opt))]];
	prprogress(fid,'  %d %f \n',line(1:2)); 
	state.res = [state.res; line];

return

function state = takeawayr(a,crit,t,state,fid)
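% TAKEAWAYR: remove from STATE.I the subset of STATE.R selected features
% whose removal maximises the criterion CRIT; update STATE.I_OPT,
% STATE.CRITVAL_OPT and STATE.RES accordingly.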

    % J contains the indices of the selected features.
	[m,k,c] = getsize(a); J = state.I;

	critval_opt = 0; Jsub_opt = [];

    % Find all possible choices of R indices out of J.
	[Jsub,ind] = npickk(J,state.r);
	while (~isempty(Jsub))
        % Calculate the criterion when subset JSUB is removed.
		if (isempty(t))
			critval = feateval(a(:,[setdiff(state.I,Jsub)]),crit);
		elseif is_scalar(t)
			critval = feateval(a(:,[setdiff(state.I,Jsub)]),crit,t);
		else
			critval = feateval(a(:,[setdiff(state.I,Jsub)]),crit,...
                               t(:,[setdiff(state.I,Jsub)]));
		end;
        % Store the best subset and its criterion value.
		if (critval > critval_opt)
			critval_opt = critval;
			Jsub_opt    = Jsub;
		end;
		[Jsub,ind] = npickk(J,state.r,ind);
	end;
    
	state.I = setdiff(state.I,Jsub_opt);

	if (critval_opt > state.critval_opt) | ...
			(critval_opt == state.critval_opt & ...
			length(state.I) < length(state.I_opt)), 
		state.critval_opt = critval_opt; 
		state.I_opt = state.I;  
	end;

	line = [length(state.I),critval_opt,...
            [-Jsub_opt zeros(1,size(state.res,2)-2-length(Jsub_opt))]];
	prprogress(fid,'  %d %f \n',line(1:2)); 
	state.res = [state.res; line];
    
return

% NPICKK - An incremental, call-by-call enumeration of all K-element subsets,
%          similar in spirit to NCHOOSEK.
%
% Picks K elements out of vector V and returns them in R, with their indices
% in I. Initialise with [R,I] = NPICKK(V,K); subsequent calls should be
% [R,I] = NPICKK(V,K,I). If the set is exhausted, returns R = I = [].
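%
% Example of the calling sequence, enumerating all 2-subsets of [10 20 30]:
%
%   [r,i] = npickk([10 20 30],2);    % r = [10 20]
%   [r,i] = npickk([10 20 30],2,i);  % r = [10 30]
%   [r,i] = npickk([10 20 30],2,i);  % r = [20 30]
%   [r,i] = npickk([10 20 30],2,i);  % r = [], i = []: set exhausted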

function [r,i] = npickk(v,k,i)

    n = length(v);

    if (nargin < 3) | (isempty(i))
        i = 1:k; j = 1;
    else
        % Increase the last index.
        j = k; i(j) = i(j) + 1; 
        % While there's an overflow, increase the previous index
        % and reset the subsequent ones.
        while (i(j) > n-(k-j)) & (j >= 2)
            i(j-1) = i(j-1) + 1; 
            for l = j:k
                i(l)   = i(l-1) + 1;
            end;
            j = j - 1;
        end;
    end;

    % If the last index is too large, we're done.
    if (i(end) > n)
        r = []; i = [];
    else
        r = v(i);
    end;
    
return