ECG-Kit 1.0
(6,277 bytes)
%SELDAT Mapping selecting a subset of dataset
%
% [B,J] = SELDAT(A,C,F,N)
% [B,J] = A*SELDAT([],C,F,N)
% [B,J] = A*SELDAT(C,F,N)
%
% INPUT
% A Dataset
% C Indexes of classes (optional; default: all)
% or cell array, see below.
% A zero value of C points to the unlabeled objects.
% F Indexes of features (optional; default: all)
% N Indices of objects extracted from classes in C
% Should be cell array in case of multiple classes
% (optional; default: all)
% D Dataset
%
% OUTPUT
% B Subset of the dataset A
% J Indices of returned objects in dataset A: B = A(J,:)
%
% DESCRIPTION
% B is a subset of the dataset A defined by the set of classes (C),
% the set of features (F) and the set of objects (N). Classes and
% features have to be identified by their index. The order of class
% names can be found by GETLABLIST(A). The index of a particular
% class can be determined by GETCLASSI. N is applied to all classes
% defined in C. Defaults: select all, except unlabeled objects.
%
% In case A is soft labeled or is a target dataset by B = SELDAT(A,C) the
% entire dataset is returned, but the labels or targets are reduced to the
% selected class (target) C.
%
% B = SELDAT(A,D)
%
% If D is a dataset that is somehow is derived from A, e.g. by selection
% and mappings, then the corresponding objects of A are retrieved by their
% object identifiers and returned into B.
%
% B = SELDAT(A,{C,LABLISTNAME})
%
% In this case C refers to the classes of the label system defined by
% LABLISTNAME, see MULTI_LABELING. See also SELCLASS.
%
% B = SELDAT(A)
%
% Retrieves all labeled objects of A.
%
% In all cases empty classes are removed.
%
% EXAMPLES
% Generate 8 class, 2-D dataset and select: the second feature, objects
% 1 from class 1, 0 from class 2 and 1:3 from class 6
%
% A = GENDATM([3,3,3,3,3,3,3,3]);
% B = SELDAT(A,[1 2 6],2,{1;[];1:3});
% or
% B = SELDAT(A,[],2,{1;[];[];[];[];1:3});
%
% SEE ALSO (<a href="http://37steps.com/prtools">PRTools Guide</a>)
% DATASETS, GENDAT, GETLABLIST, GETCLASSI, REMCLASS, SELCLASS
% Copyright: R.P.W. Duin, r.p.w.duin@37steps.com
% Faculty EWI, Delft University of Technology
% P.O. Box 5031, 2600 GA Delft, The Netherlands
function [b,J] = seldat(varargin)
argin = shiftargin(varargin,{'vector','cell'});
argin = setdefaults(argin,[],[],[],{});
if mapping_task(argin,'definition')
b = define_mapping(argin,'fixed');
else % Evaluate
[a,clas,feat,n] = deal(argin{:});
if iscell(clas)
curn = curlablist(a);
a = changelablist(a,clas{2});
b = feval(mfilename,a,clas{1},feat,n);
b = changelablist(b,curn);
return
end
[m,k,c] = getsize(a);
allfeat = 0;
allclas = 0;
if isempty(feat), allfeat = 1; feat = [1:k]; end
if (isempty(clas) & isempty(n)) allclas = 1; clas = [1:c]; end
if isdataset(clas)
% If input D is a dataset, it is assumed that D was derived from
% A, and therefore the object identifiers have to be matched.
J = getident(clas);
L = findident(a,J);
if iscell(L)
L = cat(1,L{:});
end
b = a(L,:);
else
% Otherwise, we have to extract the right class/features and/or
% objects:
%if ~islabtype(a,'crisp') & ~allclas
% error('Class selection only possible in case of crisp labels')
%end
if max(feat) > k
error('Feature out of range');
end
%DXD: allow for selection based on class names instead of class
%indices:
if ~isa(clas,'double')
% names in cell arrays are also possible
if isa(clas,'cell')
clas = strvcat(clas);
end
% be sure we are dealing with char's here (if it were doubles,
% we were not even allowed to enter here)
if ~isa(clas,'char')
error('I am expecting class indices or names.');
end
% match the names with the lablist:
names = clas;
ll = getlablist(a);
clas = zeros(1,size(names,1));
for i = 1:size(names,1)
%DXD test if the class is present at all, otherwise an error
%occurs:
found = strmatch(names(i,:),ll);
if ~isempty(found)
clas(1,i) = found;
end
end
end
clas = clas(:)';
if max(clas) > c
error('Class number out of range')
%elseif max(clas==c+1) % allow to find unlabeled samples by clas = c+1
% clas(clas==c+1) = 0;
end
if iscell(n)
if (~(isempty(n) | isempty(clas))) & (length(n) ~= size(clas,2))
error('Number of cells in N should be equal to the number of classes')
end
else
if size(clas,2) > 1
error('N should be a cell array, specifying objects for each class');
end
n = {n};
end
% Do the extraction:
if allclas & isempty(n)
J = findnlab(a,0);
if ~isempty(J)
a(J,:) = [];
end
else
if isempty(clas) & ~isempty(n)
clas = zeros(1,length(n));
for i = 1:length(n)
if(~isempty(n(i)))
clas(1,i) = i;
end
end
end
if islabtype(a,'crisp')
J = [];
for j = 1:size(clas,2)
JC = findnlab(a,clas(1,j));
if ~isempty(n)
if max(cat(1,n{j})) > length(JC)
error('Requested objects not available in dataset')
end
J = [J; JC(n{j})];
else
J = [J; JC];
end
end
a = a(J,:);
else
labl = getlablist(a); labl = labl(clas,:);
targ = gettargets(a); targ = targ(:,clas);
[tt,nlab] = max(targ,[],2);
a = setnlab(a,1);
a = setlablist(a,labl);
a = settargets(a,targ);
a = setnlab(a,nlab);
if ~isempty(a.prior)
priora = a.prior(clas);
priora = priora/sum(priora);
a.prior = priora;
end
end
end
if allfeat
b = a;
else
b = a(:,feat);
end
end
b = setlablist(b); % reset lablist to remove empty classes
end
return;