ECG-Kit 1.0
(5,991 bytes)
% EDICON Multi-edit and condense a training set
%
% J = EDICON(D,NSETS,NITERS,NTRIES)
%
% INPUT
% D Distance matrix dataset
% NSETS Number of subsets for editing, or [] for no editing (default: 3)
% NITERS Number of iterations for editing (default: 5)
% NTRIES Number of tries for condensing, or [] for no condensing (dflt: 10)
%
% OUTPUT
% J Indices of retained samples
%
% DESCRIPTION
% Returns the set of objects J such that the nearest neigbour gives zero error
% on the remaining objects. If MODE = 0, multi-edit the dataset represented by
% distance matrix D first. D can be computed from a dataset A by A*proxm(A).
%
% REFERENCES
% Devijver, P. and Kittler, J. "Pattern recognition", Prentice-Hall, 1982.
%
% SEE ALSO (<a href="http://37steps.com/prtools">PRTools Guide</a>)
% PRDATASET, KNNC, PROXM
% Copyright: E. Pekalska and D. de Ridder, {E.Pekalska,D.deRidder}@ewi.tudelft.nl
% Faculty of Electrical Engineering, Mathematics and Computer Science
% Delft University of Technology, Mekelweg 4, 2628 CD Delft, The Netherlands
function J = edicon (D,no_subsets,max_iter,no_tries)
if (nargin < 4 | isempty(no_tries))
prwarning(4,'number of tries for condensing not specified, assuming 10');
no_tries = 10;
end;
if (nargin < 3 | isempty(max_iter))
prwarning(4,'number of iterations for editing not specified, assuming 5');
max_iter = 5;
end;
if (nargin < 2 | isempty(no_subsets))
prwarning(4,'number of subsets for editing not specified, assuming 3');
no_subsets = 5;
end;
% Extract dataset information.
nlab = getnlab(D); lablist = getlablist(D);
[m,k,c] = getsize(D); p = getprior(D);
if (~isdataset(D) | (m ~= k))
error('require a square distance matrix dataset');
end
J = 1:m; % Initialise index array.
% If requested, apply the multi-edit algorithm.
if (~isempty(no_subsets))
iter = 1;
while (iter <= max_iter)
% 1. Create NO_SUBSETS distinct subsets out of the sample indices
% remaining in J. Store the subset indices in subset_ind{.}.
% Note: this is slightly more complicated than strictly
% necessary, to enforce that each class is represented equally
% in each subset.
m = length(J); J = J(randperm(m));
subset_size = floor(m/no_subsets);
% Find the MC(J) indices of samples belonging to class J in CLASS_IND{J}.
% Also create a random permutation for class J.
for j = 1:c
class_ind{j} = find(nlab(J)==j);
mc(j) = length(class_ind{j});
perm{j} = randperm(mc(j));
end;
% Distribute the samples over the subsets, per class.
for i = 1:no_subsets
subset_ind{i} = [];
for j = 1:c
num = min(floor(mc(j)/no_subsets),length(class_ind{j}));
subset_ind{i} = [subset_ind{i}; class_ind{j}(1:num)];
class_ind{j} = class_ind{j}(num+1:end);
end;
perm_subset_ind{i} = J(subset_ind{i});
end;
% 2. In turn, classify each subset I using 1-NN on subset I+1.
drop = [];
for i = 1:no_subsets
L1 = perm_subset_ind{i};
L2 = perm_subset_ind{mod(i,no_subsets)+1};
if (length(L2)>1)
[dummy,nearest] = min(D(L2,L1));
else
nearest = 1;
end;
drop = [drop; subset_ind{i}(find(nlab(L1)~=nlab(L2(nearest))))];
end;
% 3. Discard incorrectly classified samples from J.
if (isempty(drop))
iter = iter + 1;
else
J(drop) = []; iter = 0;
end;
end;
D = D(J,J);
end;
% Condense.
if (~isempty(no_tries))
% Extract dataset information.
nlab = getnlab(D); lablist = getlablist(D);
[m,k,c] = getsize(D); p = getprior(D);
D = +D;
% The whole procedure starts from a random object and continues to add
% objects, so it is not optimal. Therefore it is repeated NO_TRIES times
% and the smallest set found is returned.
K = zeros(m,no_tries); storesz = zeros(1,no_tries);
for o = 1:no_tries
% Start with 1 sample index in STORE, the others in GRABBAG.
p = randperm(m); store = p(1); grabbag = p(2:m);
% While there are changes...
transfer = 0;
while (~transfer) & (~isempty(grabbag))
storelab = nlab(store);
grabbaglab = nlab(grabbag);
new_grabbag = [];
% For all samples in GRABBAG...
transfer = 0;
for k = 1:length(grabbag)
% ... find the nearest sample in STORE...
[dummy,z] = min(D(grabbag(k),store));
% ... if it has a different label, move it to STORE.
if (storelab(z) ~= grabbaglab(k))
store = [store; grabbag(k)];
storelab = [storelab; grabbaglab(k)];
transfer = 1;
else
new_grabbag = [new_grabbag; grabbag(k)];
end;
end;
grabbag = new_grabbag;
end
% Remember the STORE and its size for this repetition.
storesz(o) = length(store);
K(1:storesz(o),o) = store;
end
% Take the STORE with minimal size.
[minsz,minszind] = min(storesz);
J = J(K(1:minsz,minszind));
end;
return