ECG-Kit 1.0
(6,475 bytes)
%PROXM Proximity mapping
%
% W = PROXM(A,TYPE,P,WEIGHTS)
% W = A*PROXM([],TYPE,P,WEIGHTS)
% W = A*PROXM(TYPE,P,WEIGHTS)
% D = B*W
%
% INPUT
% A Dataset used for representation
% B Dataset applied to the representation set
% TYPE Type of the proximity (optional; default: 'distance')
% P Parameter of the proximity (optional; default: 1)
% WEIGHTS Weights (optional; default: all 1)
%
% OUTPUT
% W Proximity mapping
% D Dataset, proximity matrix between B and A
%
% DESCRIPTION
% Computation of the [K x M] proximity mapping (or kernel) defined by
% the [M x K] dataset A. Unlabeled objects in A are neglected.
% If B is an [N x K] dataset, then D=B*W is the [N x M] proximity matrix
% between B and A. The proximities can be defined by the following types
% (A and B are here 1 x K vectors):
%
% 'polynomial' | 'p': SIGN(A*B'+1).*(A*B'+1).^P
% 'homogeneous' | 'h': SIGN(A*B').*(A*B').^P
% 'exponential' | 'e': EXP(-(||A-B||)/P)
% 'radial_basis' | 'r': EXP(-(||A-B||.^2)/(P*P))
% 'sigmoid' | 's': SIGM((SIGN(A*B').*(A*B'))/P)
% 'distance' | 'd': ||A-B||.^P
% 'minkowski' | 'm': SUM(|A-B|.^P).^(1/P)
% 'city-block' | 'c': SUM(|A-B|)
% 'cosine' | 'o': 1 - (A*B')/||A||*||B||
% 'ndiff' | 'n': SUM(A~=B)
% 'hellinger' | 'g': ||A.^0.5-B.^0.5||
%
% In the polynomial case for a non-integer P, the proximity is computed
% as D = SIGN(S+1).*ABS(S+1).^P, in order to avoid problems with negative
% inner products S = A*B'. The features of the objects in A and B may be
% weighted by the weights in the vector WEIGHTS.
%
% Note that for computing (squared) Euclidean distances DISTM is much
% faster than PROXM.
%
% EXAMPLES
% W = A*proxm('m',1); % define L1 distances
% W = A*proxm('m',1); D = B*W; % L1 distances between B and A
% W = proxm('r',2)*mapex; D = A*W; % Distances between A and itself
%
% SEE ALSO (<a href="http://37steps.com/prtools">PRTools Guide</a>)
% MAPPINGS, DATASETS, DISTM
% Copyright: R.P.W. Duin, r.p.w.duin@37steps.com
% Faculty EWI, Delft University of Technology
% P.O. Box 5031, 2600 GA Delft, The Netherlands
% 27-11-2008, Marc Hulsman, MR1
% Added kernel normalization.
% $Id: proxm.m,v 1.10 2010/01/15 13:39:01 duin Exp $
function W = proxm(varargin)
varargin = shiftargin(varargin,'char');
argin = setdefaults(varargin,[],'d',1,[]);
if mapping_task(argin,'definition')
W = define_mapping(argin,'untrained','Proximity mapping');
return
end
[A,type,s,weights] = deal(argin{:});
[m,k] = size(A);
if (isstr(type))
% Definition of the mapping, just store parameters.
all = char('polynomial','p','homogeneous','h','exponential','e','radial_basis','r', ...
'sigmoid','s','distance','d','minkowski','m','city-block','c',...
'cosine','o','uniform','u','ndiff','n','hellinger','g');
if (~any(strcmp(cellstr(all),type)))
error(['Unknown proximity type: ' type])
end
A = cdats(A,1);
[m,k] = size(A);
%if (isa(A,'prdataset'))
if (isdataset(A))
A = testdatasize(A);
W = prmapping(mfilename,'trained',{A,type,s,weights},getlab(A), ...
getfeatsize(A),getobjsize(A));
else
W = prmapping(mfilename,'trained',{A,type,s,weights},[],k,m);
end
W = setname(W,'Proximity mapping');
elseif isa(type,'prmapping')
% Execution of the mapping: compute a proximity matrix
% between the input data A and B; output stored in W.
w = type;
[B,type,s,weights] = deal(w.data{1},w.data{2},w.data{3},w.data{4});
[kk,n] = size(w);
if (k ~= kk)
error('Mismatch in sizes between the mapping and its data stored internally.');
end
if (~isempty(weights))
if (length(weights) ~= k),
error('Weight vector has a wrong length.');
end
A = A.*(ones(m,1)*weights(:)');
B = B.*(ones(n,1)*weights(:)');
end
testdatasize(m*n);
switch type
case {'polynomial','p'}
D = +(A*B');
D = D + ones(m,n);
if (s ~= round(s))
D = +sign(D).*abs(D).^s;
elseif (s ~= 1)
D = D.^s;
end
case {'homogeneous','h'}
D = +(A*B');
if (s ~= round(s))
D = +sign(D).*abs(D).^s;
elseif (s ~= 1)
D = D.^s;
end
case {'sigmoid','s'}
D = +(A*B');
D = sigm(D/s);
case {'city-block','c'}
D = zeros(m,n);
t = sprintf('Processing %i objects: ',n);
prwaitbar(n,t);
for j=1:n
prwaitbar(n,j,[t int2str(j)]);
D(:,j) = sum(abs(A - repmat(+B(j,:),m,1)),2);
end
prwaitbar(0);
case {'minkowski','m'}
D = zeros(m,n);
t = sprintf('Minkowski distance for %i objects: ',n);
prwaitbar(n,t);
for j=1:n
prwaitbar(n,j,[t int2str(j)]);
D(:,j) = sum(abs(A - repmat(+B(j,:),m,1)).^s,2).^(1/s);
end
prwaitbar(0);
case {'exponential','e'}
D = dist2(B,A);
D = exp(-sqrt(D)/s);
case {'radial_basis','r'}
D = dist2(B,A);
D = exp(-D/(s*s));
case {'distance','d'}
D = dist2(B,A);
if s ~= 2
D = sqrt(D).^s;
end
case {'cosine','o'}
D = +(A*B');
lA = sqrt(sum(A.*A,2));
lB = sqrt(sum(B.*B,2));
D = 1 - D./(lA*lB');
case {'ndiff','n'}
D = zeros(m,n);
t = sprintf('Processing %i objects: ',n);
prwaitbar(n,t);
for j=1:n
prwaitbar(n,j,[t int2str(j)]);
D(:,j) = sum(+A ~= repmat(+B(j,:),m,1),2);
end
prwaitbar(0);
case {'hellinger','g'}
D = dist2(sqrt(B),sqrt(A));
D = sqrt(D);
otherwise
error('Unknown proximity type')
end
if isdataset(A)
W = setdat(A,D,w);
else
W = D;
end
else
error('Illegal TYPE argument.')
end
return;
function D = dist2(B,A)
% Computes square Euclidean distance, avoiding large matrices for high
% dimensional data
ma = size(A,1);
mb = size(B,1);
if isdatafile(A)
D = zeros(ma,mb);
next = 1;
while next > 0
[a,next,J] = readdatafile(A,next);
D(J,:) = +dist2(a,B);
end
elseif isdatafile(B)
D = zeros(ma,mb);
next = 1;
while next > 0 % we need another version of readdatafile here, as due
[b,next,J] = readdatafile2(B,next); % to persistent variables double
D(:,J) = +dist2(A,b); % looping can not be handled correctly
end
else % A and B are not datafiles
D = ones(ma,1)*sum(B'.*B',1);
D = D + sum(A.*A,2)*ones(1,mb);
D = D - 2 .* (+A)*(+B)';
% Clean result.
J = find(D<0);
D(J) = zeros(size(J));
end
return