ECG-Kit 1.0

File: <base>/common/LIBRA/ltsregres.m (46,157 bytes)
function [rew,raw] = ltsregres(x,y,varargin);

%LTSREGRES carries out least trimmed squares (LTS) regression, introduced in
%   
%   Rousseeuw, P.J. (1984), "Least Median of Squares Regression,"
%   Journal of the American Statistical Association, Vol. 79, pp. 871-881.
% 
% The LTS regression method minimizes the sum of the h smallest squared 
% residuals, where h must be at least half the number of observations. The 
% default value of h is roughly 0.75n (n is the total number of observations), 
% but the user may choose any value between n/2 and n.
%
% To compute the LTS estimator, the FAST-LTS algorithm is used.
% Reference: 
%  Rousseeuw, P.J. and Van Driessen, K. (2006), 
%  "Computing LTS Regression for Large Data Sets", Data Mining and
%  Knowledge Discovery, 12, 29-45.
%
%  Also available at http://www.agoras.ua.ac.be/
%
% The LTS regression method is intended for continuous variables, and assumes 
% that the number of observations n is at least 2 times the number of 
% regression coefficients p. If p is too large with respect to n, it is  
% better to first reduce p by variable selection or principal components 
% (see rpcr.m, rsimpls.m). The response variable should be univariate, otherwise 
% robust multivariate regression should be performed (see mcdregres.m).
%
% The LTS is a robust method in the sense that the estimated regression
% fit is not unduly influenced by outliers in the data, even if there are
% several outliers. Due to this robustness, we can detect outliers by their
% large LTS residuals.
%
% Required input arguments: 
%    x : Data matrix of explanatory variables (also called 'regressors'). 
%        Rows of x represent observations, and columns represent variables.  
%        Missing values (NaN's) and infinite values (Inf's) are allowed, since observations (rows) 
%        with missing or infinite values will automatically be excluded from the computations.
%    y:  A vector with n elements that contains the response variables.
%        Missing values (NaN's) and infinite values (Inf's) are allowed, since observations (rows) 
%        with missing or infinite values will automatically be excluded from the computations.
%
% Optional input arguments:           
%   intercept : If 1, a model with constant term will be fitted (default),
%               if 0, no constant term will be included. 
%   intadjust : If 1, the intercept adjustment will be applied in each step
%               of the algorithm. These calculations need substantially more 
%               computation time than intadjust=0, which is the default value.
%           h : The number of observations that have determined the least 
%               trimmed squares estimator. Any value between n/2 and n may be specified.
%       alpha : (1-alpha) measures the fraction of outliers the algorithm should 
%               resist. Any value between 0.5 and 1 may be specified. (default = 0.75)
%      ntrial : Number of initial subsets drawn. Its default value is 500.
%       plots : If equal to one, a menu is shown which allows to draw several plots,
%               such as residual plots and a regression outlier map. (default)
%               If the input argument 'classic' is equal to one, the classical
%               plots are drawn as well.
%               If 'plots' is equal to zero, all plots are suppressed.
%               See also makeplot.m
%     classic : If equal to one, classical least squares regression will be performed,
%               see ols.m (default = 0).
%
% Input arguments for advanced users:
%     Hsets : Instead of random trial h-subsets (default, Hsets = []), Hsets makes it possible to give certain
%             h-subsets as input. Hsets is a matrix that contains the indices of the observations of one
%             h-subset as a row.
%
% I/O:  result=ltsregres(x,y,'plots',0,'intercept',0)
%       [rew,raw] = ltsregres(x,y)
%  The user should only give the input arguments that have to change their default value.
%  The name of the input arguments needs to be followed by their value.
%  The order of the input arguments is of no importance.
%
% The output consists of two structures 'rew' and 'raw' containing the 
% following fields:
%
%    raw.coefficients : Vector of raw LTS coefficient estimates (including the 
%                       intercept, when options.intercept=1).
%          raw.fitted : Vector like y containing the raw fitted values of the response.
%             raw.res : Vector like y containing the raw residuals from the regression.
%           raw.scale : Scale estimate of the raw residuals.
%       raw.objective : Objective function of the LTS regression method, i.e. the sum 
%                       of the h smallest squared raw residuals.
%              raw.wt : Vector like y containing weights that can be used in a weighted
%                       least squares. These weights are 1 for points with reasonably
%                       small raw residuals, and 0 for points with large raw residuals.
%
% 	        rew.slope : Vector of the slope coefficients obtained after reweighting.
%             rew.int : The intercept.
%          rew.fitted : Vector like y containing the fitted values of the response
%                       after reweighting.
%             rew.res : Vector like y containing the residuals from the weighted
%                       least squares regression.
%           rew.scale : Scale estimate of the reweighted residuals.
%        rew.rsquared : Robust version of R squared. This is 1 minus the fraction:
%                       (sum of the quan smallest squared residuals) over (sum of 
%                       the quan smallest (y-loc)^2), where the denominator
%                       is minimized over loc. Note that loc is not subtracted from
%                       y if intercept = 0 in the call to ltsregres.	
%               rew.h : The number of observations that have determined the LTS estimator, 
%                       i.e. the value of h. 
%       rew. Hsubsets : A structure that contains Hopt and Hfreq:
%                        Hopt  : The subset of h points whose covariance matrix has minimal determinant, 
%                                ordered following increasing robust distances.
%                        Hfreq : The subset of h points which are the most frequently selected during the whole
%                                algorithm.
%           rew.alpha : (1-alpha) measures the fraction of outliers the algorithm should 
%                       resist. 
%              rew.rd : The robust distances for the observations of the design matrix, 
%                       based on the MCD estimator (mcdcov.m)  
%            rew.resd : Vector like y containing the standardized residuals
%                       from the weighted least squares regression.
%         rew.weights : Vector like y containing weights that have been used in a weighted
%                       least squares. These weights are 1 for points with reasonably
%                       small raw residuals, and 0 for points with large raw residuals.
%          rew.cutoff : Structure which contains cutoff values for the robust distances computed by mcdcov.m,
%                       and for the standardized residuals. 
%            rew.flag : Vector like y containing flags based on the reweighted regression.
%                       These flags determine which observations can be considered as 
%                       outliers.
%          rew.method : Character string naming the method (Least Trimmed Squares).	
%           rew.class : 'LTS'
%         rew.classic : If the input argument 'classic' equals 1, this structure contains the 
%                       results of a classical least squares regression
%               rew.X : If x is univariate, same as the input x in the call to ltsregres,
%                       without rows containing missing or infinite values. 
%               rew.y : If x is univariate, same as the input y in the call to ltsregres,
%                       without rows containing missing or infinite values.

%
% This function is part of LIBRA: the Matlab Library for Robust Analysis,
% available at: 
%              http://wis.kuleuven.be/stat/robust.html
%
% Version 22/12/2000, 
% Written by Katrien Van Driessen and Randy Brenkers
% Revisions by Sabine Verboven, Sanne Engelen, Nele Smets
% Last update: 06/07/2004

%MATLAB: to avoid singularMatrix in line 538 
warning off 
%The maximum value for p = number of variables
pmax=20;
%The maximum value for n = number of observations
nmax=50000;
%To change the number of subdatasets and their size, the values of maxgroup
%and nmini can be changed
maxgroup=5;
nmini=300;
%The number of iteration steps in stages 1,2 and 3 can be changed
% by adapting the parameters csteps1, csteps2, and csteps3.
csteps1=2;
csteps2=2;
csteps3=100;

pmax1=pmax+1;
pmax2=pmax*pmax;
nvm11=pmax*pmax1;
nvm12=pmax1*pmax1;
km10=10*maxgroup;
nmaxi=nmini*maxgroup;
maxmini=fix(((3*nmini-1)/2)+1);
% dtrial : number of subsamples if not all (p+1)-subsets will be considered. 
dtrial=500;

[n,p]=size(x);
[m,q]=size(y);
if q~=1
    if m==1
        y=y';
    else
        error('y is not one-dimensional.');
    end
end
na.x=~isfinite(x*ones(p,1));
na.y=~isfinite(y);
if size(na.x,1)~=size(na.y,1)
    error('Number of observations in x and y not equal.');
end
% Observations with missing or infinite values are ommitted. 
ok=~(na.x|na.y);
x=x(ok,:);
y=y(ok,:);
dx=size(x);
dy=size(y,1);
n=dx(1);
% Some checks are now performed.
if n == 0
    error('All observations have missing values!');
end
if n > nmax
    error(['The program allows for at most ' int2str(nmax) ' observations.']);
end
%internal variables and default values
seed=0;
alfa=0.75;
hdef=quanf(alfa,n,p+1); %default value of h
hmin=quanf(0.5,n,p+1); %minmal value of h
default=struct('intercept',1,'intadjust',0,'alpha',alfa,'h',hdef,'plots',1,'ntrial',dtrial,'classic',0,'Hsets',[]);
list=fieldnames(default);
options=default;
IN=length(list);
i=1;
counter=1;
%Reading optional inputarguments
if nargin > 3
    %
    % placing inputfields in array of strings
    %
    for j=1:nargin-2
        if rem(j,2)~=0
            chklist{i}=varargin{j};
            i=i+1;
        end
    end 
    dummy=sum(strcmp(chklist,'h')+2*strcmp(chklist,'alpha'));
    switch dummy
        case 0 % defaultvalues should be taken
            alfa=options.alpha;
            h=options.h;
        case 3
            error('Both inputarguments alpha and h are provided. Only one is required.')
    end
    %
    % Checking which default parameters have to be changed
    % and keep them in the structure 'options'.
    %
    while counter<=IN 
        index=strmatch(list(counter,:),chklist,'exact');
        if ~isempty(index) % in case of similarity
            for j=1:nargin-3 % searching the index of the accompanying field
                if rem(j,2)~=0 % fieldnames are placed on odd index
                    if strcmp(chklist{index},varargin{j})
                        I=j;
                    end
                end
            end
            options=setfield(options,chklist{index},varargin{I+1});
            index=[];
        end
        counter=counter+1;
    end
    if dummy==1% checking inputvariable h
        if options.h < hmin                                                      
            error(['The LTS must cover at least ' int2str(hmin) ' observations.'])
        elseif options.h > n
            error('h is greater than the number of non-missings and non-infinites.')
        elseif options.h < p
            error(['h should be larger than the dimension ' int2str(p) '.'])
        elseif options.h==0
            options.h=h;
        end
        options.alpha=options.h/n;
    elseif dummy==2
        if options.alpha < 0.5
            options.alpha=0.5;
            mess=sprintf(['Attention : Alpha should be larger than 0.5. \n',...
                    'It is set to 0.5.']);
            disp(mess)
        end
        if options.alpha > 1
            options.alpha=0.75;
            mess=sprintf(['Attention : Alpha should be smaller than 1.\n',...
                    'It is set to 0.75.']);
            disp(mess)
        end
        if options.alpha == 0
            options.alpha=0.75;
        end
        options.h=quanf(options.alpha,n,p);
    end
end
intercept=options.intercept; %if 1 intercept in the model
h=options.h; %number of regular data points on which estimates are based (=[alpha * n])
plots=options.plots; %relevant plots if equal to 1
alfa=options.alpha; %proportion of regular data points
ntrial=options.ntrial; %number of subsets to be taken in the first step
classic=options.classic; %classic least squares regression?
bestobj=inf; %best objective value until 'now' -> goal is to be as small as possible
intadj=options.intadjust; %intercept adjustment needed if set to 1
Hsets = options.Hsets;
if ~isempty(Hsets)
    Hsets_ind = 1;
else
    Hsets_ind = 0;
end

if classic
    res.classic=libra_ols(x,y,'plots',0);
else 
    res.classic=0;
end

if intercept == 1
    dx=dx+[0 1];
    x=cat(2,x,ones(n,1));
end
p=dx(2);
if n < p
   error('Need more observations than variables.');
end

if p > pmax
   error(['The program allows for at most ' int2str(pmax) ' variables.'])
end

rk=rank(x);
if rk < p
   error('x is singular');
end

if h == n 
    res.method='Least Squares Regression.';
    [Q,R]=qr(x,0);
    z=R\(Q'*y);
    raw.coefficients=z;
    residuals=y-x*z;
    raw.res=residuals;
    fitted=x*raw.coefficients;
    raw.fitted=fitted;
    s0=sqrt(sum(residuals.^2)/(n-p));
    if abs(s0) < 1e-7
        weights=abs(residuals)<=1e-7;
        raw.wt=weights;
        raw.scale=0;
        res.scale=0;
        res.coefficients=raw.coefficients;
        raw.objective=0;
    else
        sor=sort(residuals.^2);
        raw.objective=sum(sor(1:h));
        raw.scale=s0;
        weights=abs(residuals/s0)<=norminv(0.9875);
        raw.wt=weights;
        [Q,R]=qr(x(weights==1,:),0);
        z=R\(Q'*y(weights==1));
        res.coefficients=z;
        fitted=x*res.coefficients;
        residuals=y-x*z;
        res.scale=sqrt(sum(weights.*(residuals.^2))/(sum(weights)-1));
        weights=abs(residuals/res.scale)<=norminv(0.9875);
    end
    if intercept
        s1=sum(residuals.^2);
        center=mean(y);
        sh=sum((y-center).^2);
        res.rsquared=1-s1/sh;
    else
        s1=sum(residuals.^2);
        sh=sum(y.^2);
        res.rsquared=1-s1/sh;
    end
    if res.rsquared > 1
        res.rsquared=1;
    elseif res.rsquared < 0
        res.rsquared=0;
    end
    if abs(s0) < 0
        res.method=strvcat(res.method,'An exact fit was found!');
    end
    stdres=residuals/res.scale;
    cutoff.resd=sqrt(chi2inv(0.975,1));
    raw=struct('coefficients',{raw.coefficients},'fitted',{raw.fitted},'res',{raw.res},'scale',{raw.scale},...
        'objective',{raw.objective},'wt',{raw.wt});
    rew=struct('slope',{res.coefficients(1:p)},'int',{0},'fitted',{fitted},'res',{residuals},...
        'scale',{res.scale},'rsquared',{res.rsquared},'h',{h},'alpha',{alfa},'resd', {stdres},...
        'rd',{NaN},'cutoff',{cutoff},'flag',{NaN},'weights',{raw.wt},...
        'method',{res.method},'class',{'LTS'},'classic',{res.classic},'X',{x},'y',{y});
    if intercept
        rew=setfield(rew,'int',res.coefficients(p));
        rew=setfield(rew,'slope',res.coefficients(1:p-1));
    end
    if plots& classic 
        mcdres=mcdcov(x,'h',h,'plots',0);
        if -log(abs(det(mcdres.cov)))/size(data,2)> 50
            res.rd=NaN;
        else 
            res.rd=mcdres.rd;
        end
        cutoff.rd=mcdres.cutoff.rd;  
        cutoff.md=mcdres.cutoff.md;
        flags=abs(stdres)<=cutoff.resd;
        rew=setfield(rew,'rd', res.rd);
        rew=setfield(rew,'flag',flags);
        rew=setfield(rew,'cutoff',cutoff);
        try
            makeplot(rew,'classic',1)
        catch %output must be given even if plots are interrupted 
            %> delete(gcf) to get rid of the menu 
        end
    elseif plots 
        mcdres=mcdcov(x,'h',h,'plots',0);
        if -log(abs(det(mcdres.cov)))/size(x,2)> 50
            res.rd=NaN;
        else 
            res.rd=mcdres.rd;
        end
        cutoff.rd=mcdres.cutoff.rd;  
        cutoff.md=mcdres.cutoff.md;
        flags=abs(stdres)<=cutoff.resd;
        rew=setfield(rew,'rd', res.rd);
        rew=setfield(rew,'cutoff',cutoff);
        rew=setfield(rew,'flag',flags);
        try
            makeplot(rew)
        catch %output must be given even if plots are interrupted 
            %> delete(gcf) to get rid of the menu 
        end
    end
    return
end

if p < 5
    eps=1e-12;
elseif p <= 8
    eps=1e-14;
else
    eps=1e-16;
end

% standardization of the data
xorig=x;
yorig=y;
data=[x y];
if ~intercept
    datamed=repmat(0,1,p+1);
    datamad=median(abs(data)).*1.4826;
    for i=1:p+1
        if abs(datamad(i)) <= eps
            datamad(i)=sum(abs(data(:,i)));
            datamad(i)=(datamad(i)/n)*1.2533;
            if abs(datamad(i)) <= eps;
                error('The MAD of some variable is zero');
            end
        end   
    end
    x=x./repmat(datamad(1:p),n,1);
    y(:,1)=y(:,1)./datamad(p+1);
else
    datamed=median(data);
    datamed(p)=0;
    datamad(p)=1;
    for i=1:p+1
        if i ~= p
            datamad(i)=median(abs(data(:,i)-datamed(i)))*1.4826;
            if abs(datamad(i)) <= eps
                datamad(i)=sum(abs(data(:,i)-datamed(i)));
                datamad(i)=(datamad(i)/n)*1.2533;
                if abs(datamad(i)) <= eps
                    error('The MAD of some variable is zero');
                end
            end
        end
    end
    x=(x-repmat(datamed(1:p),n,1))./repmat(datamad(1:p),n,1);
    y(:,1)=(y(:,1)-datamed(p+1))./datamad(p+1);
end

res.method='Least Trimmed Squares Regression.';
al=0;
teller = zeros(1,n+1);
if Hsets_ind
    csteps = csteps1;
    inplane = NaN;
    fine = 0;
    part =  0;
    final = 1;
    tottimes = 0;
    nsamp = size(Hsets,1);
    obsingroup = n;
else
    if n >= 2*nmini  
        maxobs=maxgroup*nmini;
        if n >= maxobs
            ngroup=maxgroup;
            group(1:maxgroup)=nmini;
        else
            ngroup=floor(n/nmini);
            minquan=floor(n/ngroup);
            group(1)=minquan;
            for s=2:ngroup
                group(s)=minquan+double(rem(n,ngroup)>=s-1);
            end
        end
        part=1;
        adjh=floor(group(1)*alfa);
        nsamp=floor(ntrial/ngroup);
        minigr=sum(group);
        obsingroup=fillgroup(n,group,ngroup,seed);
        totgroup=ngroup;
    else
        [part,group,ngroup,adjh,minigr,obsingroup]=deal(0,n,1,h,n,n);
        replow=[50,22,17,15,14,zeros(1,45)];
        if n < replow(p)
            al=1;
            perm=[1:p-1,p-1];                 
            nsamp=nchoosek(n,p);
        else
            al=0;
            nsamp=ntrial;
        end
    end
    
    csteps=csteps1;
    [tottimes,fine,final]=deal(0);
    if part
        bobj1=repmat(inf,ngroup,10); 
        bcoeff1=cell(ngroup,10);
        [bcoeff1{:}]=deal(NaN);
    end
end
bcoeff=cell(1,10);
bobj=repmat(inf,1,10);
[bcoeff{:}]=deal(NaN);
seed=0;
coeffs=repmat(NaN,p,1);
while final ~= 2
    if fine | (~part & final)
        if ~Hsets_ind
            nsamp=10;
        end
        if final
            adjh=h;
            ngroup=1;
            if n*p <= 1e+5
                csteps=csteps3;
            elseif n*p <= 1e+6
                csteps=10-(ceil(n*p/1e+5)-2);
            else
                csteps=1;
            end
            if n > 5000
                nsamp=1;
            end
        else
            adjh=floor(minigr*alfa);
            csteps=csteps2;
        end
    end
    for k=1:ngroup
        for i=1:nsamp
            tottimes=tottimes+1;
            prevobj=0;
            if ~Hsets_ind
                if final
                    if ~isinf(bobj(i))
                        z=bcoeff{i};  	
                    else
                        break
                    end
                elseif fine
                    if ~isinf(bobj1(k,i))
                        z=bcoeff1{k,i};
                    else
                        break
                    end
                else
                    z(1,1)=Inf;
                    while abs(z(1,1)) == Inf
                        if ~part
                            if al
                                k=p;
                                perm(k)=perm(k)+1;
                                while ~(k==1|perm(k) <= (n-(p-k)))
                                    k=k-1;
                                    perm(k)=perm(k)+1;
                                    for j=(k+1):p
                                        perm(j)=perm(j-1)+1;
                                    end	
                                end
                                index=perm;
                            else
                                [index,seed]=randomset(n,p,seed);
                            end
                        else
                            [index,seed]=randomset(group(k),p,seed);
                            index=obsingroup{k}(index);
                        end
                        if p > 1
                            z=x(index,:)\y(index,1);
                            %problems arise whenever the subsample contains
                            %equal x-values. To avoid warnings the tests in line
                            %551 and line 591 are adapted to abs(z(1,1)).
                            %However, in the first run the matrix will still
                            %be singular having coefficients [-inf a b ...] or
                            %[inf inf ...] producing the warning. To avoid
                            %this we turned off the warnings in this
                            %function (line 140).
                        elseif x(index,1) ~= 0
                            z(1,1)=y(index,1)/x(index,1);
                        else
                            z(1,1)=x(index,1);
                        end
                        if al
                            break
                        end
                    end
                end
                if abs(z(1,1)) ~= Inf 
                    if ~part | final
                        residu=y-x*z;
                    elseif ~fine
                        residu=y(obsingroup{k},1)-x(obsingroup{k},:)*z;
                    else
                        residu=y(obsingroup{totgroup+1},1)-x(obsingroup{totgroup+1},:)*z;
                    end        
                    more1=0;
                    more2=0;
                    nmore=200;
                    nmore2=nmore/2;
                    if intadj %intercept adjustment
                        [sortres,sortind]=sort(residu);
                        if ~part %n<600 
                            [center,cover,loc]=mcduni(sortres,obsingroup,adjh,obsingroup-adjh+1,alfa);
                            z(p)=z(p)+center;
                            residu=residu-center;
                        elseif ~fine %n>600, first step
                            [center,cover,loc]=mcduni(sortres,size(obsingroup{k},2),adjh,size(obsingroup{k},2)-adjh+1,alfa);   
                            z(p)=z(p)+center;
                            residu=residu-center;
                        elseif ~final & size(obsingroup{totgroup+1},2)-adjh <= nmore %fine =  merged set
                            [center,cover,loc]=mcduni(sortres,size(obsingroup{totgroup+1},2),adjh,size(obsingroup{totgroup+1},2)-adjh+1,alfa);
                            z(p)=z(p)+center;
                            residu=residu-center;
                        elseif final & n-adjh <= nmore %final = complete data set
                            [center,cover,loc]=mcduni(sortres,n,adjh,n-adjh+1,alfa);
                            z(p)=z(p)+center;
                            residu=residu-center;
                        else   
                            [sortres1,sortind1]=sort(abs(sortres));
                            [sortres2,sortind2]=sort(sortres(sortind1(1:adjh)));
                            further = 1;
                            if final & (sortind1(sortind2(1))+nmore-nmore2+adjh-1 > n  | sortind1(sortind2(1))-nmore2< 1)
                                [center,cover,loc]=mcduni(sortres,n,adjh,n-adjh+1,alfa);
                                z(p)=z(p)+center;
                                residu=residu-center;
                            elseif ~final & fine & (sortind1(sortind2(1))+nmore-nmore2+adjh-1 > size(obsingroup{totgroup+1},2) | sortind1(sortind2(1))-nmore2 < 1)
                                [center,cover,loc]=mcduni(sortres,size(obsingroup{totgroup+1},2),adjh,size(obsingroup{totgroup+1},2)-adjh+1,alfa);
                                z(p)=z(p)+center;
                                residu=residu-center;
                            else   
                                while further
                                    sortres2(1:adjh+nmore)=sortres(sortind1(sortind2(1))-nmore2:sortind1(sortind2(1))+adjh-1+nmore-nmore2);
                                    [center,cover,loc]=mcduni(sortres2,adjh+nmore,adjh,nmore+1,alfa);
                                    if loc == 1 & ~more1
                                        if ~more2
                                            nmore=nmore2;
                                            nmore2=nmore2+nmore2;
                                            more1=1;
                                            if sortind1(sortind2(1))-nmore2 < 1
                                                further=0;
                                            end
                                        else 
                                            further=0;
                                        end
                                    else
                                        if loc == nmore+1 & ~more2
                                            if ~more1
                                                nmore=nmore2;
                                                nmore2=-nmore2;
                                                more2=1;
                                                if final & sortind1(sortind2(1))+nmore-nmore2+adjh-1 > n  
                                                    further=0;
                                                elseif fine & (sortind1(sortind2(1))+nmore-nmore2+adjh-1 > size(obsingroup{totgroup+1},2) | sortind1(sortind2(1))-nmore2<1)
                                                    further=0;
                                                end
                                            else 
                                                further = 0;
                                                
                                            end
                                        else
                                            if loc == 1 & more1
                                                if ~more2
                                                    nmore2=nmore2+100;
                                                    if sortind1(sortind2(1))-nmore2 < 1
                                                        further=0;
                                                    end
                                                else 
                                                    further = 0;
                                                end
                                            else
                                                if loc == nmore+1 & more2
                                                    if ~more1
                                                        nmore2=nmore2+100;
                                                        if final & sortind1(sortind2(1))+nmore-nmore2+adjh-1 > n
                                                            further=0;
                                                        elseif fine & (sortind1(sortind2(1))+nmore-nmore2+adjh-1 > size(obsingroup{totgroup+1},2) | sortind1(sortind2(1))-nmore2<1)
                                                            further=0;
                                                        end
                                                    else 
                                                        further=0;
                                                    end
                                                else
                                                    further=0;
                                                end
                                            end
                                        end
                                    end
                                end
                                z(p)=z(p)+center;
                                residu=residu-center;
                            end
                        end
                    end
                end
            end
            for j=1:csteps %csteps on the subsets
                tottimes=tottimes+1;
                if ~Hsets_ind
                    if z(1,1)~=inf
                        [sortres,sortind]=sort(abs(residu));
                        if fine & ~final
                            sortind=obsingroup{totgroup+1}(sortind);                    
                        elseif part & ~final
                            sortind=obsingroup{k}(sortind);
                        end
                        obs_in_set=sort(sortind(1:adjh));
                        teller(obs_in_set) = teller(obs_in_set) + 1;
                        teller(end) = teller(end) + 1;
                    end
                else
                    obs_in_set = Hsets(i,:);
                end
                
                if Hsets_ind|(~Hsets_ind & z(1,1)~=inf)
                    [Q,R]=qr(x(obs_in_set,:),0);
                    z=R\(Q'*y(obs_in_set,1));
                    if ~part | final
                        residu=y-x*z;   
                    elseif ~fine
                        residu=y(obsingroup{k},1)-x(obsingroup{k},:)*z;
                    else
                        residu=y(obsingroup{totgroup+1},1)-x(obsingroup{totgroup+1},:)*z;
                    end
                    more1=0;
                    more2=0;
                    nmore=200;
                    nmore2=nmore/2;
                    if intadj %intercept adjustment
                        [sortres,sortind]=sort(residu);
                        if ~part
                            [center,cover,loc]=mcduni(sortres,obsingroup,adjh,obsingroup-adjh+1,alfa);
                            z(p)=z(p)+center;
                            residu=residu-center;
                        elseif ~fine
                            [center,cover,loc]=mcduni(sortres,size(obsingroup{k},2),adjh,size(obsingroup{k},2)-adjh+1,alfa);   
                            z(p)=z(p)+center;
                            residu=residu-center;
                        elseif ~final & size(obsingroup{totgroup+1},2)-adjh <= nmore
                            [center,cover,loc]=mcduni(sortres,size(obsingroup{totgroup+1},2),adjh,size(obsingroup{totgroup+1},2)-adjh+1,alfa);
                            z(p)=z(p)+center;
                            residu=residu-center;
                        elseif final & n-adjh <= nmore
                            [center,cover,loc]=mcduni(sortres,n,adjh,n-adjh+1,alfa);
                            z(p)=z(p)+center;
                            residu=residu-center;
                        else   
                            [sortres1,sortind1]=sort(abs(sortres));
                            [sortres2,sortind2]=sort(sortres(sortind1(1:adjh)));
                            further = 1;
                            if final & (sortind1(sortind2(1))+nmore-nmore2+adjh-1 > n  | sortind1(sortind2(1))-nmore2< 1)
                                [center,cover,loc]=mcduni(sortres,n,adjh,n-adjh+1,alfa);
                                z(p)=z(p)+center;
                                residu=residu-center;
                            elseif ~final & fine & (sortind1(sortind2(1))+nmore-nmore2+adjh-1 > size(obsingroup{totgroup+1},2) | sortind1(sortind2(1))-nmore2 < 1)
                                [center,cover,loc]=mcduni(sortres,size(obsingroup{totgroup+1},2),adjh,size(obsingroup{totgroup+1},2)-adjh+1,alfa);
                                z(p)=z(p)+center;
                                residu=residu-center;
                            else   
                                while further
                                    sortres2(1:adjh+nmore)=sortres(sortind1(sortind2(1))-nmore2:sortind1(sortind2(1))+adjh-1+nmore-nmore2);
                                    [center,cover,loc]=mcduni(sortres2,adjh+nmore,adjh,nmore+1,alfa);
                                    if loc == 1 & ~more1
                                        if ~more2
                                            nmore=nmore2;
                                            nmore2=nmore2+nmore2;
                                            more1=1;
                                            if sortind1(sortind2(1))-nmore2 < 1
                                                further=0;
                                            end
                                        else 
                                            further=0;
                                        end
                                    else
                                        if loc == nmore+1 & ~more2
                                            if ~more1
                                                nmore=nmore2;
                                                nmore2=-nmore2;
                                                more2=1;
                                                if final & sortind1(sortind2(1))+nmore-nmore2+adjh-1 > n  
                                                    further=0;
                                                elseif fine & (sortind1(sortind2(1))+nmore-nmore2+adjh-1 > size(obsingroup{totgroup+1},2) | sortind1(sortind2(1))-nmore2<1)
                                                    further=0;
                                                end
                                            else further=0; 
                                            end
                                        else
                                            if loc == 1 & more1
                                                if ~more2
                                                    nmore2=nmore2+100;
                                                    if sortind1(sortind2(1))-nmore2 < 1
                                                        further=0;
                                                    end
                                                else further = 0; 
                                                end
                                            else
                                                if loc == nmore+1 & more2
                                                    if ~more1
                                                        nmore2=nmore2+100;
                                                        if final & sortind1(sortind2(1))+nmore-nmore2+adjh-1 > n
                                                            further=0;
                                                        elseif fine & (sortind1(sortind2(1))+nmore-nmore2+adjh-1 > size(obsingroup{totgroup+1},2) | sortind1(sortind2(1))-nmore2<1)
                                                            further=0;
                                                        end
                                                    else further=0; 
                                                    end
                                                else
                                                    further=0;
                                                end
                                            end
                                        end
                                    end
                                end
                                z(p)=z(p)+center;
                                residu=residu-center;
                            end
                        end
                    end
                    sor=sort(abs(residu));
                    obj=sum(sor(1:adjh).^2); %objective function value after the iteration
                    if j >= 2 & obj == prevobj
                        break;
                    end
                    prevobj=obj;
                end %end final
            end
            if ~final 
                if fine |~part %merged set or n<600
                    if obj < max(bobj)		
                        [bcoeff,bobj]=insertion(bcoeff,bobj,z,obj,1,eps);
                    end
                else
                    if obj < max(bobj1(k,:))	
                        [bcoeff1,bobj1]=insertion(bcoeff1,bobj1,z,obj,k,eps);
                    end
                end
            end
            if final & obj < bestobj
                bestset=obs_in_set;
                bestobj=obj;
                coeffs=z;
            end 
        end    %end for nsamp
    end %end for ngroups
    if part & ~fine
        fine = 1;
    elseif (part & fine & ~final) | (~part & ~final)
        final = 1;
    else
        final = 2;
    end
end %end while,  so final = 2

if p <= 1
    coeffs(1)=coeffs(1)*datamad(p+1)/datamad(1);
else
    coeffs(1:p-1)=(coeffs(1:p-1)*datamad(p+1))'./datamad(1:p-1);
    if ~intercept
        coeffs(p)=coeffs(p)*datamad(p+1)/datamad(p);
    else
        coeffs(p)=coeffs(p)*datamad(p+1);
        coeffs(p)=coeffs(p)-sum(coeffs(1:p-1)'.*datamed(1:p-1));
        coeffs(p)=coeffs(p)+datamed(p+1);
    end      
end
bestobj=bestobj*(datamad(p+1)^2);
x=xorig;
y=yorig;
raw.coefficients=coeffs;
raw.objective=bestobj;
fitted=x*coeffs;
raw.fitted=fitted;
residuals=y-fitted;
raw.residuals=residuals;
sor=sort(residuals.^2);
factor=rawconsfactorlts(h,n);
sh0=sqrt((1/h)*sum(sor(1:h)));
s0=sh0*factor;
cutoff.resd=sqrt(chi2inv(0.975,1));
if abs(s0) < 1e-7
   weights=abs(residuals)<=1e-7;
   raw.wt=weights;
   raw.scale=0;
   res.scale=0;
   res.coefficients=raw.coefficients;
   raw.objective=0;
else
    raw.scale=s0;
    m=2*n/asvarscalekwad(h,n);
    quantile=tinv(0.9875,m);
    weights=abs(residuals/s0)<=quantile;
    raw.wt=weights;
    [Q,R]=qr(x(weights==1,:),0);
    z=R\(Q'*y(weights==1));
    res.coefficients=z;
    fitted=x*res.coefficients;
    residuals=y-fitted;
    res.scale=sqrt(sum(weights.*residuals.^2)/(sum(weights)-1));
    s0=res.scale;
    weights=abs(residuals/res.scale)<=cutoff.resd;
end
res.flag=weights;

res.Hsubsets.Hopt = bestset;
[telobs,indobs] = greatsort(teller(1:(end - 1)));
res.Hsubsets.Hfreq = indobs(1:(h));
if size(res.Hsubsets.Hfreq,2) == 1
    res.Hsubsets.Hfreq = res.Hsubsets.Hfreq';
end

if intercept
    yw=y(raw.wt==1);
    cyw=mcenter(yw);
    sres=sum(residuals(raw.wt==1).^2);
    cwy2=sum(cyw.^2);
    res.rsquared=1-sres/cwy2;
else
    sor=sort(residuals.^2);
    s1=sum(sor(1:h));
    sor=sort(y.^2);
    sh=sum(sor(1:h));
    res.rsquared=1-(s1/sh);
end
if res.rsquared > 1
   res.rsquared=1;
elseif res.rsquared < 0
    res.rsquared=0;
end

if abs(s0) < 1e-7
    res.method=strvcat(res.method,'An exact fit was found!');
    res.Hsubsets.Hopt=1:n;
    res.Hsubsets.Hfreq=1:n;
    disp('Exact fit was encountered')
end   

if ~intercept
    data=x;
else 
    data=x(:,1:p-1);
end
%calculating residual distances : in case of a univariate analysis they are
%equal to the standardized residuals.
stdres=residuals/res.scale;
cutoff.resd=sqrt(chi2inv(0.975,1));

%assigning ouput
raw=struct('coefficients',{raw.coefficients},'fitted',{raw.fitted},'res',{raw.residuals},'scale',{raw.scale},...
    'objective',{raw.objective},'wt',{raw.wt});
rew=struct('slope',{res.coefficients(1:p)},'int',{0},'fitted',{fitted},'res',{residuals},...
    'scale',{res.scale},'rsquared',{res.rsquared},'h',{h},'Hsubsets',{res.Hsubsets},'alpha',{alfa},'rd',{0},'resd', {stdres},...
    'cutoff',{cutoff},'flag',{res.flag},...
    'method',{res.method},'class',{'LTS'},'classic',{res.classic},'X',{data},'y',{y});

if intercept
    rew=setfield(rew,'int',res.coefficients(p));
    rew=setfield(rew,'slope',res.coefficients(1:p-1));
end

if isfield(rew,'X') & ((size(x,2)-intercept)~=1 | size(y,2)~=1)
    rew=rmfield(rew,{'X','y'});
end
warning on
if plots & classic
    mcdres=mcdcov(data,'h',h,'plots',0);
    if -log(abs(det(mcdres.cov)))/size(data,2)> 50
        res.rd=NaN;
    else
        res.rd=mcdres.rd;
    end
    cutoff.rd=mcdres.cutoff.rd; 
    cutoff.md=mcdres.cutoff.md;
    rew=setfield(rew,'cutoff',cutoff);
    rew=setfield(rew,'rd', res.rd);
    try
        makeplot(rew,'classic',1)
    catch %output must be given even if plots are interrupted 
        %> delete(gcf) to get rid of the menu 
    end
elseif plots
    mcdres=mcdcov(data,'h',h,'plots',0);
    if -log(abs(det(mcdres.cov)))/size(data,2)> 50
        res.rd=NaN;
    else
        res.rd=mcdres.rd;
    end
    cutoff.rd=mcdres.cutoff.rd; 
    cutoff.md=mcdres.cutoff.md;
    rew=setfield(rew,'cutoff',cutoff);
    rew=setfield(rew,'rd', res.rd);
    try
        makeplot(rew)
    catch %output must be given even if plots are interrupted 
        %> delete(gcf) to get rid of the menu 
    end
end
if rew.rd==0
    rew=rmfield(rew,'rd');
end
% --------------------------------------------------------------------

function obsingroup = fillgroup(n,group,ngroup,seed)

% Creates the subdatasets.

obsingroup=cell(1,ngroup+1);
jndex=0;
for k = 1:ngroup
   for m = 1:group(k)
      [random,seed]=uniran(seed);
      ran=floor(random*(n-jndex)+1);
      jndex=jndex+1;
      if jndex == 1
         index(1,jndex)=ran;
         index(2,jndex)=k;
      else
         index(1,jndex)=ran+jndex-1;
         index(2,jndex)=k;
         ii=min(find(index(1,1:jndex-1) > ran-1+[1:jndex-1]));
         if length(ii)
            index(1,jndex:-1:ii+1)=index(1,jndex-1:-1:ii);
            index(2,jndex:-1:ii+1)=index(2,jndex-1:-1:ii);
            index(1,ii)=ran+ii-1;
            index(2,ii)=k;
         end
      end
   end
   obsingroup{k}=index(1,index(2,:)==k);
   obsingroup{ngroup+1}=[obsingroup{ngroup+1},obsingroup{k}];
end

% --------------------------------------------------------------------

function [ranset,seed] = randomset(tot,nel,seed)

for j = 1:nel
   [random,seed]=uniran(seed);
   num=floor(random*tot)+1;
   if j > 1
      while any(ranset==num)
         [random,seed]=uniran(seed);
         num=floor(random*tot)+1;
      end
   end
   ranset(j)=num;   
end


% --------------------------------------------------------------------

function [output] = replow(k,pmax)

replow=[500,50,22,17,15,14];
help=zeros(1,pmax-5);
replow=[replow help];
output=replow(k);

% --------------------------------------------------------------------

function [initmean,initcov,iloc]=mcduni(y,ncas,h,len,alfa)

% The exact MCD algorithm for the univariate case. 

y=sort(y);
ay(1)=sum(y(1:h));
factor=rawconsfactormcd(h,ncas);

for samp=2:len
   ay(samp)=ay(samp-1)-y(samp-1)+y(samp+h-1);
end
ay2=ay.^2/h;
sq(1)=sum(y(1:h).^2)-ay2(1);
for samp=2:len
   sq(samp)=sq(samp-1)-y(samp-1)^2+y(samp+h-1)^2-ay2(samp)+ay2(samp-1);
end
sqmin=min(sq);
ii=find(sq==sqmin);
ndup=length(ii);
slutn(1:ndup)=ay(ii);
initmean=slutn(floor((ndup+1)/2))/h;
initcov=factor^2*sqmin/h;
iloc=ii(1);

% -----------------------------------------------------------------------------

function [bestmean,bobj]=insertion(bestmean,bobj,z,obj,row,eps)

insert=1;
equ=find(obj==bobj(row,:));
for j=equ
   if (z==bestmean{row,j}) 
      insert=0;
   end
end
if insert 
   ins=min(find(obj < bobj(row,:))); 
   if ins==10
      bestmean{row,ins}=z;
      bobj(row,ins)=obj;
   else
      [bestmean{row,ins+1:10}]=deal(bestmean{row,ins:9});
      bestmean{row,ins}=z;
      bobj(row,ins+1:10)=bobj(row,ins:9);
      bobj(row,ins)=obj;
   end
end

% --------------------------------------------------------------------------------

function quan=quanf(alfa,n,rk)

quan=floor(2*floor((n+rk+1)/2)-n+2*(n-floor((n+rk+1)/2))*alfa);


%---------------------------------------------------------------

function rawconsfacmcd=rawconsfactormcd(quan,n)

qalpha=chi2inv(quan/n,1);
calphainvers=gamcdf(qalpha/2,1/2+1)/(quan/n);
calpha=1/calphainvers;
rawconsfacmcd=calpha;


%-------------------------------------------------------------

function rawconsfaclts=rawconsfactorlts(quan,n)

rawconsfaclts=(1/sqrt(1-((2*n)/(quan*(1/norminv((quan+n)/(2*n)))))*...
		normpdf(1/(1/(norminv((quan+n)/(2*n)))))));

%--------------------------------------------------------------

function asvar=asvarscalekwad(quan,n)

alfa=quan/n;
alfa=1-alfa;
qalfa=chi2inv(1-alfa,1);
c2=gamcdf(qalfa/2,1/2+1);
c1=1/c2;
c3=3*gamcdf(qalfa/2,1/2+2);
asvar=qalfa*(1-alfa)-c2;
asvar=asvar^2;
asvar=(c3-2*qalfa*c2+(1-alfa)*(qalfa^2))-asvar;
asvar=c1^2*asvar;
%--------------------------------------------------------------