Predicting Mortality of ICU Patients: The PhysioNet/Computing in Cardiology Challenge 2012 1.0.0

File: <base>/sources/reko.kemppainen_at_gmail.com/entry9/lin_reg_classifier.m (11,339 bytes)
function lin_reg_classifier(IHD,time_series_names,MD_DATA_D,MD_DATA_DD,MD_DATA,DESCRIPTORS,des_names,R,MD_VARI,MEAN_DATA_24,MEAN_DATA_48)

Y=(IHD-1)+IHD; % classes y ={-1,1}
% X=[MEAN_DATA_24 MEAN_DATA_48];

%X=[MEAN_DATA_24(params_to_use)];

% normeeraus ja keskiarvon poisto


X=[MEAN_DATA_24 MEAN_DATA_48 ];

%X=[X1];

%X=[X];
AGE_a=DESCRIPTORS(:,strcmp(des_names,'Age'));
Gender_a=DESCRIPTORS(:,strcmp(des_names,'Gender'));
ICUTYPE_a=[DESCRIPTORS(:,strcmp(des_names,'ICUType'))];

ICUTYPE_t=ICUTYPE_a;

ICUTYPE_a(ICUTYPE_t==1)=2;
ICUTYPE_a(ICUTYPE_t==2)=4;
ICUTYPE_a(ICUTYPE_t==3)=1;
ICUTYPE_a(ICUTYPE_t==4)=3;

X=[AGE_a ICUTYPE_a X];





%if des{:,strcmp(des_names,'Gender')}==0

%X_new_rec=zeros(size(X));

 % savefile='gender0.mat'; 
  savefile='gender0_60.mat';
  %  savefile='gender0_60_ppca';

  load(savefile,'N2','PHAT_all','best_th','A','S','Mu','V', 'CV', 'HP', 'LC','N','pc1','mu','sigma','B');
   

     % S_new=calc_S_new_data(X',A,V,Mu,N,CV);
    X_new_rec =( repmat(Mu,1,size(S,2)) + (A*S))';
  %  mu1=Mu;
  
  
 % sets outliers to group mean 
%    X1=X_new_rec(:,3:39);
%  X2=X_new_rec(:,40:end);
%   mu24=mu(3:39);  
%   mu48=mu(40:end);
%  for param_idx=1:size(X1,2)
%              limits=get_param_limits_by_name(time_series_names{param_idx});
%              data=X1(:,param_idx);
%              data(data<limits(1) | data>limits(2) | isnan(data) )=mu24(param_idx); 
%              X1(:,param_idx)=data;
%              
%              data=X2(:,param_idx);
%              data(data<limits(1) | data>limits(2) | isnan(data))=mu48(param_idx); 
%              X2(:,param_idx)=data;
% 
%  end
 
%  X_new_rec=[X_new_rec(:,1:2) X1 X2];
 
 
      sigma0 = sigma;
    sigma0(sigma0==0) = 1;
    z = bsxfun(@minus,X_new_rec', mu');
    z = bsxfun(@rdivide, z, sigma0');
    z(isnan(z))=0;
           %  x_in=(pinv(pc1)*z)';
  % X(Gender_a==0,:)=x_in(Gender_a==0,:);
  z=z';
    X(Gender_a==0,:)=z(Gender_a==0,:);
   
%     savefile='gender1.mat'; 
  savefile='gender1_60.mat';
 %   savefile='gender1_60_ppca';

  load(savefile,'N2','PHAT_all','best_th','A','S','Mu','V', 'CV', 'HP', 'LC','N','pc1','mu','sigma','B');
      %   mu2=Mu;
%   plot(mu1)
%   hold on
%   plot(mu2)
 % S_new=calc_S_new_data(X(Gender_a==1,:),A,V,Mu,N,CV);
    X_new_rec =( repmat(Mu,1,size(S,2)) + A*S)';
    
    
     % sets outliers to group mean 
%    X1=X_new_rec(:,3:39);
%  X2=X_new_rec(:,40:end);
%     mu24=mu(3:39);  
%   mu48=mu(40:end);
%  for param_idx=1:size(X1,2)
%              limits=get_param_limits_by_name(time_series_names{param_idx});
%              data=X1(:,param_idx);
%              data(data<limits(1) | data>limits(2) | isnan(data) )=mu24(param_idx); 
%              X1(:,param_idx)=data;
%              
%              data=X2(:,param_idx);
%              data(data<limits(1) | data>limits(2) | isnan(data))=mu48(param_idx); 
%              X2(:,param_idx)=data;
% 
%  end
%  
%  X_new_rec=[X_new_rec(:,1:2) X1 X2];
    
    
      sigma0 = sigma;
    sigma0(sigma0==0) = 1;
    z = bsxfun(@minus,X_new_rec', mu');
    z = bsxfun(@rdivide, z, sigma0');
            % x_in=(pinv(pc1)*z)';
 %  X(Gender_a==1,:)=x_in(Gender_a==1,:);
 z=z';
  z(isnan(z))=0;
 X(Gender_a==1,:)=z(Gender_a==1,:);
  X(Gender_a==-1,:)=z(Gender_a==-1,:);

 

 
%X=[ MD_DATA_D MD_DATA_DD MD_DATA double(R) zscore(MD_VARI) X];
X=[Gender_a double(R) MD_VARI X];


%X=[ MD_DATA MD_VARI X];

R=~isnan(X);
s=sum(R,2)==size(X,2); % selects only values that do not contain NaNs
dis=sum(R,2)~=size(X,2);

X=X(s,:);
IHD=IHD(s);
s_selector=s;
 %B=log_reg(X,Y,R);
 %B=teach_log_reg(X,Y,R);
 
 
 
 alpha = 0:0.05:0.7;
 lambda = 0:0.05:0.7;
 scores=zeros(length(alpha)*length(lambda),7);
  scores2=zeros(length(alpha)*length(lambda),7);
 
 idx=1;
 
n=size(X,1);
    s=zeros(n,1);
    s(1:round(n/2))=1;
    s=boolean(s);
 
    dataTrain=X(s,:);
 clabelsTrain=IHD(s);
 labelsTrain=~IHD(s)+1;
 
 
 dataTest=X;
 labelsTest=~IHD+1;
 clabelsTest=IHD;
 
 dataValid=X(~s,:);
 labelsValid=~IHD(~s)+1;
    
 %% jukkiksen starts
% for alpha=0:0.1:0.6
% for lambda=0:0.1:0.6
for alpha=0
for lambda=0.5:0.01:0.6

D = size(dataTrain,2); % dimension

maxIter = 5000;
w_init = 0.1*ones(D+1,1); % initialize parameters

% optimoi malli, siis data annetaan normaalisti piirrevektorina:
[w_opt,fx,it] = minimize(w_init, 'objFuncLR', maxIter, dataTrain, clabelsTrain, alpha, lambda);

% laske ennustustarkkuus:
[ResTrain,labelsEstimTrain,probsTrain,allProbsTrain] = predict_LRoma(w_opt,dataTrain,labelsTrain);
[ResValid,labelsEstimValid,probsValid,allProbsValid] = predict_LRoma(w_opt,dataValid,labelsValid);
[ResTest,labelsEstimTest,probsTest,allProbsTest] = predict_LRoma(w_opt,dataTest,labelsTest);

%% jukkiksen ends

%[best_th,max_score1,BEST_DATA]=opt_th(IHD(s),allProbsTrain);
[best_th1,max_score11,th1,max_score2_11,BEST_DATA]=opt_th(IHD(~s),allProbsValid);
%[best_th,max_score12,th2,max_score2_12,BEST_DATA]=opt_th(~IHD(~s),allProbsValid);
[best_th2,max_score21,th2,max_score2_21,BEST_DATA]=opt_th(IHD,allProbsTest);
%[best_th,max_score22,th2,max_score2_22,BEST_DATA]=opt_th(~IHD(s),allProbsTrain);

% [best_th1,max_score11,th1,max_score2_11,BEST_DATA]=opt_th_icu(IHD(~s),allProbsValid,ICUTYPE_a(~s));
% [best_th2,max_score21,th2,max_score2_21,BEST_DATA]=opt_th_icu(IHD,allProbsTest,ICUTYPE_a);

 scores(idx,:)=[alpha lambda best_th1 max_score11 th1 max_score2_11 ResValid];
 scores2(idx,:)=[alpha lambda best_th2 max_score21 th2 max_score2_21 0];
 idx=idx+1;

end
end

D = size(dataTest,2); % dimension
alpha=0;
lambda=0.54;
maxIter = 10000;
w_init = 0.1*ones(D+1,1); % initialize parameters

% optimoi malli, siis data annetaan normaalisti piirrevektorina:
[w1,fx,it] = minimize(w_init, 'objFuncLR', maxIter, dataTest, clabelsTest, alpha, lambda);

save('w1.mat','w1')


alpha=2;
lambda=4;
maxIter = 10000;
w_init = 0.1*ones(D+1,1); % initialize parameters

% optimoi malli, siis data annetaan normaalisti piirrevektorina:
[w2,fx,it] = minimize(w_init, 'objFuncLR', maxIter, dataTest, clabelsTest, alpha, lambda);

save('w2.mat','w2')



scores=scores(1:idx-1,:);
scores2=scores2(1:idx-1,:);

[m,i]=max(scores(:,4));
save('score1_1_test.mat','scores')

[m,i]=min(scores(:,6));
% save('score1_2_ppca_tst.mat','scores2')

function [best_th,max_score1,best_th2,max_score2,BEST_DATA]=opt_th_icu(IHD_,P,icu)
        
    num_params=size(time_series_names,1);
    DATA=zeros(size(IHD_,1),3);
    
    %B = mnrfit(X_,IHD_+1);
    
    PHAT = P';
       
    
    
    max_score1=0;
    max_score2=1000000;
    best_th=0;
    best_th2=0;
    
    
      for class_th1=.1:.1:0.5
            for class_th2=.1:.1:0.5
                  for class_th3=.1:.1:0.5
                        for class_th4=.1:.1:0.5
        
        
        
                            DATA(:,1)=str2double('0000');

                            DATA(:,2)=PHAT(:,1);

                            DATA(icu==1,3)=PHAT(icu==1,1)> class_th1;
                            DATA(icu==2,3)=PHAT(icu==2,1)> class_th2;
                            DATA(icu==3,3)=PHAT(icu==3,1)> class_th3;
                            DATA(icu==4,3)=PHAT(icu==4,1)> class_th4;

                            DATA(DATA(:,2)<0.01,2)=0.01;
                            DATA(DATA(:,2)>0.99,2)=0.99;


                         %   if(~isempty(results))

                                % Calculate sensitivity (Se) and positive predictivity (PPV)
                                TP=sum(DATA(IHD_==1,3));
                                FN=sum(~DATA(IHD_==1,3));
                                FP=sum(DATA(IHD_==0,3));
                                Se=TP/(TP+FN);
                                PPV=TP/(TP+FP);

                                show=0; % if show is 1, the decile graph will be displayed by lemeshow()
                                H=lemeshow([IHD_ DATA(:,2)],show);

                                % Use the title of figure to display the results
                               % title(['H= ' num2str(H) ' Se= ' num2str(Se) ' PPV= ' num2str(PPV) '. ' num2str(class_th) ])

                                % The event 1 score is the smaller of Se and PPV.
                                score1 = min(Se, PPV);
                                if score1>max_score1
                                    max_score1=score1;
                                   % best_th=[class_th1 class_th2 class_th3 class_th4] ;
                                    best_th=[class_th1 ] ;
                                   % max_score2=H;
                                    BEST_DATA=DATA;
                                    %  display(['Unofficial Event 1 score: ' num2str(score1)]);
                                end

                                if H<max_score2

                                    max_score2=H;
                                    best_th2=class_th1 ;
                                   % BEST_DATA=DATA;
                                    %  display(['Unofficial Event 1 score: ' num2str(score1)]);
                                end

                           % end
        
                        end
                  end
            end
      end
    
end



function [best_th,max_score1,best_th2,max_score2,BEST_DATA]=opt_th(IHD_,P)
        
    num_params=size(time_series_names,1);
    DATA=zeros(size(IHD_,1),3);
    
    %B = mnrfit(X_,IHD_+1);
    
    PHAT = P';
       
    
    
    max_score1=0;
    max_score2=100000;
    best_th=0;
    best_th2=0;
    
    for class_th=.1:.01:0.5
        
        
        
        DATA(:,1)=str2double('0000');
        
        DATA(:,2)=PHAT(:,1);
        
        DATA(:,3)=PHAT(:,1)> class_th;
        
        DATA(DATA(:,2)<0.01,2)=0.01;
        DATA(DATA(:,2)>0.99,2)=0.99;
        
        
     %   if(~isempty(results))
            
            % Calculate sensitivity (Se) and positive predictivity (PPV)
            TP=sum(DATA(IHD_==1,3));
            FN=sum(~DATA(IHD_==1,3));
            FP=sum(DATA(IHD_==0,3));
            Se=TP/(TP+FN);
            PPV=TP/(TP+FP);
            
            show=0; % if show is 1, the decile graph will be displayed by lemeshow()
            H=lemeshow([IHD_ DATA(:,2)],show);
            
            % Use the title of figure to display the results
           % title(['H= ' num2str(H) ' Se= ' num2str(Se) ' PPV= ' num2str(PPV) '. ' num2str(class_th) ])
            
            % The event 1 score is the smaller of Se and PPV.
            score1 = min(Se, PPV);
            if score1>max_score1
                max_score1=score1;
                best_th=class_th;
               % max_score2=H;
                BEST_DATA=DATA;
                %  display(['Unofficial Event 1 score: ' num2str(score1)]);
            end
            
            if H<max_score2
      
                max_score2=H;
                best_th2=class_th;
               % BEST_DATA=DATA;
                %  display(['Unofficial Event 1 score: ' num2str(score1)]);
            end
            
       % end
        
    end
    
end


end