Predicting Mortality of ICU Patients: The PhysioNet/Computing in Cardiology Challenge 2012 1.0.0

File: <base>/sources/reko.kemppainen_at_gmail.com/entry8/PCA_imputation.m (17,235 bytes)
 function PCA_imputation(X1,X2,DESCRIPTORS,IHD,names1,names2)
% X_orig=X;
% x1=X(~isnan(X(:,1)) & ~isnan(X(:,2)),1);
% x2=X(~isnan(X(:,1)) & ~isnan(X(:,2)),2);
% X=[x1 x2];

%n2=length(x1);
% DIF_A=X1-X2;
% DIF_A(:,strcmp(names1,'PH'))=abs(DIF_A(:,strcmp(names1,'PH')));

DIF_A=X2;

X=[X1 DIF_A ];

%X=[X1];

%X=[X];
AGE_a=DESCRIPTORS(:,strcmp(names2,'Age'));
Gender_a=DESCRIPTORS(:,strcmp(names2,'Gender'));
ICUTYPE_a=[DESCRIPTORS(:,strcmp(names2,'ICUType'))];

ICUTYPE_t=ICUTYPE_a;

ICUTYPE_a(ICUTYPE_t==1)=2;
ICUTYPE_a(ICUTYPE_t==2)=4;
ICUTYPE_a(ICUTYPE_t==3)=1;
ICUTYPE_a(ICUTYPE_t==4)=3;

X=[AGE_a ICUTYPE_a X];


set_name_b='set-b';
[MEAN_DATA_24_B, MEAN_DATA_48_B, DESCRIPTORS_B]= read_data(set_name_b);



% DIF_B=MEAN_DATA_24_B-MEAN_DATA_48_B;
% DIF_B(:,strcmp(names1,'PH'))=abs(DIF_B(:,strcmp(names1,'PH')));

DIF_B=MEAN_DATA_48_B;



Xprobe=[MEAN_DATA_24_B DIF_B];

%Xprobe=[MEAN_DATA_24_B];

%Xprobe=[DESCRIPTORS_B(:,strcmp(names2,'Age')) Xprobe];
AGE_b=DESCRIPTORS_B(:,strcmp(names2,'Age'));
Gender_b=DESCRIPTORS_B(:,strcmp(names2,'Gender'));
ICUTYPE_b=[DESCRIPTORS_B(:,strcmp(names2,'ICUType'))];


ICUTYPE_t=ICUTYPE_b;

ICUTYPE_b(ICUTYPE_t==1)=2;
ICUTYPE_b(ICUTYPE_t==2)=4;
ICUTYPE_b(ICUTYPE_t==3)=1;
ICUTYPE_b(ICUTYPE_t==4)=3;

Xprobe=[AGE_b ICUTYPE_b Xprobe];

%X_in=X(ICUTYPE_a==1,:);


X_in=X;

Xprobe_in=zeros(size(X_in))+NaN;
%selector_b=selector_b(1:min(end,sum(ICUTYPE_a==1)));

%Xprobe_temp=Xprobe(ICUTYPE_b==1,:);
Xprobe_temp=Xprobe;

Xprobe_in(1:min(size(Xprobe_temp,1),size(X_in,1)),:)=Xprobe_temp(1:min(size(Xprobe_temp,1),size(X_in,1)),:);
% scores1=opt_PCA_classification(X_in,IHD(ICUTYPE_a==1),Xprobe_in);
% save('scores1_n35.mat','scores1')

savefile='gender0_impu.mat';
scores1=opt_PCA_classification_test(savefile,X_in,IHD,Xprobe_in,Gender_a,0);


X_in=X;
Xprobe_in=zeros(size(X_in))+NaN;
Xprobe_temp=Xprobe;
Xprobe_in(1:min(size(Xprobe_temp,1),size(X_in,1)),:)=Xprobe_temp(1:min(size(Xprobe_temp,1),size(X_in,1)),:);
savefile='gender1_impu.mat';
scores2=opt_PCA_classification_test(savefile,X_in,IHD,Xprobe_in,Gender_a,1);




% 
% X_in=X;
% Xprobe_in=zeros(size(X_in))+NaN;
% Xprobe_temp=Xprobe;
% Xprobe_in(1:min(size(Xprobe_temp,1),size(X_in,1)),:)=Xprobe_temp(1:min(size(Xprobe_temp,1),size(X_in,1)),:);
% 
% savefile='icu3.mat';
% scores3=opt_PCA_classification_test(savefile,X_in,IHD,Xprobe_in,ICUTYPE_a,3);
% 
% X_in=X;
% Xprobe_in=zeros(size(X_in))+NaN;
% Xprobe_temp=Xprobe;
% Xprobe_in(1:min(size(Xprobe_temp,1),size(X_in,1)),:)=Xprobe_temp(1:min(size(Xprobe_temp,1),size(X_in,1)),:);
% 
% savefile='icu4.mat';
% scores4=opt_PCA_classification_test(savefile,X_in,IHD,Xprobe_in,ICUTYPE_a,4);
% 
% 
% 
% X_in=X;
% 
% Xprobe_in=zeros(size(X_in))+NaN;
% 
% Xprobe_temp=Xprobe;
% 
% Xprobe_in(1:min(size(Xprobe_temp,1),size(X_in,1)),:)=Xprobe_temp(1:min(size(Xprobe_temp,1),size(X_in,1)),:);
% 
% savefile='icu_2_pca.mat';
% scores2=opt_PCA_classification_test(savefile,X_in,IHD,Xprobe_in,ICUTYPE_a,2);
% 
% 
% X_in=X;
% 
% Xprobe_in=zeros(size(X_in))+NaN;
% 
% Xprobe_temp=Xprobe;
% 
% Xprobe_in(1:min(size(Xprobe_temp,1),size(X_in,1)),:)=Xprobe_temp(1:min(size(Xprobe_temp,1),size(X_in,1)),:);
% 
% savefile='icu_3_pca.mat';
% scores3=opt_PCA_classification_test(savefile,X_in,IHD,Xprobe_in,ICUTYPE_a,3);
% 
% X_in=X;
% 
% Xprobe_in=zeros(size(X_in))+NaN;
% 
% Xprobe_temp=Xprobe;
% 
% Xprobe_in(1:min(size(Xprobe_temp,1),size(X_in,1)),:)=Xprobe_temp(1:min(size(Xprobe_temp,1),size(X_in,1)),:);
% 
% savefile='icu_4_pca.mat';
% scores4=opt_PCA_classification_test(savefile,X_in,IHD,Xprobe_in,ICUTYPE_a,4);
% 

scores1
scores2
% scores3
% scores4


% main_run

% pause()
return

%savefile='icu_all.mat';
%scores1=opt_PCA_classification_test(savefile,X_in,IHD(ICUTYPE_a==1),Xprobe_in);

%N2=45
X_in=X(ICUTYPE_a==2,:);
Xprobe_in=zeros(size(X_in))+NaN;
%selector_b=selector_b(1:min(end,sum(ICUTYPE_a==1)));
Xprobe_temp=Xprobe(ICUTYPE_b==2,:);
Xprobe_in(1:min(size(Xprobe_temp,1),size(X_in,1)),:)=Xprobe_temp(1:min(size(Xprobe_temp,1),size(X_in,1)),:);
% scores2=opt_PCA_classification(X_in,IHD(ICUTYPE_a==2),Xprobe_in);
% save('scores2_n35.mat','scores2')


savefile='icu2.mat';
scores2=opt_PCA_classification_test(savefile,X_in,IHD(ICUTYPE_a==2),Xprobe_in);


X_in=X(ICUTYPE_a==3,:);
Xprobe_in=zeros(size(X_in))+NaN;
%selector_b=selector_b(1:min(end,sum(ICUTYPE_a==1)));
Xprobe_temp=Xprobe(ICUTYPE_b==3,:);
Xprobe_in(1:min(size(Xprobe_temp,1),size(X_in,1)),:)=Xprobe_temp(1:min(size(Xprobe_temp,1),size(X_in,1)),:);
% scores3=opt_PCA_classification(X_in,IHD(ICUTYPE_a==3),Xprobe_in);
% save('scores3_n35.mat','scores3')


savefile='icu3.mat';
scores3=opt_PCA_classification_test(savefile,X_in,IHD(ICUTYPE_a==3),Xprobe_in);


X_in=X(ICUTYPE_a==4,:);
Xprobe_in=zeros(size(X_in))+NaN;
%selector_b=selector_b(1:min(end,sum(ICUTYPE_a==1)));
Xprobe_temp=Xprobe(ICUTYPE_b==4,:);
Xprobe_in(1:min(size(Xprobe_temp,1),size(X_in,1)),:)=Xprobe_temp(1:min(size(Xprobe_temp,1),size(X_in,1)),:);
% scores4=opt_PCA_classification(X_in,IHD(ICUTYPE_a==4),Xprobe_in);
% save('scores4_n35.mat','scores4')

savefile='icu4.mat';
scores4=opt_PCA_classification_test(savefile,X_in,IHD(ICUTYPE_a==4),Xprobe_in);



scores1
scores2
scores3
scores4

% lam=35:5:60;
% 
% figure()
% plot(lam,scores1)
% figure()
% plot(lam,scores2)
% figure()
% plot(lam,scores3)
% figure()
% plot(lam,scores4)


% [pc1,score1,latent1,tsquare1] = princomp(zscore(X));
%  opts = struct( 'maxiters', 1000,...
%                 'algorithm', 'vb',...
%                   'xprobe', Xprobe,...
%                 'uniquesv', 0,...
%                 'cfstop', [ 100 0 0 ],...
%                 'minangle', 0 );

    function best_lam_score=opt_PCA_classification(DATA,IHD_in,probe)
        probe=probe';
        opts = struct( 'maxiters', 2000,...
            'xprobe', probe,...
            'uniquesv', 0,...
            'autosave',10000000,...
            'cfstop', [ 100 0 0 ],...
            'minangle', 0 );
        n2=size(DATA,1);
        
        best_lam=0;
        max_score1_lam=0;
        max_score2_lam=0;
        best_lam_score=[];
        lam=0.01:0.002:.03;
        lam=[10 20:5:35];
 
    
        X_orig=DATA;
        
        for lam_idx=1:length(lam)
            
            %         Y=(IHD-1)+IHD; % classes y ={-1,1}
            %         X=X_Test;
            %         R=~isnan(X);
            %         PHAT=teach_log_reg(X,Y,R,lam(lam_idx));
            N=35;
            [ A, S, Mu, V, CV, HP, LC ] = pca_full( X_orig', N,opts );
            
            X_rec =( repmat(Mu,1,n2) + A*S)';
            % %[ A, S, Mu, LC ] = pcaimput( X_orig', N ) ;
            % scatter(X(:,1),X(:,2),'or')
            % hold on
            % scatter(Xrec(:,1),Xrec(:,2),'xg')
            
              [pc1,score1,latent1,tsquare1] = princomp(zscore(X_rec));
              X_rec=score1(:,1:end);
             N2=sum(cumsum(latent1)<0.999*sum(latent1));
             N2=min(N2,lam(lam_idx));
             
             X_rec=X_rec(:,1:N2);%
             
            %X=X(:,1:N);
%             size(X_rec)
%             size(IHD_in)

            train=1:round(n2/2);
            valid=round(n2/2):n2;

            B2 = mnrfit(X_rec(train,:),IHD_in(train)+1);
           % size(B2)
          %%  if size(B2,1)== size(X_rec,2)+1 && size(B2,2)==1
            PHAT = mnrval(B2,X_rec(valid,:));
          %  else
               
           %  best_lam_score(lam_idx)=-1;
           %  continue;
             
        %    end
            
            DATA=zeros(length(valid),3);
            
            IHD_valid=IHD_in(valid);
            
            max_score1=0;
            max_score2=0;
            best_th=0;
            
            for class_th=.1:.01:0.5
                
                
                
                % DATA(:,1)=str2double('0000');
                
                
                DATA(:,2)=PHAT(:,2);
                %DATA(isnan(PHAT(:,2)),2)=PHAT_saps(:,2);
                DATA(:,3)=PHAT(:,2)> class_th;
                % DATA(isnan(PHAT(:,2)),3)=PHAT_saps(:,2)> 0.33;
                
                % % %     DATA(:,2)=PHAT(:,2);
                % % %     DATA(isnan(PHAT(:,2)),2)=PHAT_saps(:,2);
                % % %     DATA(~isnan(PHAT(:,2)),3)=PHAT(~isnan(PHAT(:,2)),2)> class_th;
                % % %     DATA(isnan(PHAT(:,2)),3)=PHAT_saps(:,2)> 0.33;
                
                DATA(DATA(:,2)<0.01,2)=0.01;
                DATA(DATA(:,2)>0.99,2)=0.99;
                %DATA(:,3)=PHAT1(:,2)>0.17 & PHAT2(:,2)>class_th ;
                
                %     if max_score1==0
                %           savefile = 'DATA_learn.mat';
                %         save(savefile, 'DATA')
                %
                %     end
                
                
                %  if(~isempty(results))
                
                
                % Calculate sensitivity (Se) and positive predictivity (PPV)
                TP=sum(DATA(IHD_valid==1,3));
                FN=sum(~DATA(IHD_valid==1,3));
                FP=sum(DATA(IHD_valid==0,3));
                Se=TP/(TP+FN);
                PPV=TP/(TP+FP);
                
                show=0; % if show is 1, the decile graph will be displayed by lemeshow()
                %  H=lemeshow([IHD DATA(:,2)],show);
                
                % Use the title of figure to display the results
                %    title(['H= ' num2str(H) ' Se= ' num2str(Se) ' PPV= ' num2str(PPV) '. ' num2str(class_th) ])
                
                % The event 1 score is the smaller of Se and PPV.
                score1 = min(Se, PPV);
                if score1>max_score1
                    max_score1=score1;
                    best_th=class_th;
                    %   max_score2=H;
                    %  display(['Unofficial Event 1 score: ' num2str(score1)]);
                end
                
                % The event 2 score is the Hosmer-Lemeshow statistic (H).
                %display(['Unofficial Event 2 score: ' num2str(H)]);
                
                %         figure(10)
                %         hold on
                %         scatter(class_th,score1,'x')
                % end
                
                
                
                
            end
            best_lam_score(lam_idx)=max_score1;
            if max_score1 > max_score1_lam
                best_lam=0;
                max_score1_lam=max_score1;
                max_score2_lam=max_score2;
                
            end
            
        end
        
    end



%% test function




    function best_lam_score=opt_PCA_classification_test(str,DATA,IHD_in,probe,TYPES,type)
        probe=probe';
        opts = struct( 'maxiters', 5000,...
            'xprobe', probe,...
            'uniquesv', 0,...
            'cfstop', [ 100 0 0 ],...
            'minangle', 0 );
        n2=size(DATA,1);
        
        best_lam=0;
        max_score1_lam=0;
        max_score2_lam=0;
        best_lam_score=[];
        lam=35;
        lam_idx=1;
        X_orig=DATA;
        
        N2_min=30;
        
      % 'best_th','A','S','Mu','V', 'CV', 'HP', 'LC','N','pc1','mu','sigma','B')

        
       % for lam_idx=1:length(lam)
            
            %         Y=(IHD-1)+IHD; % classes y ={-1,1}
            %         X=X_Test;
            %         R=~isnan(X);
            %         PHAT=teach_log_reg(X,Y,R,lam(lam_idx));
            N=lam(1);
            [ A, S, Mu, V, CV, HP, LC ] = pca_full( X_orig', N,opts );
            
            X_in =( repmat(Mu,1,n2) + A*S)';
            
            
         
            
            X_new=X_orig(1,:)';
            S_new=calc_S_new_data(X_new,A,V,Mu,N,CV);
            
            X_new_rec =( repmat(Mu,1,1) + A*S_new)';
% % %             
% % %             plot(X_new_rec,'r');hold on;plot(X_orig(20,:)')
            
            
            % %[ A, S, Mu, LC ] = pcaimput( X_orig', N ) ;
            % scatter(X(:,1),X(:,2),'or')
            % hold on
            % scatter(Xrec(:,1),Xrec(:,2),'xg')
            
            
            
% %             [Z,mu,sigma] = zscore(X_in);
% % % % % %            
% % % % % %             dim = find(size(X_in) ~= 1, 1);
% % % % % %             % direc
% %             sigma0 = sigma;
% %             sigma0(sigma0==0) = 1;
% %             z = bsxfun(@minus,X_new_rec', mu');
% %             z = bsxfun(@rdivide, z, sigma0');
% %             %--
% %             
% %               [pc1,score1,latent1,tsquare1] = princomp(Z);
% %               X_in=score1(:,1:end);
% %               
% %               
% %               
% %               
% %             N2=sum(cumsum(latent1)<0.999*sum(latent1));
% %               N2=min(N2,N2_min);
% %               
% %             x_in=pinv(pc1)*z(:,1);
% %              
% %             
% %           %  plot(pinv(pc1)*z(:,1),'r');hold on; plot(X_in(20,:))
% %         
% %             X_in=X_in(:,1:N2);%   X=X(:,1:N);
            
            % separate for each TYPE
            X_in=X_in(TYPES==type,:);
            IHD_in=IHD_in(TYPES==type);

            
            B = mnrfit(X_in,IHD_in+1);
            PHAT = mnrval(B,X_in);
            
          %    PHAT_ = mnrval(B,x_in);
            
            DATA=zeros(size(X_in,1),3);
            
            max_score1=0;
            max_score2=0;
            best_th=0;
            
            for class_th=.1:.01:0.5
                
                
                
                % DATA(:,1)=str2double('0000');
                
                
                DATA(:,2)=PHAT(:,2);
                %DATA(isnan(PHAT(:,2)),2)=PHAT_saps(:,2);
                DATA(:,3)=PHAT(:,2)> class_th;
                % DATA(isnan(PHAT(:,2)),3)=PHAT_saps(:,2)> 0.33;
                
                % % %     DATA(:,2)=PHAT(:,2);
                % % %     DATA(isnan(PHAT(:,2)),2)=PHAT_saps(:,2);
                % % %     DATA(~isnan(PHAT(:,2)),3)=PHAT(~isnan(PHAT(:,2)),2)> class_th;
                % % %     DATA(isnan(PHAT(:,2)),3)=PHAT_saps(:,2)> 0.33;
                
                DATA(DATA(:,2)<0.01,2)=0.01;
                DATA(DATA(:,2)>0.99,2)=0.99;
                %DATA(:,3)=PHAT1(:,2)>0.17 & PHAT2(:,2)>class_th ;
                
                %     if max_score1==0
                %           savefile = 'DATA_learn.mat';
                %         save(savefile, 'DATA')
                %
                %     end
                
                
                %  if(~isempty(results))
                
                
                % Calculate sensitivity (Se) and positive predictivity (PPV)
                % using icu type
                
%                 TP=sum(DATA(IHD_in==1 & TYPES==type,3));
%                 FN=sum(~DATA(IHD_in==1 & TYPES==type,3));
%                 FP=sum(DATA(IHD_in==0 & TYPES==type,3));
%                 Se=TP/(TP+FN);
%                 PPV=TP/(TP+FP);
                
% %                 % Calculate sensitivity (Se) and positive predictivity (PPV)
                TP=sum(DATA(IHD_in==1,3));
                FN=sum(~DATA(IHD_in==1,3));
                FP=sum(DATA(IHD_in==0,3));
                Se=TP/(TP+FN);
                PPV=TP/(TP+FP);
                
                show=0; % if show is 1, the decile graph will be displayed by lemeshow()
                %  H=lemeshow([IHD DATA(:,2)],show);
                
                % Use the title of figure to display the results
                %    title(['H= ' num2str(H) ' Se= ' num2str(Se) ' PPV= ' num2str(PPV) '. ' num2str(class_th) ])
                
                % The event 1 score is the smaller of Se and PPV.
                score1 = min(Se, PPV);
                if score1>max_score1
                    max_score1=score1;
                    best_th=class_th;
                    %   max_score2=H;
                    %  display(['Unofficial Event 1 score: ' num2str(score1)]);
                end
                
                % The event 2 score is the Hosmer-Lemeshow statistic (H).
                %display(['Unofficial Event 2 score: ' num2str(H)]);
                
                %         figure(10)
                %         hold on
                %         scatter(class_th,score1,'x')
                % end
                
                
                
                
            end
            best_lam_score(lam_idx)=max_score1;
            if max_score1 > max_score1_lam
                best_lam=0;
                max_score1_lam=max_score1;
                max_score2_lam=max_score2;
                
            end
            

        %end
                    PHAT_all=PHAT;
        save(str,'N2','PHAT_all','best_th','A','S','Mu','V', 'CV', 'HP', 'LC','N','pc1','mu','sigma','B')

    end
end