% Predicting Mortality of ICU Patients: The PhysioNet/Computing in Cardiology Challenge 2012 1.0.0
% (10,530 bytes)
function [cleanData, columnHeaders] = TabulatePatientRecordData_binsChangeable2(allPatientStruct, spreads)
%TABULATEPATIENTRECORDDATA_BINSCHANGEABLE2 Arrange all patient data into one array.
%
%   [cleanData, columnHeaders] = ...
%       TabulatePatientRecordData_binsChangeable2(allPatientStruct, spreads)
%
%   Produces one row per element of allPatientStruct. The columns are, in
%   order: Gender, Age, BMI, then the per-bin means of every field listed in
%   binableFields, then the per-bin means of the derived fields BPDias,
%   BPSys, 'BUN to Creatinine' and PaO2FiO2. The finished table is passed
%   through removeNaNs before it is returned.
%
%   Inputs
%     allPatientStruct  struct array. Gender and Age are read with linear
%                       index 2. Every other field is read as an N-by-2
%                       series with column 1 = time and column 2 = value
%                       (an empty field means "no measurements").
%     spreads           N-by-2 cell array. Column 1 holds a field name,
%                       column 2 the vector of bin widths for that field.
%                       Bin k covers the half-open interval
%                       [sum(bins(1:k-1)), sum(bins(1:k))), so the widths
%                       must use the same unit as the time column
%                       (hours, per the original author's notes -- TODO
%                       confirm against the loader). Example:
%                       [8,8,8,6,6,6,6] bins the first day in 8 h steps and
%                       the second day in 6 h steps; the original notes say
%                       the vector should sum to 48.
%
%   Outputs
%     cleanData         nPatients-by-nColumns numeric array.
%     columnHeaders     1-by-nColumns cell array of column names. Binned
%                       columns are named '<field>_Bin <low> to <high>'.
%
%   Compatibility note: earlier versions binned the four derived fields with
%   the spread of the last binable field ('WBC') because of a stale loop
%   index. Each derived field now uses its own entry in spreads; if that
%   entry is missing, the old spread is used and a warning is issued.

% Time-series fields that are binned directly (mean of the samples per bin).
binableFields = {'Albumin';'ALP';'ALT';'AST';'Bilirubin';...
    'Cholesterol';'GCS';...
    'Glucose';'HCO3';'HCT';'HR';'K';'Lactate';'Mg';...
    'Na';'PaCO2';'pH';'Platelets';'RespRate';...
    'SaO2';'Temp';'Urine';'WBC'};
% Fields calculated from other measurements, in output column order.
derivedFields = {'BPDias', 'BPSys', 'BUN to Creatinine', 'PaO2FiO2'};

binSpreads = spreads;
cleanData = [];
columnHeaders = {};
nPatients = length(allPatientStruct);

if nPatients > 0
    % Resolve every bin spread and build the header row once, up front;
    % neither depends on the individual patient.
    nBinable = length(binableFields);
    binableBins = cell(nBinable, 1);
    columnHeaders = {'Gender', 'Age', 'BMI'};
    for j = 1:nBinable
        binableBins{j} = lookupBinWidths(binSpreads, binableFields{j});
        columnHeaders = [columnHeaders, ...
            binHeaderNames(binableFields{j}, binableBins{j})]; %#ok<AGROW>
    end

    legacyField = binableFields{end};
    nDerived = length(derivedFields);
    derivedBins = cell(nDerived, 1);
    for j = 1:nDerived
        derivedBins{j} = lookupBinWidths(binSpreads, derivedFields{j}, legacyField);
        columnHeaders = [columnHeaders, ...
            binHeaderNames(derivedFields{j}, derivedBins{j})]; %#ok<AGROW>
    end

    cleanData = NaN(nPatients, numel(columnHeaders));

    for i = 1:nPatients
        disp(i);
        patient = allPatientStruct(i);

        %---------
        %fields with no bins
        %---------
        gender = patient.Gender(2);
        if gender < 0
            % negative gender is mapped to 0
            gender = 0;
        end
        row = [gender, patient.Age(2), computeBMI(patient.Height, patient.Weight)];

        %---------
        %fields with bins
        %---------
        for j = 1:nBinable
            [times, values] = splitSeries(patient.(binableFields{j}));
            row = [row, meanPerBin(times, values, binableBins{j})]; %#ok<AGROW>
        end

        % Blood pressure: the NI* field is preferred, the plain ABP field is
        % used only when the NI* field is empty.
        [times, values] = splitSeries(firstNonEmptySeries(patient.NIDiasABP, patient.DiasABP));
        row = [row, meanPerBin(times, values, derivedBins{1})]; %#ok<AGROW>

        [times, values] = splitSeries(firstNonEmptySeries(patient.NISysABP, patient.SysABP));
        row = [row, meanPerBin(times, values, derivedBins{2})]; %#ok<AGROW>

        % BUN to creatinine ratio
        [times, values] = ratioSeries(patient.BUN, patient.Creatinine);
        row = [row, meanPerBin(times, values, derivedBins{3})]; %#ok<AGROW>

        % PaO2 / FiO2 ratio
        [times, values] = ratioSeries(patient.PaO2, patient.FiO2);
        row = [row, meanPerBin(times, values, derivedBins{4})]; %#ok<AGROW>

        disp(size(row))
        cleanData(i, :) = row;
    end
end

cleanData = removeNaNs(cleanData, columnHeaders);
end

function bins = lookupBinWidths(binSpreads, fieldName, fallbackName)
% Return the bin-width vector stored in binSpreads for fieldName. When the
% name is absent and fallbackName is given, the fallback entry is used
% instead (with a warning); otherwise an error is raised.
names = binSpreads(:, 1);
row = find(strcmp(names, fieldName), 1);
if isempty(row) && nargin > 2
    row = find(strcmp(names, fallbackName), 1);
    if ~isempty(row)
        warning('TabulatePatientRecordData:fallbackBinSpread', ...
            'No bin spread for "%s"; using the spread of "%s".', ...
            fieldName, fallbackName);
    end
end
if isempty(row)
    error('TabulatePatientRecordData:missingBinSpread', ...
        'No bin spread defined for field "%s".', fieldName);
end
bins = binSpreads{row, 2};
end

function names = binHeaderNames(fieldName, bins)
% Build the 1-by-nBins cell of column names '<field>_Bin <low> to <high>'.
edges = [0, cumsum(bins(:)')];
nBins = length(bins);
names = cell(1, nBins);
for k = 1:nBins
    names{k} = [fieldName, '_Bin ', num2str(edges(k)), ' to ', num2str(edges(k + 1))];
end
end

function means = meanPerBin(times, values, bins)
% Mean of the values whose time falls in each half-open bin [low, high).
% A bin without samples yields NaN (mean of an empty selection).
edges = [0, cumsum(bins(:)')];
nBins = length(bins);
means = NaN(1, nBins);
for k = 1:nBins
    inBin = (times >= edges(k)) & (times < edges(k + 1));
    means(k) = mean(values(inBin));
end
end

function [times, values] = splitSeries(series)
% Split an N-by-2 series into its time and value columns. An empty series
% becomes a single NaN/NaN pair so that every bin evaluates to NaN.
if isempty(series)
    times = NaN;
    values = NaN;
else
    times = series(:, 1);
    values = series(:, 2);
end
end

function series = firstNonEmptySeries(primary, secondary)
% Return primary if it has data, else secondary, else [].
if ~isempty(primary)
    series = primary;
elseif ~isempty(secondary)
    series = secondary;
else
    series = [];
end
end

function [times, ratio] = ratioSeries(numerSeries, denomSeries)
% Resample two series onto the sorted union of their time stamps and return
% the element-wise ratio numer./denom. If either series is empty the result
% is a single NaN/NaN pair.
if isempty(numerSeries) || isempty(denomSeries)
    times = NaN;
    ratio = NaN;
    return
end
% Column 1 of BOTH series is the time axis.
times = sort([numerSeries(:, 1); denomSeries(:, 1)]);
ratio = resampleNearest(numerSeries, times) ./ resampleNearest(denomSeries, times);
end

function y = resampleNearest(series, times)
% Nearest-neighbour resampling of a series at the requested times. When
% interp1 rejects the input (for example a single sample), the series mean
% is used at every requested time instead.
try
    y = interp1(series(:, 1), series(:, 2), times, 'nearest', 'extrap');
catch
    y = mean(series(:, 2)) * ones(size(times));
end
end

function bmi = computeBMI(heightSeries, weightSeries)
% BMI = mean weight / mean (height/100)^2, i.e. height is divided by 100
% before squaring. Falls back to 21.75 when either series is empty or when
% the result is NaN or outside [10, 70].
% NOTE(review): all rows of column 2 are averaged as-is; if the data uses a
% sentinel for "missing" it is not filtered here -- confirm with the loader.
defaultBMI = 21.75;
if isempty(heightSeries) || isempty(weightSeries)
    bmi = defaultBMI;
    return
end
bmi = mean(weightSeries(:, 2)) ./ mean((heightSeries(:, 2) / 100).^2);
% Written as a positive range test so that NaN is also rejected.
if ~(bmi >= 10 && bmi <= 70)
    bmi = defaultBMI;
end
end
%helper function-----------------------------
function out = cellContains(cellArray, searchValue)
% Linear search for searchValue in cellArray using strcmp.
% Returns the index of the first matching element among positions
% 1..length(cellArray), or -1 when none of them matches.
out = -1;
nEntries = length(cellArray);
pos = 0;
while pos < nEntries
    pos = pos + 1;
    if strcmp(cellArray{pos}, searchValue)
        out = pos;
        break
    end
end
end
%--------------------
%HELPER FUNCTION
function X = removeNaNs(Xold, colHeaders)
%REMOVENANS Fill the NaN entries of the binned columns, row by row.
%
%   X = removeNaNs(Xold, colHeaders)
%
%   Columns whose header contains '_' are treated as bins of one variable;
%   the variable name is the header text before the first '_'. Columns
%   without '_' are left untouched. For each row and each variable:
%     * all bins NaN     -> every bin is set to the midpoint of the low/high
%                           range read from 'ICU Ranges.csv'. The range
%                           columns are selected from column 1 of the row
%                           (gender): 1 -> columns 1 and 2, 0 -> columns 3
%                           and 4. For 'Urine' both limits are multiplied
%                           by the number of bins first.
%     * one bin not NaN  -> every bin is set to that value.
%     * otherwise        -> NaNs between known bins are filled by linear
%                           interpolation over the bin index, and leading /
%                           trailing NaNs take the nearest filled value.
%
%   Inputs
%     Xold        numeric array, one row per patient; column 1 is gender.
%     colHeaders  cell array of column names, one per column of Xold.
%   Output
%     X           copy of Xold with the binned columns filled.
%
%   Raises an error when a variable whose bins are all NaN has no entry in
%   the range file's text data.
%   NOTE(review): the row index found in textdata is used directly as the
%   row of data -- this assumes the two are aligned (no extra header row in
%   textdata); confirm against the actual CSV.
normRangFile = importdata('ICU Ranges.csv', ',');
normalRanges = normRangFile.data;
normalRangesNames = normRangFile.textdata;
X = Xold;

% Map every column to the variable it belongs to ('NONE' = not binned).
nCols = length(colHeaders);
assocVariable = cell(1, nCols);
for c = 1:nCols
    header = colHeaders{c};
    underscoreAt = strfind(header, '_');
    if isempty(underscoreAt)
        assocVariable{c} = 'NONE';
    else
        assocVariable{c} = header(1:underscoreAt(1) - 1);
    end
end
variables = unique(assocVariable);
variables(strcmp(variables, 'NONE')) = [];

% The column mask of each variable is the same for every row; compute once.
nVars = length(variables);
colMasks = cell(nVars, 1);
for j = 1:nVars
    colMasks{j} = strcmp(assocVariable, variables{j});
end

for i = 1:size(X, 1)
    disp(i)
    gender = X(i, 1);
    for j = 1:nVars
        cols = colMasks{j};
        binVals = X(i, cols);
        known = ~isnan(binVals);
        nKnown = sum(known);

        if nKnown == 0
            % Nothing measured: substitute the midpoint of the stored range.
            loc = cellContains(normalRangesNames, variables{j});
            if loc < 1
                error('removeNaNs:missingRange', ...
                    'No range entry found for variable "%s" in ICU Ranges.csv.', ...
                    variables{j});
            end
            highRange = normalRanges(loc, (1 - gender)*2 + 2);
            lowRange = normalRanges(loc, (1 - gender)*2 + 1);
            if strcmp(variables{j}, 'Urine')
                highRange = highRange*length(binVals);
                lowRange = lowRange*length(binVals);
            end
            X(i, cols) = mean([highRange, lowRange]);
        elseif nKnown == 1
            % A single known bin is copied to every bin of the variable.
            X(i, cols) = binVals(known);
        else
            positions = 1:length(binVals);
            % Linear interpolation between known bins (NaN outside them) ...
            interped = interp1(positions(known), binVals(known), positions, 'linear', nan);
            % ... then extend the first/last filled value to the edges.
            filled = ~isnan(interped);
            exterped = interp1(positions(filled), interped(filled), positions, 'nearest', 'extrap');
            X(i, cols) = exterped;
        end
    end
end
end