Predicting Mortality of ICU Patients: The PhysioNet/Computing in Cardiology Challenge 2012 1.0.0
(14,826 bytes)
dataset <- read.table("stdin", header=TRUE, sep=",")
TIME=sapply(strsplit(as.character(dataset[,1]),":"),function(x) {
x <- as.numeric(x)
x[1]
}
)
data=cbind(TIME,dataset[,2:3])
d0=t(subset(data,data[,2]=="RecordID")[,-2])
d01=t(subset(data,data[,2]=="ALP")[,-2])
d02=t(subset(data,data[,2]=="ALT")[,-2])
d03=t(subset(data,data[,2]=="AST")[,-2])
d04=t(subset(data,data[,2]=="Age")[,-2])
d1=t(subset(data,data[,2]=="Albumin")[,-2])
d2=t(subset(data,data[,2]=="BUN")[,-2])
d3=t(subset(data,data[,2]=="Bilirubin")[,-2])
d4=t(subset(data,data[,2]=="Cholesterol")[,-2])
d5=t(subset(data,data[,2]=="Creatinine")[,-2])
d6=t(subset(data,data[,2]=="DiasABP")[,-2])
d7=t(subset(data,data[,2]=="FiO2")[,-2])
d8=t(subset(data,data[,2]=="GCS")[,-2])
d9=t(subset(data,data[,2]=="Gender")[,-2])
d10=t(subset(data,data[,2]=="Glucose")[,-2])
d11=t(subset(data,data[,2]=="HCO3")[,-2])
d12=t(subset(data,data[,2]=="HCT")[,-2])
d13=t(subset(data,data[,2]=="HR")[,-2])
d14=t(subset(data,data[,2]=="Height")[,-2])
d15=t(subset(data,data[,2]=="ICUType")[,-2])
d16=t(subset(data,data[,2]=="K")[,-2])
d17=t(subset(data,data[,2]=="Lactate")[,-2])
d18=t(subset(data,data[,2]=="MAP")[,-2])
d19=t(subset(data,data[,2]=="MechVent")[,-2])
d20=t(subset(data,data[,2]=="Mg")[,-2])
d21=t(subset(data,data[,2]=="NIDiasABP")[,-2])
d22=t(subset(data,data[,2]=="NIMAP")[,-2])
d23=t(subset(data,data[,2]=="NISysABP")[,-2])
d24=t(subset(data,data[,2]=="Na")[,-2])
d25=t(subset(data,data[,2]=="PaCO2")[,-2])
d26=t(subset(data,data[,2]=="PaO2")[,-2])
d27=t(subset(data,data[,2]=="Platelets")[,-2])
d28=t(subset(data,data[,2]=="RespRate")[,-2])
d29=t(subset(data,data[,2]=="SaO2")[,-2])
d30=t(subset(data,data[,2]=="SysABP")[,-2])
d31=t(subset(data,data[,2]=="Temp")[,-2])
d32=t(subset(data,data[,2]=="TroponinI")[,-2])
d33=t(subset(data,data[,2]=="Urine")[,-2])
d34=t(subset(data,data[,2]=="WBC")[,-2])
d35=t(subset(data,data[,2]=="Weight")[,-2])
d36=t(subset(data,data[,2]=="pH")[,-2])
d37=t(subset(data,data[,2]=="TroponinT")[,-2])
b=list(d0,d01,d02,d03,d04,d1,d2,d3,d4,d5,d6,d7,d8,d9,d10,d11,d12,d13,d14,d15,d16,d17,d18,d19,d20,
d21,d22,d23,d24,d25,d26,d27,d28,d29,d30,d31,d32,d33,d34,d35,d36,d37)
t24=t(as.matrix(sapply(b,function(d) mean(d[2,][d[1,]<=24]))))
colnames(t24)=c("X.RecordID.", "X.ALP.", "X.ALT.", "X.AST.", "X.Age.", "X.Albumin.", "X.BUN.",
"X.Bilirubin.", "X.Cholesterol.", "X.Creatinine.", "X.DiasABP.", "X.FiO2.", "X.GCS.", "X.Gender.",
"X.Glucose.", "X.HCO3.", "X.HCT.", "X.HR.", "X.Height.", "X.ICUType.", "X.K.", "X.Lactate.",
"X.MAP.", "X.MechVent.", "X.Mg.", "X.NIDiasABP.", "X.NIMAP.", "X.NISysABP.", "X.Na.", "X.PaCO2.",
"X.PaO2.", "X.Platelets.", "X.RespRate.", "X.SaO2.", "X.SysABP.", "X.Temp.", "X.TroponinI.",
"X.Urine.", "X.WBC.", "X.Weight.", "X.pH.", "X.TroponinT.")
t48=t(as.matrix(sapply(b,function(d) mean(d[2,][d[1,]<=48 && d[1,]>24]))))
colnames(t48)=c("X.RecordID.", "X.ALP.", "X.ALT.", "X.AST.", "X.Age.", "X.Albumin.", "X.BUN.", "X.Bilirubin.",
"X.Cholesterol.", "X.Creatinine.", "X.DiasABP.", "X.FiO2.", "X.GCS.", "X.Gender.", "X.Glucose.", "X.HCO3.",
"X.HCT.", "X.HR.", "X.Height.", "X.ICUType.", "X.K.", "X.Lactate.", "X.MAP.", "X.MechVent.", "X.Mg.",
"X.NIDiasABP.", "X.NIMAP.", "X.NISysABP.", "X.Na.", "X.PaCO2.", "X.PaO2.", "X.Platelets.", "X.RespRate.", "X.SaO2.",
"X.SysABP.", "X.Temp.", "X.TroponinI.", "X.Urine.", "X.WBC.", "X.Weight.", "X.pH.", "X.TroponinT.")
t24=data.frame(t24)
t48=data.frame(t48)
if (!is.na(t24$X.Weight.) & t24$X.Weight.==-1) {t24$X.Weight.=NA}
if (!is.na(t48$X.Weight.) & t48$X.Weight.==-1) {t48$X.Weight.=NA}
if (!is.na(t24$X.Height.) & t24$X.Height.==-1) {t24$X.Height.=NA}
if (!is.na(t48$X.Height.) & t48$X.Height.==-1) {t48$X.Height.=NA}
for (i in 1:ncol(t24))
{
t24[[i]]=mapply(function(x,y) { if (is.na(x)) {x=y}
else{x=x}}, t24[[i]], t48[[i]])
}
for (i in 1:ncol(t48))
{
t48[[i]]=mapply(function(x,y) { if (is.na(x)) {x=y}
else{x=x}}, t48[[i]], t24[[i]])
}
a=t24
b=t48
#Mech vent variable
for (i in 1:nrow(a))
{
if (is.na(a$X.MechVent.[i]))
{a$X.MechVent.[i]=0}
else
{a$X.MechVent.[i]=1
}
}
##adding BMI for 24
BMI=a$X.Weight./((a$X.Height./100)^2)
a=data.frame(a,BMI)
##adding BMI for 48
BMI=b$X.Weight./((b$X.Height./100)^2)
b=data.frame(b,BMI)
#delta (48-24)
delta=b-a
#getting rid off delta of (record id,age,heigth,gender,icutype,mechanic ventilator)
delta=delta[c(-1,-5,-14,-20,-19,-24)]
#str(delta)
#if delta=0 put it NaN
delta3=delta
for (i in 1:ncol(delta))
{
delta3[i]=sapply(delta[[i]],function(x){if ( !is.na(x) & x==0) {x=NA}
else {x=x}})
}
d=data.frame(a,delta3)
patientRecordID=d$X.RecordID.
###define the input for logistic regression
#GCS
GCS=d$X.GCS./15
#AGE grouping
agee=sapply(d$X.Age., function(age) {
if (age<20)
{age2=1}
else if (age<30)
{age2=2}
else if (age<40)
{age2=3}
else if (age<50)
{age2=4}
else if (age<60)
{age2=5}
else if (age<70)
{age2=6}
else {age2=7}})
agee=agee/7
##define final threshhold:
thresh=0.3362
if (is.na(d$X.ALP.))
{thresh=0.28}
##log transformation:
bilbi=log(d$X.Bilirubin.+0.01)
alp=log(d$X.ALP.+0.01)
alt=log(d$X.ALT.+0.01)
ast=log(d$X.AST.+10)
pao2=log(1+d$X.PaO2.)
paco2=log(1+d$X.PaCO2.)
fio2=log(0.01+d$X.FiO2.)
K=log(0.01+d$X.K.)
Mg=log(0.01+d$X.Mg.)
lactate=log(0.1+d$X.Lactate.)
bun=log(1+d$X.BUN.)
createnin=log(0.01+d$X.Creatinine.)
plat=log(d$X.Platelets.+1)
urine=log(1+d$X.Urine.)
wbc=log(1+d$X.WBC.)
Weight=log(0.1+d$X.Weight.)
lactate.d=log(d$X.Lactate..1+9.648484847+1)
FiO2.d=log(d$X.FiO2..1+0.65+1)
Mg.d=log(d$X.Mg..1+2.806666667+1)
Temp.d=log(d$X.Temp..1+7.47142857+1)
###STANDARDIZATION
d1=data.frame(alp, alt, ast, d$X.Albumin., bun, bilbi, createnin, d$X.DiasABP., fio2, d$X.Glucose., d$X.HCO3., d$X.HCT.,
d$X.HR., d$X.Height., K, lactate, d$X.MAP., d$X.MechVent., Mg, d$X.NIDiasABP., d$X.NISysABP., d$X.Na., paco2,
pao2, plat, d$X.SysABP., d$X.Temp., urine, wbc, Weight, d$X.pH., d$BMI, d$X.BUN..1, d$X.Creatinine..1, d$X.DiasABP..1,
d$X.FiO2..1, d$X.GCS..1, d$X.Glucose..1, d$X.HCO3..1, d$X.HCT..1, d$X.HR..1, d$X.K..1, lactate.d, d$X.MAP..1, Mg.d, d$X.NIDiasABP..1,
d$X.NISysABP..1, d$X.Na..1, d$X.PaCO2..1, d$X.PaO2..1, d$X.Platelets..1, d$X.SysABP..1, Temp.d, d$X.Urine..1, d$X.WBC..1,
d$X.pH..1, d$BMI.1)
#ad<-scale(d1, center = TRUE, scale = TRUE)
mean1=c(4.428792, 3.751404, 4.377866, 2.950229, 3.185411, -0.1377478, 0.8950096, 59.65327, -0.5278706, 144.1531, 22.7077, 32.11874,
88.77805, 170.0834, 1.41549, 0.8592167, 81.20807, 0.6358339, 0.6892202, 57.56831, 114.3365, 139.0517, 3.683789, 4.978323,
5.194582, 114.7876, 36.89164, 4.546272, 2.508671, 4.387889, 7.490792, 210.2489, -0.7093838, 0.01846575, 0.602998, -0.08731749,
0.769466, -17.21709, 0.8136559, -1.072103, -1.081304, -0.09029135, 2.282917, 0.8655896, 1.348159, 0.06620109, 1.163557, 0.1317891,
-0.4752844, -39.57504, -20.90938, 2.952707, 2.149836, -7.146416, -0.609545, -0.0006686606, -33.55544)
sd1=c(0.6066783, 1.288708, 1.146172, 0.595363, 0.6869114, 1.043874, 0.04468801, 10.33693, 0.2128702, 54.1664, 4.719987, 5.050711,
16.4147, 16.75468, 0.1387224, 0.4910585, 12.99533, 0.4813709, 0.1716615, 12.22031, 19.80241, 4.647058, 0.1865474, 0.3911182,
0.5808523, 18.40125, 0.9844662, 0.9129201, 0.5381392, 0.2729483, 2.683078, 5723.599, 8.796758, 0.5259562, 7.190248, 0.1080009,
2.662966, 60.13094, 2.942004, 3.375774, 11.18836, 0.5724884, 0.1473419, 9.631489, 0.0857281, 7.683106, 15.97342, 3.160388,
4.981642, 50.95536, 45.65448, 13.30604, 0.1170007, 93.00365, 4.318247, 0.5854828, 107.8054)
d2=mapply(function(x,y,z){z=(z-x)/y},mean1,sd1,d1)
d2=t(data.frame(d2))
colnames(d2)<-c("alp", "alt", "ast", "d.X.Albumin.", "bun", "bilbi", "createnin", "d.X.DiasABP.", "fio2", "d.X.Glucose.",
"d.X.HCO3.", "d.X.HCT.", "d.X.HR.", "d.X.Height.", "K", "lactate", "d.X.MAP.", "d.X.MechVent.", "Mg", "d.X.NIDiasABP.", "d.X.NISysABP.",
"d.X.Na.", "paco2", "pao2", "plat", "d.X.SysABP.", "d.X.Temp.", "urine", "wbc", "Weight", "d.X.pH.", "d.BMI", "d.X.BUN..1",
"d.X.Creatinine..1", "d.X.DiasABP..1", "d.X.FiO2..1", "d.X.GCS..1", "d.X.Glucose..1", "d.X.HCO3..1", "d.X.HCT..1", "d.X.HR..1",
"d.X.K..1", "lactate.d", "d.X.MAP..1", "Mg.d", "d.X.NIDiasABP..1", "d.X.NISysABP..1", "d.X.Na..1", "d.X.PaCO2..1",
"d.X.PaO2..1", "d.X.Platelets..1", "d.X.SysABP..1", "Temp.d", "d.X.Urine..1", "d.X.WBC..1", "d.X.pH..1", "d.BMI.1")
d1=data.frame(GCS,agee,d2)
d3=data.frame( d1$alp, d1$urine, d1$d.X.GCS..1, d1$GCS, d1$bun, d1$d.X.BUN..1, d1$lactate, d1$createnin,
d1$pao2, d1$paco2, d1$d.X.Na..1, d1$lactate.d, d1$d.X.HCO3., d1$d.X.Temp., d1$agee,
d1$d.X.Albumin., d1$d.X.Glucose., d1$d.X.Creatinine..1, d1$ast)
#d3=data.frame(d3,death)
## for all the validation set:
b=d3
###------------------------------------------
# Outliers and Missing values imputation:
###------------------------------------------
##b$d1.alp
b$d1.alp=sapply(b$d1.alp,function(x){
if (is.na(x) || x<=-3)
{
x2=-0.139870244
}
else{x2=x}
})
##b$d1.urine
b$d1.urine=sapply(b$d1.urine,function(x){
if (is.na(x)|| x>=3)
{
x2=0.10219898
}else{x2=x}
})
##b$d1.d.X.GCS..1
b$d1.d.X.GCS..1=sapply(b$d1.d.X.GCS..1,function(x){
if (is.na(x)|| x>=3)
{
x2=-0.063638063
}else{x2=x}
})
##b$d1.GCS
b$d1.GCS=sapply(b$d1.GCS,function(x){
if (is.na(x))
{
x=0.688888889
}
else
{x=x}
})
##b$d1.bun
b$d1.bun=sapply(b$d1.bun,function(x){
if (is.na(x)|| x>=3 || x<=-3)
{
x=-0.104664478
}
else
{x=x}
})
##b$d1.d.X.BUN..1
b$d1.d.X.BUN..1=sapply(b$d1.d.X.BUN..1,function(x){
if (is.na(x) || x<=-3)
{
x2=-0.033036739
}
else{x2=x}
})
##b$d1.lactate
b$d1.lactate=sapply(b$d1.lactate,function(x){
if (is.na(x) || x<=-3)
{
x2=-0.048071416
}
else{x2=x}
})
##b$d1.createnin
b$d1.createnin=sapply(b$d1.createnin,function(x){
if (is.na(x) || x<=-3)
{
x2=-0.363778551
}
else{x2=x}
})
##b$d1.pao2
b$d1.pao2=sapply(b$d1.pao2,function(x){
if (is.na(x)|| x>=3 || x<=-3)
{
x=0.039642804
}
else
{x=x}
})
##b$d1.paco2
b$d1.paco2=sapply(b$d1.paco2,function(x){
if (is.na(x)|| x>=3)
{
x2=0.021621913
}else{x2=x}
})
##b$d1.d.X.Na..1
b$d1.d.X.Na..1=sapply(b$d1.d.X.Na..1,function(x){
if (is.na(x))
{
x2=0.069289425
}else{x2=x}
})
##b$d1.lactate.d
b$d1.lactate.d=sapply(b$d1.lactate.d,function(x){
if (is.na(x))
{
x2=0.115348192
}else{x2=x}
})
##b$d1.d.X.HCO3.
b$d1.d.X.HCO3.=sapply(b$d1.d.X.HCO3.,function(x){
if (is.na(x) || x<=-3)
{
x2=-0.0086942
}
else{x2=x}
})
##b$d1.d.X.Temp.
b$d1.d.X.Temp.=sapply(b$d1.d.X.Temp.,function(x){
if (is.na(x)|| x>=3 || x<=-3)
{
x=0.038366966
}
else
{x=x}
})
##b$d1.agee
b$d1.agee=sapply(b$d1.agee,function(x){
if (is.na(x))
{
x2=0.857142857
}
else
{x2=x}})
##b$d1.d.X.Albumin.
b$d1.d.X.Albumin.=sapply(b$d1.d.X.Albumin.,function(x){
if (is.na(x)|| x>=3 || x<=-3)
{
x=-0.031444932
}
else
{x=x}
})
##b$d1.d.X.Glucose.
b$d1.d.X.Glucose.=sapply(b$d1.d.X.Glucose.,function(x){
if (is.na(x) || x<=-3)
{
x2=-0.242828212
}
else{x2=x}
})
##b$d1.d.X.Creatinine..1
b$d1.d.X.Creatinine..1=sapply(b$d1.d.X.Creatinine..1,function(x){
if (is.na(x))
{
x2=-0.022762693
}
else{x2=x}
})
##b$d1.ast
b$d1.ast=sapply(b$d1.ast,function(x){
if (is.na(x) || x<=-3)
{
x2=-0.276942249
}
else{x2=x}
})
##---------------------------------------------
# (The end of Outliers and Missing values imputation)
##--------------------------------------------
##making 4 variables: (d.X.DiasABP..11, d.X.HCT..11, d.X.HCT.1, d.X.Na.1)
##First, cleaning parent data columns:
##d1$d.X.DiasABP..1
d1$d.X.DiasABP..1=sapply(d1$d.X.DiasABP..1,function(x){
if (is.na(x)|| x>=5 || x<=-5)
{
x2=0
}else{x2=x}
})
##d1$d.X.HCT..1
d1$d.X.HCT..1=sapply(d1$d.X.HCT..1,function(x){
if (is.na(x)|| x>=5 || x<=-5)
{
x2=0
}else{x2=x}
})
##d1$d.X.HCT.
d1$d.X.HCT.=sapply(d1$d.X.HCT.,function(x){
if (is.na(x)|| x>=5 || x<=-5)
{
x2=0
}else{x2=x}
})
##d1$d.X.Na.
d1$d.X.Na.=sapply(d1$d.X.Na.,function(x){
if (is.na(x)|| x>=5 || x<=-5)
{
x2=0
}else{x2=x}
})
## Second: ( from alp v4 ) : study the ones that are less associated. if their outliers are associated with death, then we made it a binary variable.
#d.X.DiasABP..11
#d1$d.X.DiasABP..1<= -2.5 or >= 2.2 can be associated with death therefore we will make this variable a binary:
d.X.DiasABP..11=sapply(d1$d.X.DiasABP..1, function(x) {
if (x>=2.2 ||x<=-2.5 )
{x2=1}
else
{x2=0}
})
#b[60]=d.X.DiasABP..11
#d.X.HCT..11
#d1$d$X.HCT..1<= -2.4 or >= 2.5 can be associated with death therefore we will make this variable a binary:
d.X.HCT..11=sapply(d1$d.X.HCT..1, function(x) {
if (x>=2.5 ||x<=-2.4 )
{x2=1}
else
{x2=0}
})
#b[61]=d.X.HCT..11
#d.X.HCT.1
#d1$d.X.HCT. <= -2.486213828 or >= 2.6 can be associated with death therefore we will make this variable a binary:
d.X.HCT.1=sapply(d1$d.X.HCT., function(x) {
if (x>= 2.6 ||x<= -2.486213828 )
{x2=1}
else
{x2=0}
})
#b[62]=d.X.HCT.1
#d.X.Na.1
#d1$d.X.Na. <= -2 or >= 2 can be associated with death therefore we will make this variable a binary:
d.X.Na.1=sapply(d1$d.X.Na., function(x) {
if (x>= 2 ||x<= -2 )
{x2=1}
else
{x2=0}
})
b=data.frame(b,d.X.DiasABP..11, d.X.HCT..11, d.X.HCT.1, d.X.Na.1)
##-------------------
## LogReg Model:
##-------------------
b24=data.frame(b$d1.alp, b$d1.urine, b$d1.d.X.GCS..1, b$d1.GCS, b$d1.bun, b$d1.d.X.BUN..1, b$d1.lactate, b$d1.createnin,
b$d1.pao2, b$d1.paco2, b$d1.d.X.Na..1, b$d1.lactate.d, b$d1.d.X.HCO3., b$d1.d.X.Temp., b$d1.agee, b$d1.d.X.Albumin.,
b$d1.d.X.Glucose., b$d1.d.X.Creatinine..1, b$d1.ast, b$d.X.DiasABP..11, b$d.X.HCT..11, b$d.X.HCT.1, b$d.X.Na.1)
#build model
attach(b24, warn.conflicts = FALSE)
y= (-1.01648 +(b$d1.alp)*0.32492 +b$d1.urine*-0.383 +b$d1.d.X.GCS..1*-0.63786 +b$d1.GCS*-2.71744 +b$d1.bun*0.52039 +b$d1.d.X.BUN..1*0.45308
+b$d1.lactate*0.29553 +b$d1.createnin*-0.2694 +b$d.X.HCT..11*1.67211 +b$d1.pao2*-0.28836 +b$d1.paco2*-0.41355 +b$d1.d.X.Na..1*0.23602
+b$d1.lactate.d*0.25241 +b$d1.d.X.HCO3.*0.34784 +b$d1.d.X.Temp.*-0.24007 +b$d.X.Na.1*0.7556 +b$d1.agee*0.99375 +b$d1.d.X.Albumin.*-0.21477
+b$d1.d.X.Glucose.*0.14364 +b$d1.d.X.Creatinine..1*-0.17765 +b$d1.ast*0.14487 +b$d.X.DiasABP..11*0.71979 +b$d.X.HCT.1*1.55271)
##Threshold :
#threshold=? for all the data
#threshold=? for troponin data
ghal=y
ab=exp(ghal)/(1+exp(ghal))
predict=0
if (ab >= thresh)
{predict=1}
cat(sprintf("%i,%i,%f\n", patientRecordID, predict, ab));