# You Snooze You Win: The PhysioNet/Computing in Cardiology Challenge 2018 1.0.0
# (4,032 bytes)
#!/usr/bin/env python3
"""
Created on Thu Mar 29 13:47:45 2018
AUTHORS:      Mohammad M. Ghassemi
              Benjamin E. Moody
PURPOSE:      This script prepares an entry for the PhysioNet 2018 Challenge.
REQUIREMENTS: We assume that you have downloaded the data from
              https://physionet.org/physiobank/database/challenge/2018/#files
"""
import numpy as np
import os
import sys
import physionetchallenge2018_lib as phyc
from score2018 import Challenge2018Score
from pylab import find
from sklearn.metrics import precision_recall_curve, auc, roc_auc_score
from zipfile import ZipFile, ZIP_DEFLATED
import gc
import train_classifier as T
import run_my_classifier as R
# -----------------------------------------------------------------------------
# Generate the data to train the classifier
# -----------------------------------------------------------------------------
def train():
    """Preprocess every training record and finalize the classifier.

    Calls ``T.init()`` once, passes each training record name to
    ``T.preprocess_record`` while printing a progress line, and calls
    ``T.finish()`` after the last record has been handled.
    """
    T.init()

    # Only the training half of the file listing is used here.
    training_files, _ = phyc.get_files()
    total = np.size(training_files, 0)

    # Visit the training subjects one by one (1-based for the progress text).
    for position in range(1, total + 1):
        # Run a garbage-collection pass before handling each record.
        gc.collect()
        print('Preprocessing training subject: %d/%d' % (position, total))

        # The record name is the header entry minus its last four characters
        # (presumably the file extension -- confirm against phyc.get_files).
        header_entry = training_files.header.values[position - 1]
        T.preprocess_record(header_entry[:-4])

    T.finish()
# -----------------------------------------------------------------------------
# Run the classifier on each training subject, and compute the mean performance
# -----------------------------------------------------------------------------
def score_training_set():
    """Classify every training record and print AUROC/AUPRC scores.

    For each training record, the predictions returned by
    ``R.classify_record`` are scored against the arousal annotations loaded
    with ``phyc.import_arousals``, and the per-record AUROC and AUPRC are
    printed on the progress line. After the last record, the gross AUROC and
    AUPRC reported by the ``Challenge2018Score`` object are printed.
    """
    # Only the training half of the file listing is used here.
    training_files, _ = phyc.get_files()

    scorer = Challenge2018Score()
    total = np.size(training_files, 0)
    for position in range(1, total + 1):
        row = position - 1
        # Run a garbage-collection pass before handling each record.
        gc.collect()

        # No trailing newline: the scores printed below finish this line.
        sys.stdout.write('Evaluating training subject: %d/%d'
                         % (position, total))
        sys.stdout.flush()

        # The record name is the header entry minus its last four characters.
        record_name = training_files.header.values[row][:-4]
        predictions = R.classify_record(record_name)

        # Load the reference annotations and flatten them to one dimension.
        arousals = np.ravel(
            phyc.import_arousals(training_files.arousal.values[row]))

        scorer.score_record(arousals, predictions, record_name)
        record_scores = (scorer.record_auroc(record_name),
                         scorer.record_auprc(record_name))
        print(' AUROC:%f AUPRC:%f' % record_scores)

    print()
    # Fetch both gross scores before printing either of them.
    gross_auroc, gross_auprc = scorer.gross_auroc(), scorer.gross_auprc()
    print('Training AUROC Performance (gross): %f' % gross_auroc)
    print('Training AUPRC Performance (gross): %f' % gross_auprc)
    print()
# -----------------------------------------------------------------------------
# Run the classifier on each test subject, and save the predictions
# for submission
# -----------------------------------------------------------------------------
def evaluate_test_set():
    """Classify every test record and save its predictions to a file.

    Each record's predictions are written with ``np.savetxt`` (format
    ``'%.3f'``) to ``<record base name>.vec``. The output name carries no
    directory component, so the file lands in the current working directory.
    """
    # Only the test half of the file listing is used here.
    _, test_files = phyc.get_files()
    total = np.size(test_files, 0)

    for position in range(1, total + 1):
        # Run a garbage-collection pass before handling each record.
        gc.collect()
        print('Evaluating test subject: %d/%d' % (position, total))

        # The record name is the header entry minus its last four characters.
        record_name = test_files.header.values[position - 1][:-4]
        # Strip any directory part so the output is written locally.
        output_file = os.path.basename(record_name) + '.vec'

        predictions = R.classify_record(record_name)
        np.savetxt(output_file, predictions, fmt='%.3f')
# -----------------------------------------------------------------------------
# Build a zip file for submission to the Challenge
# -----------------------------------------------------------------------------
def package_entry():
    """Bundle the entry's files into ``entry.zip`` for submission.

    Walks the current directory tree and adds every file whose name ends in
    ``.vec``, ``.py``, ``.pkl`` or ``.txt`` to a deflate-compressed archive
    named ``entry.zip`` created in the current directory.
    """
    wanted_suffixes = ('.vec', '.py', '.pkl', '.txt')

    with ZipFile('entry.zip', 'w', ZIP_DEFLATED) as archive:
        for folder, _, filenames in os.walk('.'):
            for filename in filenames:
                # Skip anything that is not part of the submission.
                if not filename.endswith(wanted_suffixes):
                    continue
                archive.write(os.path.join(folder, filename))
if __name__ == '__main__':
    # Run the whole pipeline in order: prepare the training data, score the
    # training set, write the test-set predictions, then build the archive.
    for stage in (train, score_training_set, evaluate_test_set,
                  package_entry):
        stage()