You Snooze You Win: The PhysioNet/Computing in Cardiology Challenge 2018 1.0.0
(2,876 bytes)
#!/usr/bin/env python3
import os
import sys
import glob
import numpy as np
import physionetchallenge2018_lib as phyc
import matplotlib
from pylab import find
from sklearn.linear_model import LogisticRegression
from sklearn.externals import joblib
def init():
    """Prepare a clean ``models`` directory in the current working directory.

    Creates the ``models`` subdirectory if it does not exist yet, then
    deletes every file in it matching ``*_model.pkl`` so that models left
    over from a previous run are not mixed with new ones. Other files in
    the directory are left alone.

    Raises:
        OSError: if ``models`` cannot be created (for example because of
            missing permissions, or because a non-directory with that
            name already exists) or a stale model file cannot be removed.
    """
    # Only "directory already exists" is tolerated; any other failure is
    # reported here instead of surfacing later when a model is written.
    os.makedirs('models', exist_ok=True)

    for stale_model in glob.glob('models/*_model.pkl'):
        os.remove(stale_model)
def preprocess_record(record_name):
    """Train and save a per-record arousal classifier based on SaO2 variance.

    Reads the record's header (``.hea``), signal (``.mat``) and arousal
    (``-arousal.mat``) files, computes the variance of the SaO2 signal in
    consecutive, non-overlapping 60-second windows, labels each window with
    the maximum arousal value found inside it, fits a logistic regression
    classifier on those windows and writes it to
    ``models/<basename of record_name>_model.pkl``.

    Records whose windows contain no positive arousal label are skipped:
    a message is written to stderr and no model file is produced.

    Args:
        record_name: Path of the record without any file extension.

    Returns:
        None.
    """
    header_file = record_name + '.hea'
    signal_file = record_name + '.mat'
    arousal_file = record_name + '-arousal.mat'

    # Get the signal names from the header file
    signal_names, Fs, n_samples = phyc.import_signal_names(header_file)
    signal_names = list(np.append(signal_names, 'arousals'))

    # Convert this subject's data into a pandas dataframe
    this_data = phyc.get_subject_data(arousal_file, signal_file, signal_names)

    # ----------------------------------------------------------------------
    # Generate the features for the classification model - variance of SaO2
    # ----------------------------------------------------------------------
    # For the baseline, let's only look at how SaO2 might predict arousals
    SaO2 = this_data.get(['SaO2']).values
    arousals = this_data.get(['arousals']).values

    # We select a window size of 60 seconds with no overlap to compute
    # the features
    step = Fs * 60
    window_size = Fs * 60
    n_windows = n_samples // step

    # Initialize the arrays that store our training data: one feature
    # column per window and one label per window
    X_subj = np.zeros([n_windows, 1])
    Y_subj = np.zeros(n_windows)

    # Extract the variance of the SaO2 in 60 second windows as a feature.
    # np.var / np.max without an axis reduce the whole window to a true
    # scalar, so nothing relies on implicit 1-element-array-to-scalar
    # conversion (deprecated in recent NumPy releases).
    for idx, k in enumerate(range(0, n_samples - step + 1, step)):
        X_subj[idx, 0] = np.var(SaO2[k:k + window_size])
        Y_subj[idx] = np.max(arousals[k:k + window_size])

    # Ignore records that do not contain any arousals.
    # NOTE(review): assumes arousal windows carry a positive label and that
    # any negative label means something other than an arousal (e.g. an
    # unscored region) - confirm against the dataset's annotation scheme.
    if not np.any(Y_subj > 0):
        sys.stderr.write('no arousals found in %s\n' % record_name)
        return

    # ---------------------------------------------------------------------
    # Train a (multi-class) Logistic Regression classifier
    # ---------------------------------------------------------------------
    my_classifier = LogisticRegression()
    my_classifier.fit(X_subj, Y_subj)

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # Save this algorithm for submission to Physionet Challenge:
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # NOTE(review): `joblib` is imported from `sklearn.externals`, which
    # newer scikit-learn releases no longer provide - confirm the pinned
    # scikit-learn version before upgrading.
    model_file = 'models/%s_model.pkl' % os.path.basename(record_name)
    joblib.dump(my_classifier, model_file)
def finish():
    """Hook invoked once after every record has been processed.

    Currently a no-op; it exists so post-training work has a place to go.
    """
    return None
if __name__ == '__main__':
    # Start from a clean models directory, train one model per record
    # name given on the command line, then run the final hook.
    init()
    record_names = sys.argv[1:]
    for record_name in record_names:
        preprocess_record(record_name)
    finish()