# ECG-Kit 1.0
# (13,794 bytes)
#!/bin/bash
# Parallel configuration tables: index i in every array below describes the
# same ECG database (the ruler comment gives the index of each column).
# 0 1 2 3 4 5 6 7 8 9
databases=( AHA ESTTDB LTSTDB MITBIH-AR MITBIH-AR-DS2 MITBIH-SUP MITBIH-ST INCART Biosigna MITBIH-LT)
# Recording format passed to a2hbc as 'recording_format' (AHA / MIT / HES).
db_fmt=( AHA MIT MIT MIT MIT MIT MIT MIT HES MIT)
# Number of Condor processes queued per recording ('cant_pids' below).
pidsXrec=( 3 10 30 3 3 3 3 3 3 30 )
# Expected number of recordings per database; used to sanity-check db_paths.
db_sizes=( 154 90 86 44 22 78 18 75 56 7 )
# Recording file extension per database (used to glob for recordings).
db_ext=( ecg dat dat dat dat dat dat dat hes dat )
# Filesystem root of each database; several contain spaces, hence the quotes.
db_paths=( "/extra/database/bio/ecg/thew/aha/" \
"/extra/database/bio/ecg/thew/European ST-T Database/" \
"/extra/database/bio/ecg/thew/Long-Term ST Database/" \
"/extra/database/bio/ecg/thew/mitbih-ar/" \
"/extra/database/bio/ecg/thew/ds2/" \
"/extra/database/bio/ecg/thew/MIT-BIH Supraventricular Arrhythmia Database/" \
"/extra/database/bio/ecg/thew/MIT-BIH ST Change Database/" \
"/extra/database/bio/ecg/thew/St Petersburg Institute of Cardiological Technics 12-lead Arrhythmia Database/" \
"/extra/database/bio/ecg/thew/biosigna/" \
"/extra/database/bio/ecg/thew/The MIT-BIH Long Term Database/" \
)
#Typical configuration
# Per-operating-mode parameters: entry k in each array applies to op_modes[k].
op_modes=( auto slightly-assisted assisted assisted )
NumOfClusters=( 9 9 9 12 )
ClusteringRepetitions=( 1 1 1 1 )
ClusterPresence=( 50 75 50 50 )
#Long-term configuration
#op_modes=( assisted )
#NumOfClusters=( 15 )
#ClusteringRepetitions=( 5 )
#ClusterPresence=( 50 )
# 0 avoids creating dependencies that force some JOBS to run before others.
# When 1, each recording's first-mode job must finish before its other modes.
#finish_preproc_first=1
finish_preproc_first=0
# When 1, caps the number of processes running concurrently in CONDOR.
#limited=1
limited=0
# Number of times a recording is retried when an error occurs. Intended for
# transient failures when accessing a file on the network filesystem.
retries=1
submit_delay=0
processes_running_total=1200
processes_idle_total=700 # this avoids queueing too much work into Condor
databases_running=4
iterations=60
aux_str="*."
tmp_path=/extra/scratch/bio/mllamedo/tmp/
condor_output_path=/home/bio/mllamedo/ecg_classification/tmp/condor_output/
#do_cleanup=1 #DO clean previous temporary files
do_cleanup=0 #do NOT clean previous temporary files
#echo "Cleaning previous $condor_output_path_this_work/global.dag files ..."
#rm ./global.*
#echo "done."
global_parent_string="PARENT"
# Index of the last database (kept for the commented seq-based alternatives).
aux=$(( ${#databases[@]} - 1 ))
#databases_idx=($(seq 0 $aux) )
#databases_idx=($(seq 0 8) )
# Which databases (indices into the tables above) this run processes.
databases_idx=(3 4)
# BUG FIX: databases_idx must be defined BEFORE these checks. The original
# script read the still-empty array here and set databases_running=0 in the
# unlimited case.
if [ "$limited" -eq "0" ]
then
databases_running=${#databases_idx[@]}
fi
# Never claim to run more databases concurrently than are selected.
if [ "$databases_running" -ge "${#databases_idx[@]}" ]
then
databases_running=${#databases_idx[@]}
fi
# Timestamped output directory so every run keeps its own logs and dag files.
timestamp=$(date +%Y_%m_%d-%H_%M_%S)
condor_output_path_this_work="$condor_output_path$timestamp"
# -p avoids the check-then-create race and is a no-op if the directory already
# exists; quoting protects against spaces in the configured path (SC2086).
mkdir -p -- "$condor_output_path_this_work"
# Master dag file for the whole run (per-database dags are added to it below).
echo "# Dag generated by MEC" > $condor_output_path_this_work/global.dag
# Build the base Condor submit description shared by every job.
echo "Universe = vanilla" > $condor_output_path_this_work/aux.condor
echo "InitialDir = /home/bio/mllamedo/ecg_classification/a2hbc" >> $condor_output_path_this_work/aux.condor
echo "Executable = \$(InitialDir)/scripts/common/condor/condor_exec.sh" >> $condor_output_path_this_work/aux.condor
echo "GetEnv = false" >> $condor_output_path_this_work/aux.condor
echo "Requirements = ((Arch == \"INTEL\") || (Arch == \"X86_64\")) && (OpSys == \"LINUX\") && (Disk >= DiskUsage) && ((Memory * 1024) >= ImageSize) && (TARGET.FileSystemDomain == MY.FileSystemDomain)" >> $condor_output_path_this_work/aux.condor
echo "+ShortJob = TRUE" >> $condor_output_path_this_work/aux.condor
echo "nice_user = true" >> $condor_output_path_this_work/aux.condor
# Duplicated because this same base conf is also used for results collection.
cp $condor_output_path_this_work/aux.condor $condor_output_path_this_work/aux_recs_results_collector.condor
cp $condor_output_path_this_work/aux.condor $condor_output_path_this_work/aux_db_results_collector.condor
# Arguments for the per-recording jobs: an a2hbc(...) call whose \$(var)
# placeholders are filled in by the VARS lines of each dag.
echo "Arguments = a2hbc( \\" >> $condor_output_path_this_work/aux.condor
echo " 'recording_name', '\$(recording_name)', \\" >> $condor_output_path_this_work/aux.condor
echo " 'recording_format', '\$(recording_format)', \\" >> $condor_output_path_this_work/aux.condor
echo " 'op_mode', '\$(op_mode)', \\" >> $condor_output_path_this_work/aux.condor
echo " 'SimulateExpert', true, \\" >> $condor_output_path_this_work/aux.condor
echo " 'Repetitions', \$(Repetitions), \\" >> $condor_output_path_this_work/aux.condor
echo " 'tmp_path', '\$(tmp_path)', \\" >> $condor_output_path_this_work/aux.condor
echo " 'NumOfClusters', \$(NumOfClusters), \\" >> $condor_output_path_this_work/aux.condor
echo " 'ClusteringRepetitions', \$(ClusteringRepetitions), \\" >> $condor_output_path_this_work/aux.condor
echo " 'ClusterPresence', \$(ClusterPresence), \\" >> $condor_output_path_this_work/aux.condor
echo " 'cant_pids', \$(cant_pids), \\" >> $condor_output_path_this_work/aux.condor
echo " 'this_pid', \$(Process)+1 \\" >> $condor_output_path_this_work/aux.condor
echo " );" >> $condor_output_path_this_work/aux.condor
# Each recording queues cant_pids parallel processes.
echo "Queue \$(cant_pids)" >> $condor_output_path_this_work/aux.condor
# Per-mode recording-results collector job.
echo "Arguments = CollectResutls('\$(tmp_path)', '\$(op_mode)' );" >> $condor_output_path_this_work/aux_recs_results_collector.condor
echo "Queue 1" >> $condor_output_path_this_work/aux_recs_results_collector.condor
# Database-level results collector: base conf plus global log targets.
echo "Error = ../tmp/condor_output/$timestamp/global.err" > $condor_output_path_this_work/global_db_results_collector.condor
echo "Log = ../tmp/condor_output/$timestamp/global.log" >> $condor_output_path_this_work/global_db_results_collector.condor
echo "Output = ../tmp/condor_output/$timestamp/global.out" >> $condor_output_path_this_work/global_db_results_collector.condor
cat $condor_output_path_this_work/aux_db_results_collector.condor >> $condor_output_path_this_work/global_db_results_collector.condor
echo "Arguments = ResultsForAllDatabases('\$(tmp_path)', '\$(op_mode)' );" >> $condor_output_path_this_work/global_db_results_collector.condor
echo "Queue 1" >> $condor_output_path_this_work/global_db_results_collector.condor
# ---------------------------------------------------------------------------
# Per-database loop: for each selected database build a dag with one job per
# recording and operating mode, a per-mode results collector, and a DAGMan
# throttling config; then pre-generate its .condor.sub for the master dag.
# ---------------------------------------------------------------------------
for ((iii=0; iii<${#databases_idx[@]}; iii++))
do
ii=${databases_idx[$iii]}
#echo ${databases[${ii}]} ${db_paths[${ii}]}
# echo "Cleaning previous ${databases[${ii}]} database logs ..."
# rm $condor_output_path_this_work/${databases[${ii}]}.*
# echo "done."
# Count recordings actually on disk to validate the database path below.
num_recs_found=$(ls "${db_paths[${ii}]}"*.${db_ext[${ii}]}| wc -l )
cant_pids=${pidsXrec[${ii}]}
# Integer division: iterations split evenly among the cant_pids processes.
repetitions=$(echo "scale=0; $iterations/$cant_pids" | bc)
if [ "$num_recs_found" -eq "${db_sizes[${ii}]}" ]
then
# Optionally wipe temporary files left over from a previous run.
if [ "$do_cleanup" -eq "1" ]
then
log_files=( $(find ${tmp_path}${databases[${ii}]} -name "tmpfile*.*" -print) )
if [ "${#log_files[@]}" -gt "0" ]
then
echo "Cleaning previous ${databases[${ii}]} tmp files ..."
echo ${log_files[@]} | xargs rm
echo "done."
fi
fi
echo "# Dag generated by MEC" > "$condor_output_path_this_work/${databases[${ii}]}.dag"
# One pass per operating mode (auto / slightly-assisted / assisted ...).
for ((kk=0; kk<${#op_modes[@]}; kk++))
do
vars_str=()
jj=0
op_mode=${op_modes[${kk}]}
# NOTE(review): this variable appears unused below — confirm before removing.
condor_output_path_this_db_this_mode="$condor_output_path_this_work/${databases[${ii}]}_$op_mode${kk}"
# Submit description for this database/mode: log targets plus the base conf.
echo "Error = ../tmp/condor_output/$timestamp/${databases[${ii}]}_$op_mode${kk}_\$(Cluster)_\$(Process).err" > $condor_output_path_this_work/${databases[${ii}]}_${op_mode}${kk}.condor
echo "Log = ../tmp/condor_output/$timestamp/${databases[${ii}]}_$op_mode${kk}.log" >> $condor_output_path_this_work/${databases[${ii}]}_${op_mode}${kk}.condor
echo "Output = /dev/null" >> $condor_output_path_this_work/${databases[${ii}]}_${op_mode}${kk}.condor
cat $condor_output_path_this_work/aux.condor >> $condor_output_path_this_work/${databases[${ii}]}_${op_mode}${kk}.condor
parent_string="PARENT "
# Emit one JOB line per recording; its VARS line is collected in vars_str
# and written after the PARENT/CHILD line below.
while read rec_name
do
xbase=${rec_name##*/}
rec_name_only=${xbase%.*}
job_name="${rec_name_only}_${op_mode}${kk}"
echo "JOB ${job_name} $condor_output_path_this_work/${databases[${ii}]}_${op_mode}${kk}.condor" >> "$condor_output_path_this_work/${databases[${ii}]}.dag"
#echo "RETRY ${job_name} $retries" >> "$condor_output_path_this_work/${databases[${ii}]}.dag"
parent_string="${parent_string} ${job_name} "
#concurrency_limits = \"'mllamedo:$PIDs_X_DB_runing'\" \
vars_str[$jj]="VARS ${job_name} \
recording_name=\"${rec_name}\" \
recording_format=\"${db_fmt[${ii}]}\" \
op_mode=\"${op_mode}\" \
tmp_path=\"${tmp_path}${databases[${ii}]}/\" \
NumOfClusters=\"${NumOfClusters[${kk}]}\" \
ClusteringRepetitions=\"${ClusteringRepetitions[${kk}]}\" \
ClusterPresence=\"${ClusterPresence[${kk}]}\" \
Repetitions=\"${repetitions}\" \
cant_pids=\"${cant_pids}\" "
let "jj += 1"
#break
# Fed from find via process substitution so paths with spaces arrive as a
# single string per read.
done < <(find "${db_paths[${ii}]}"*.${db_ext[${ii}]} )
# Results-collector job for this mode, made CHILD of all recording jobs.
echo "Error = ../tmp/condor_output/$timestamp/${databases[${ii}]}_$op_mode${kk}.err" > $condor_output_path_this_work/${databases[${ii}]}_${op_mode}${kk}_recs_result_collector.condor
echo "Log = ../tmp/condor_output/$timestamp/${databases[${ii}]}_$op_mode${kk}.log" >> $condor_output_path_this_work/${databases[${ii}]}_${op_mode}${kk}_recs_result_collector.condor
echo "Output = /dev/null" >> $condor_output_path_this_work/${databases[${ii}]}_${op_mode}${kk}_recs_result_collector.condor
cat $condor_output_path_this_work/aux_recs_results_collector.condor >> $condor_output_path_this_work/${databases[${ii}]}_${op_mode}${kk}_recs_result_collector.condor
echo "JOB recs_results_collector_${op_mode}${kk} $condor_output_path_this_work/${databases[${ii}]}_${op_mode}${kk}_recs_result_collector.condor" >> "$condor_output_path_this_work/${databases[${ii}]}.dag"
parent_string="${parent_string} CHILD recs_results_collector_${op_mode}${kk}"
echo $parent_string >> "$condor_output_path_this_work/${databases[${ii}]}.dag"
for ((jj=0; jj<${#vars_str[@]}; jj++))
do
echo ${vars_str[$jj]} >> "$condor_output_path_this_work/${databases[${ii}]}.dag"
done
echo "VARS recs_results_collector_${op_mode}${kk} op_mode=\"${op_mode}\" tmp_path=\"${tmp_path}${databases[${ii}]}\" " >> "$condor_output_path_this_work/${databases[${ii}]}.dag"
done
# Optionally serialize modes per recording: the first mode's job for each
# recording becomes PARENT of that recording's jobs for all other modes.
if [ "$finish_preproc_first" -eq "1" ]
then
if [ "${#op_modes[@]}" -ge "2" ]
then
while read rec_name
do
xbase=${rec_name##*/}
rec_name_only=${xbase%.*}
intra_DB_parent_string="PARENT ${rec_name_only}_${op_modes[0]}0 CHILD"
for ((kk=1; kk<${#op_modes[@]}; kk++))
do
intra_DB_parent_string="${intra_DB_parent_string} ${rec_name_only}_${op_modes[$kk]}${kk} "
done
echo $intra_DB_parent_string >> "$condor_output_path_this_work/${databases[${ii}]}.dag"
#break
# Fed from find via process substitution so paths with spaces arrive as a
# single string per read.
done < <(find "${db_paths[${ii}]}"*.${db_ext[${ii}]} )
fi
fi
# DAGMan throttling configuration for this database's dag.
echo "CONFIG $condor_output_path_this_work/${databases[${ii}]}.config.dag" >> "$condor_output_path_this_work/${databases[${ii}]}.dag"
recordings_running=$(echo "scale=0; $processes_running_total/$databases_running/$cant_pids" | bc)
if [ "$limited" -eq "1" ]
then
echo "DAGMAN_MAX_JOBS_SUBMITTED=$recordings_running" > $condor_output_path_this_work/${databases[${ii}]}.config.dag
echo "${databases[${ii}]} correra $recordings_running registro/s a la vez."
echo "DAGMAN_SUBMIT_DELAY=$submit_delay" >> $condor_output_path_this_work/${databases[${ii}]}.config.dag
else
recordings_idle=$(echo "scale=0; $processes_idle_total/$databases_running/$cant_pids" | bc)
echo "DAGMAN_MAX_JOBS_IDLE=$recordings_idle" > $condor_output_path_this_work/${databases[${ii}]}.config.dag
echo "${databases[${ii}]} limitara a $recordings_idle registros idle."
fi
echo "DAGMAN_PENDING_REPORT_INTERVAL=300" >> $condor_output_path_this_work/${databases[${ii}]}.config.dag
#cat $condor_output_path_this_work/${databases[${ii}]}.dag
echo "Generating dag files ${databases[${ii}]} work ..."
# Generate the .condor.sub without submitting; the master dag submits it.
condor_submit_dag -f -no_submit $condor_output_path_this_work/${databases[${ii}]}.dag
echo "JOB ${databases[${ii}]} $condor_output_path_this_work/${databases[${ii}]}.dag.condor.sub" >> $condor_output_path_this_work/global.dag
global_parent_string="${global_parent_string} ${databases[${ii}]} "
else
# Recording count on disk does not match db_sizes: abort the whole run.
echo "Expected ${db_sizes[${ii}]} recs, found ${num_recs_found}. Check path ${db_paths[${ii}]}"
#echo ${db_sizes[${ii}]} $recs_found ${db_paths[${ii}]}
exit
fi
done
# ----- Master dag: fan every database dag into per-mode global collectors ----
global_parent_string="${global_parent_string} CHILD "
# One database-level results-collector job per operating mode.
for ((mode=0; mode<${#op_modes[@]}; mode++))
do
	collector="databases_collector_${op_modes[${mode}]}${mode}"
	echo "JOB ${collector} $condor_output_path_this_work/global_db_results_collector.condor" >> $condor_output_path_this_work/global.dag
	global_parent_string="${global_parent_string} ${collector}"
done
# PARENT <all database dags> CHILD <all collectors> (unquoted on purpose so
# echo collapses the accumulated double spaces).
echo $global_parent_string >> $condor_output_path_this_work/global.dag
# VARS lines binding each collector to its op_mode and the shared tmp_path.
for ((mode=0; mode<${#op_modes[@]}; mode++))
do
	echo "VARS databases_collector_${op_modes[${mode}]}${mode} op_mode=\"${op_modes[${mode}]}\" tmp_path=\"${tmp_path}\" " >> $condor_output_path_this_work/global.dag
done
#return
# Submit the master dag; -maxjobs caps concurrent database dags when limited.
if [ "$limited" -eq "1" ]
then
	condor_submit_dag -f -maxjobs $databases_running $condor_output_path_this_work/global.dag
else
	condor_submit_dag -f $condor_output_path_this_work/global.dag
fi
echo "Logs en $condor_output_path_this_work"