ECG-Kit 1.0

File: <base>/common/condor/run_a2hbc.condor (13,794 bytes)
#!/bin/bash

# ---------------------------------------------------------------------------
# Database catalogue.
#
# Six parallel arrays: position N of every array describes the same database.
#   databases : short name; names the DAG/submit files and the tmp sub-folder
#   db_fmt    : value handed to a2hbc as 'recording_format'
#   pidsXrec  : processes queued per recording (becomes 'cant_pids')
#   db_sizes  : number of recordings expected in the folder
#   db_ext    : file extension used to locate the recordings
#   db_paths  : recordings folder; the extension glob is appended directly to
#               it, so the trailing slash is required
# ---------------------------------------------------------------------------
databases=()
db_fmt=()
pidsXrec=()
db_sizes=()
db_ext=()
db_paths=()

# Appends one row to the parallel arrays above.
# Arguments: $1 name, $2 format, $3 processes per recording,
#            $4 expected recordings, $5 file extension, $6 recordings folder
_add_db() {
	local slot=${#databases[@]}
	databases[$slot]=$1
	db_fmt[$slot]=$2
	pidsXrec[$slot]=$3
	db_sizes[$slot]=$4
	db_ext[$slot]=$5
	db_paths[$slot]=$6
}

#        name          fmt pids recs ext  folder                                          (index)
_add_db  AHA           AHA 3    154  ecg  "/extra/database/bio/ecg/thew/aha/"                                                                                        # 0
_add_db  ESTTDB        MIT 10   90   dat  "/extra/database/bio/ecg/thew/European ST-T Database/"                                                                     # 1
_add_db  LTSTDB        MIT 30   86   dat  "/extra/database/bio/ecg/thew/Long-Term ST Database/"                                                                      # 2
_add_db  MITBIH-AR     MIT 3    44   dat  "/extra/database/bio/ecg/thew/mitbih-ar/"                                                                                  # 3
_add_db  MITBIH-AR-DS2 MIT 3    22   dat  "/extra/database/bio/ecg/thew/ds2/"                                                                                        # 4
_add_db  MITBIH-SUP    MIT 3    78   dat  "/extra/database/bio/ecg/thew/MIT-BIH Supraventricular Arrhythmia Database/"                                               # 5
_add_db  MITBIH-ST     MIT 3    18   dat  "/extra/database/bio/ecg/thew/MIT-BIH ST Change Database/"                                                                 # 6
_add_db  INCART        MIT 3    75   dat  "/extra/database/bio/ecg/thew/St Petersburg Institute of Cardiological Technics 12-lead Arrhythmia Database/"              # 7
_add_db  Biosigna      HES 3    56   hes  "/extra/database/bio/ecg/thew/biosigna/"                                                                                   # 8
_add_db  MITBIH-LT     MIT 30   7    dat  "/extra/database/bio/ecg/thew/The MIT-BIH Long Term Database/"                                                             # 9

# The helper is only needed while the catalogue is being filled in.
unset -f _add_db

# ---------------------------------------------------------------------------
# Operating-mode configurations (parallel arrays, one column per configuration).
# Every selected database is processed once per column.
# ---------------------------------------------------------------------------

# Typical configuration
op_modes=(auto slightly-assisted assisted assisted)
NumOfClusters=(9 9 9 12)
ClusteringRepetitions=(1 1 1 1)
ClusterPresence=(50 75 50 50)

# Long-term configuration
#op_modes=(assisted)
#NumOfClusters=(15)
#ClusteringRepetitions=(5)
#ClusterPresence=(50)

# When 1, each recording's job for the first operating mode is declared as the
# DAG parent of that recording's jobs for the remaining modes; when 0 no such
# dependencies between jobs are written.
#finish_preproc_first=1
finish_preproc_first=0

# When 1, the number of processes running in Condor is limited
# (DAGMAN_MAX_JOBS_SUBMITTED per database and -maxjobs on the global DAG);
# when 0, only the number of idle jobs is capped.
#limited=1
limited=0

# Number of times a recording is retried when an error occurs. Meant for
# failures while accessing a file on the network filesystem.
retries=1

# Written as DAGMAN_SUBMIT_DELAY when limited=1.
submit_delay=0
# Budget of running processes, shared among databases and processes per recording.
processes_running_total=1200
# Budget of idle processes, so that not too much work is queued in Condor.
processes_idle_total=700
# Number of databases processed at the same time.
databases_running=4
# Total iterations per recording; divided by the processes per recording to
# obtain the 'Repetitions' each process performs.
iterations=60


# Not referenced in the visible part of this script.
aux_str="*."
# Root of the per-database temporary folders handed to the jobs as 'tmp_path'.
tmp_path=/extra/scratch/bio/mllamedo/tmp/
# Root folder under which each run creates its own timestamped output folder.
condor_output_path=/home/bio/mllamedo/ecg_classification/tmp/condor_output/

#do_cleanup=1 # DO remove temporary files left by previous runs
do_cleanup=0 # do NOT remove temporary files left by previous runs

#echo "Cleaning previous $condor_output_path_this_work/global.dag files ..."
#rm ./global.*
#echo "done."

# Start of the PARENT ... CHILD ... line of the global DAG; database names and
# collector job names are appended to it later on.
global_parent_string="PARENT"

# Index of the last entry of the database catalogue (only used by the
# commented-out "all databases" selection below).
aux=$(( ${#databases[@]} - 1 ))
#databases_idx=($(seq 0 $aux) )
#databases_idx=($(seq 0 8) )
# Indexes (into the catalogue arrays) of the databases to process in this run.
databases_idx=(3 4)

# Number of databases processed at the same time.
# This must be derived AFTER databases_idx is assigned: computing it earlier
# yields 0, which later becomes a division by zero when the per-database job
# limits are calculated.
#   - unlimited run (limited=0): every selected database runs at once;
#   - limited run: never more than the number of selected databases.
databases_selected=${#databases_idx[@]}
if [ "${limited:-0}" -eq 0 ] || [ "${databases_running:-$databases_selected}" -ge "$databases_selected" ]
then
	databases_running=$databases_selected
fi

# Every run writes its DAGs, submit files and logs into its own folder, named
# after the time the script was started, below $condor_output_path.
timestamp=$(date +%Y_%m_%d-%H_%M_%S)
condor_output_path_this_work="$condor_output_path$timestamp"

# -p also creates missing parent folders. Stop right here when the folder
# cannot be created: every file generated below lives inside it, and going on
# would end up submitting a DAG that does not exist.
if ! mkdir -p -- "$condor_output_path_this_work"
then
	echo "Cannot create output folder $condor_output_path_this_work" >&2
	exit 1
fi

job_submit_template="$condor_output_path_this_work/aux.condor"
recs_collector_template="$condor_output_path_this_work/aux_recs_results_collector.condor"
db_collector_template="$condor_output_path_this_work/aux_db_results_collector.condor"
global_collector_submit="$condor_output_path_this_work/global_db_results_collector.condor"

echo "# Dag generated by MEC" > "$condor_output_path_this_work/global.dag"

# Settings shared by every submit file. The here-documents are quoted ('EOF')
# so that $(...) reaches the file literally, as a Condor macro, instead of
# being expanded by the shell.
cat > "$job_submit_template" <<'EOF'
Universe = vanilla
InitialDir = /home/bio/mllamedo/ecg_classification/a2hbc
Executable      = $(InitialDir)/scripts/common/condor/condor_exec.sh
GetEnv	= false
Requirements = ((Arch == "INTEL") || (Arch == "X86_64")) && (OpSys == "LINUX") && (Disk >= DiskUsage) && ((Memory * 1024) >= ImageSize) && (TARGET.FileSystemDomain == MY.FileSystemDomain)
+ShortJob = TRUE
nice_user = true
EOF

# The shared settings are duplicated because the result-collection jobs use
# them too; the copies must be taken before the a2hbc arguments are appended.
cp "$job_submit_template" "$recs_collector_template"
cp "$job_submit_template" "$db_collector_template"

# Per-recording job: the a2hbc call, queued once per process of the recording.
cat >> "$job_submit_template" <<'EOF'
Arguments  = a2hbc(	\
		   'recording_name', '$(recording_name)', \
		   'recording_format', '$(recording_format)', \
		   'op_mode', '$(op_mode)', \
		   'SimulateExpert', true, \
		   'Repetitions', $(Repetitions), \
		   'tmp_path', '$(tmp_path)', \
		   'NumOfClusters', $(NumOfClusters), \
		   'ClusteringRepetitions', $(ClusteringRepetitions), \
		   'ClusterPresence', $(ClusterPresence), \
		   'cant_pids', $(cant_pids), \
		   'this_pid', $(Process)+1 \
		   );
Queue $(cant_pids)
EOF

# Per-database collector of the recordings' results.
cat >> "$recs_collector_template" <<'EOF'
Arguments  = CollectResutls('$(tmp_path)', '$(op_mode)' );
Queue 1
EOF

# Collector of the results of all databases: log locations (these do need the
# shell to expand $timestamp), then the shared settings, then the call itself.
{
	echo "Error = ../tmp/condor_output/$timestamp/global.err"
	echo "Log = ../tmp/condor_output/$timestamp/global.log"
	echo "Output = ../tmp/condor_output/$timestamp/global.out"
	cat "$db_collector_template"
	cat <<'EOF'
Arguments  = ResultsForAllDatabases('$(tmp_path)', '$(op_mode)' );
Queue 1
EOF
} > "$global_collector_submit"

# Builds one DAG per selected database and registers it as a node of the
# global DAG.
#
# For every database listed in databases_idx:
#   1. check that the expected number of recordings is on disk (abort if not);
#   2. optionally remove temporary files left by previous runs;
#   3. for every operating-mode configuration, write the submit file, one JOB
#      and one VARS line per recording, and a results-collector job that is
#      the CHILD of all the recording jobs of that configuration;
#   4. optionally make the first configuration's job of each recording the
#      PARENT of that recording's jobs in the other configurations;
#   5. write the DAGMan configuration of the database and generate (without
#      submitting) its .condor.sub file through condor_submit_dag;
#   6. append the database to global.dag and to global_parent_string.
for ((iii=0; iii<${#databases_idx[@]}; iii++))
do
	ii=${databases_idx[$iii]}
	db_name=${databases[$ii]}
	db_dag="$condor_output_path_this_work/${db_name}.dag"
	db_config="$condor_output_path_this_work/${db_name}.config.dag"

	# List the recordings once, with a glob kept in an array, so that paths
	# containing spaces stay intact. An unmatched glob is left as the literal
	# pattern, which does not exist on disk: treat that as "no recordings".
	rec_files=( "${db_paths[$ii]}"*."${db_ext[$ii]}" )
	if [ ! -e "${rec_files[0]}" ] && [ ! -L "${rec_files[0]}" ]
	then
		rec_files=()
	fi
	num_recs_found=${#rec_files[@]}

	if [ "$num_recs_found" -ne "${db_sizes[$ii]}" ]
	then
		# Report on stderr and leave with a failure status so that callers can
		# tell that nothing was submitted.
		echo "Expected ${db_sizes[$ii]} recs, found ${num_recs_found}. Check path ${db_paths[$ii]}" >&2
		exit 1
	fi

	cant_pids=${pidsXrec[$ii]}
	# Integer division: iterations each of the cant_pids processes performs.
	repetitions=$(echo "scale=0; $iterations/$cant_pids" | bc)

	if [ "$do_cleanup" -eq "1" ]
	then
		# NUL-delimited so that unusual file names are removed correctly.
		log_files=()
		while IFS= read -r -d '' tmp_file
		do
			log_files[${#log_files[@]}]=$tmp_file
		done < <(find "${tmp_path}${db_name}" -name "tmpfile*.*" -print0)

		if [ "${#log_files[@]}" -gt "0" ]
		then
			echo "Cleaning previous ${db_name} tmp files ..."
			printf '%s\0' "${log_files[@]}" | xargs -0 rm --
			echo "done."
		fi
	fi

	echo "# Dag generated by MEC" > "$db_dag"

	for ((kk=0; kk<${#op_modes[@]}; kk++))
	do
		op_mode=${op_modes[$kk]}
		# The configuration index is part of every name because the same
		# operating mode may appear in more than one configuration.
		mode_tag="${op_mode}${kk}"
		submit_file="$condor_output_path_this_work/${db_name}_${mode_tag}.condor"
		collector_file="$condor_output_path_this_work/${db_name}_${mode_tag}_recs_result_collector.condor"
		collector_job="recs_results_collector_${mode_tag}"

		# Submit file shared by all the recordings of this database/configuration.
		{
			echo "Error = ../tmp/condor_output/$timestamp/${db_name}_${mode_tag}_\$(Cluster)_\$(Process).err"
			echo "Log = ../tmp/condor_output/$timestamp/${db_name}_${mode_tag}.log"
			echo "Output = /dev/null"
			cat "$condor_output_path_this_work/aux.condor"
		} > "$submit_file"

		# JOB lines are written right away; VARS lines are buffered because the
		# collector JOB and the PARENT/CHILD line are written before them.
		vars_str=()
		parent_string="PARENT"

		for rec_name in "${rec_files[@]}"
		do
			xbase=${rec_name##*/}
			rec_name_only=${xbase%.*}
			job_name="${rec_name_only}_${mode_tag}"

			echo "JOB ${job_name} ${submit_file}" >> "$db_dag"
			#echo "RETRY ${job_name} $retries" >> "$db_dag"

			parent_string="${parent_string} ${job_name}"

			#concurrency_limits = \"'mllamedo:$PIDs_X_DB_runing'\" \

			vars_line="VARS ${job_name}"
			vars_line="${vars_line} recording_name=\"${rec_name}\""
			vars_line="${vars_line} recording_format=\"${db_fmt[$ii]}\""
			vars_line="${vars_line} op_mode=\"${op_mode}\""
			vars_line="${vars_line} tmp_path=\"${tmp_path}${db_name}/\""
			vars_line="${vars_line} NumOfClusters=\"${NumOfClusters[$kk]}\""
			vars_line="${vars_line} ClusteringRepetitions=\"${ClusteringRepetitions[$kk]}\""
			vars_line="${vars_line} ClusterPresence=\"${ClusterPresence[$kk]}\""
			vars_line="${vars_line} Repetitions=\"${repetitions}\""
			vars_line="${vars_line} cant_pids=\"${cant_pids}\""
			vars_str[${#vars_str[@]}]=$vars_line
		done

		# Submit file of the job that collects this configuration's results.
		{
			echo "Error = ../tmp/condor_output/$timestamp/${db_name}_${mode_tag}.err"
			echo "Log = ../tmp/condor_output/$timestamp/${db_name}_${mode_tag}.log"
			echo "Output = /dev/null"
			cat "$condor_output_path_this_work/aux_recs_results_collector.condor"
		} > "$collector_file"

		{
			echo "JOB ${collector_job} ${collector_file}"
			# The collector runs once every recording job has finished.
			echo "${parent_string} CHILD ${collector_job}"
			# Quoted on purpose: the recording path must reach the DAG verbatim.
			for vars_line in "${vars_str[@]}"
			do
				echo "$vars_line"
			done
			echo "VARS ${collector_job} op_mode=\"${op_mode}\" tmp_path=\"${tmp_path}${db_name}\" "
		} >> "$db_dag"
	done

	# Optional intra-database dependencies: for each recording, the job of the
	# first configuration must finish before the jobs of the other ones start.
	if [ "$finish_preproc_first" -eq "1" ] && [ "${#op_modes[@]}" -ge "2" ]
	then
		for rec_name in "${rec_files[@]}"
		do
			xbase=${rec_name##*/}
			rec_name_only=${xbase%.*}

			intra_DB_parent_string="PARENT ${rec_name_only}_${op_modes[0]}0 CHILD"
			for ((kk=1; kk<${#op_modes[@]}; kk++))
			do
				intra_DB_parent_string="${intra_DB_parent_string} ${rec_name_only}_${op_modes[$kk]}${kk}"
			done

			echo "$intra_DB_parent_string" >> "$db_dag"
		done
	fi

	echo "CONFIG ${db_config}" >> "$db_dag"

	# DAGMan configuration of this database. The process budgets are split
	# among the databases running at once and the processes of each recording.
	if [ "$limited" -eq "1" ]
	then
		recordings_running=$(echo "scale=0; $processes_running_total/$databases_running/$cant_pids" | bc)
		echo "DAGMAN_MAX_JOBS_SUBMITTED=$recordings_running" > "$db_config"
		echo "${db_name} correra $recordings_running registro/s a la vez."
		echo "DAGMAN_SUBMIT_DELAY=$submit_delay" >> "$db_config"
	else
		recordings_idle=$(echo "scale=0; $processes_idle_total/$databases_running/$cant_pids" | bc)
		echo "DAGMAN_MAX_JOBS_IDLE=$recordings_idle" > "$db_config"
		echo "${db_name} limitara a $recordings_idle registros idle."
	fi

	echo "DAGMAN_PENDING_REPORT_INTERVAL=300" >> "$db_config"

	#cat "$db_dag"

	echo "Generating dag files ${db_name} work ..."

	# -no_submit only generates <dag>.condor.sub; the database DAG is run as a
	# node of the global DAG.
	condor_submit_dag -f -no_submit "$db_dag"

	echo "JOB ${db_name} ${db_dag}.condor.sub" >> "$condor_output_path_this_work/global.dag"
	global_parent_string="${global_parent_string} ${db_name} "
done

# Completes the global DAG and submits it.
#
# One "databases_collector" job is added per operating-mode configuration;
# every such job is a CHILD of all the database nodes accumulated in
# global_parent_string. File layout: JOB lines, the PARENT/CHILD line, then
# the VARS lines.
global_dag_file="$condor_output_path_this_work/global.dag"
global_collector_submit_file="$condor_output_path_this_work/global_db_results_collector.condor"

global_parent_string="${global_parent_string} CHILD "

# VARS lines are buffered so that a single pass over the configurations is
# enough while the order of the lines in the file stays the same.
global_vars_lines=()
for ((kk=0; kk<${#op_modes[@]}; kk++))
do
	db_collector_job="databases_collector_${op_modes[$kk]}${kk}"

	echo "JOB ${db_collector_job} ${global_collector_submit_file}" >> "$global_dag_file"
	global_parent_string="${global_parent_string} ${db_collector_job}"
	global_vars_lines[$kk]="VARS ${db_collector_job} op_mode=\"${op_modes[$kk]}\" tmp_path=\"${tmp_path}\" "
done

# Left unquoted on purpose: word splitting squeezes the runs of blanks that
# accumulated while the string was being built.
echo $global_parent_string >> "$global_dag_file"

for ((kk=0; kk<${#global_vars_lines[@]}; kk++))
do
	echo "${global_vars_lines[$kk]}" >> "$global_dag_file"
done

#return

# A limited run also caps how many database DAGs run at the same time.
if [ "$limited" -eq "1" ]
then
	submit_dag_args=(-f -maxjobs $databases_running)
else
	submit_dag_args=(-f)
fi
condor_submit_dag "${submit_dag_args[@]}" "$global_dag_file"

echo "Logs en $condor_output_path_this_work"