diff --git a/Manager.py b/Manager.py index b1b4ecb..f9453b5 100644 --- a/Manager.py +++ b/Manager.py @@ -133,7 +133,7 @@ def process_jobs(self,InputData,Job): self.subInfo.pop() else: self.totalFiles += self.subInfo[-1].numberOfFiles - self.subInfo[-1].reset_resubmit(self.header.AutoResubmit) #Reset the retries every time you start + # self.subInfo[-1].reset_resubmit(self.header.AutoResubmit) #Reset the retries every time you start write_script(processName[0],self.workdir,self.header,self.sl6_container) #Write the scripts you need to start the submission gc.enable() #submit the jobs to the batch as array job @@ -173,7 +173,7 @@ def resubmit_jobs(self): process.reachedBatch[it-1] = False #see how many jobs finished, were copied to workdir - def check_jobstatus(self, OutputDirectory, nameOfCycle,remove = False, autoresubmit = True): + def check_jobstatus(self, OutputDirectory, nameOfCycle, autoresubmit = False): missing = open(self.workdir+'/missing_files.txt','w+') waitingFlag_autoresub = False missingRootFiles = 0 diff --git a/sframe_batch.py b/sframe_batch.py index 497988c..e85fd92 100755 --- a/sframe_batch.py +++ b/sframe_batch.py @@ -108,6 +108,9 @@ def SFrameBatchMain(input_options): parser.add_option("--sl6container", action='store_true', help='Use singularity to run inside SL6-container on EL7-Nodes.') + parser.add_option("--AutoResubmit", + action="store_true", + help = 'This will enable the Auto Resubmission of jobs, while taking into account the number of retries specified in the XML. 
(per default AutoResubmit=0 )') (options, args) = parser.parse_args(input_options) if(options.el7worker): @@ -206,7 +209,7 @@ def SFrameBatchMain(input_options): print ' Result.xml created for further jobs' #submit jobs if asked for if options.submit: manager.submit_jobs(cycle.OutputDirectory,nameOfCycle) - manager.check_jobstatus(cycle.OutputDirectory, nameOfCycle,False,False) + manager.check_jobstatus(cycle.OutputDirectory, nameOfCycle) if options.resubmit: manager.resubmit_jobs() #get once into the loop for resubmission & merging @@ -223,9 +226,9 @@ def SFrameBatchMain(input_options): # This is necessary since qstat sometimes does not find the jobs it should monitor. # So it checks that it does not find the job 5 times before auto resubmiting it. for i in range(6): - manager.check_jobstatus(cycle.OutputDirectory,nameOfCycle) + manager.check_jobstatus(cycle.OutputDirectory,nameOfCycle,options.AutoResubmit) else: - manager.check_jobstatus(cycle.OutputDirectory,nameOfCycle) + manager.check_jobstatus(cycle.OutputDirectory,nameOfCycle,options.AutoResubmit) manager.merge_files(cycle.OutputDirectory,nameOfCycle,cycle.Cycle_InputData) if manager.get_subInfoFinish() or (not manager.merge.get_mergerStatus() and manager.missingFiles==0): @@ -236,7 +239,7 @@ def SFrameBatchMain(input_options): time.sleep(5) #print 'Total progress', tot_prog manager.merge_wait() - manager.check_jobstatus(cycle.OutputDirectory,nameOfCycle,False,False) + manager.check_jobstatus(cycle.OutputDirectory,nameOfCycle) print '-'*80 manager.print_status() stop = timeit.default_timer()