Mercurial > repos > peterjc > tmhmm_and_signalp
diff tools/protein_analysis/seq_analysis_utils.py @ 30:6d9d7cdf00fc draft
v0.2.11 Job splitting fast-fail; RXLR tools supports HMMER2 from BioConda; Capture more version information; misc internal changes
author | peterjc |
---|---|
date | Thu, 21 Sep 2017 11:15:55 -0400 |
parents | 3cb02adf4326 |
children | 20da7f48b56f |
line wrap: on
line diff
--- a/tools/protein_analysis/seq_analysis_utils.py Wed Feb 01 09:46:14 2017 -0500 +++ b/tools/protein_analysis/seq_analysis_utils.py Thu Sep 21 11:15:55 2017 -0400 @@ -7,9 +7,13 @@ multiprocessing so the function run_jobs instead is a simple pool approach using just the subprocess library. """ -import sys + +from __future__ import print_function + import os import subprocess +import sys + from time import sleep __version__ = "0.0.2" @@ -47,6 +51,7 @@ def thread_count(command_line_arg, default=1): + """Determine number of threads to use from the command line args.""" try: num = int(command_line_arg) except ValueError: @@ -137,7 +142,7 @@ handle.close() files.append(new_filename) # print "%i records in %s" % (len(records), new_filename) - except ValueError, err: + except ValueError as err: # Max length failure from parser - clean up try: handle.close() @@ -152,37 +157,47 @@ return files -def run_jobs(jobs, threads, pause=10, verbose=False): +def run_jobs(jobs, threads, pause=10, verbose=False, fast_fail=True): """Takes list of cmd strings, returns dict with error levels.""" pending = jobs[:] running = [] results = {} + skipped = [] if threads == 1: # Special case this for speed, don't need the waits for cmd in jobs: results[cmd] = subprocess.call(cmd, shell=True) return results + failed = False while pending or running: # See if any have finished for (cmd, process) in running: return_code = process.poll() # non-blocking if return_code is not None: results[cmd] = return_code + if return_code: + failed = True running = [(cmd, process) for (cmd, process) in running if cmd not in results] if verbose: - print "%i jobs pending, %i running, %i completed" \ - % (len(pending), len(running), len(results)) + print("%i jobs pending, %i running, %i completed" % + (len(pending), len(running), len(results))) # See if we can start any new threads + if pending and failed and fast_fail: + # Don't start any more jobs + if verbose: + print("Failed, will not start remaining %i jobs" % len(pending)) + skipped = pending + pending = [] while pending and len(running) < threads: cmd = pending.pop(0) if verbose: - print cmd + print(cmd) process = subprocess.Popen(cmd, shell=True) running.append((cmd, process)) # Loop... sleep(pause) if verbose: - print "%i jobs completed" % len(results) - assert set(jobs) == set(results) + print("%i jobs completed" % len(results)) + assert set(jobs) == set(results).union(skipped) return results