annotate tools/protein_analysis/seq_analysis_utils.py @ 33:4fcc441269f5 draft

"This is v0.2.12 with black formatting and Python 3 next fix etc"
author peterjc
date Thu, 17 Jun 2021 08:33:07 +0000
parents 20da7f48b56f
children 7a2e20baacee
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
a2eeeaa6f75e Migrated tool version 0.0.1 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
1 """A few useful functions for working with FASTA files and running jobs.
a2eeeaa6f75e Migrated tool version 0.0.1 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
2
a2eeeaa6f75e Migrated tool version 0.0.1 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
3 This module was originally written to hold common code used in both the TMHMM
a2eeeaa6f75e Migrated tool version 0.0.1 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
4 and SignalP wrappers in Galaxy.
a2eeeaa6f75e Migrated tool version 0.0.1 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
5
a2eeeaa6f75e Migrated tool version 0.0.1 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
6 Given Galaxy currently supports Python 2.4+ this cannot use the Python module
a2eeeaa6f75e Migrated tool version 0.0.1 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
7 multiprocessing so the function run_jobs instead is a simple pool approach
a2eeeaa6f75e Migrated tool version 0.0.1 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
8 using just the subprocess library.
a2eeeaa6f75e Migrated tool version 0.0.1 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
9 """
30
6d9d7cdf00fc v0.2.11 Job splitting fast-fail; RXLR tools supports HMMER2 from BioConda; Capture more version information; misc internal changes
peterjc
parents: 29
diff changeset
10
6d9d7cdf00fc v0.2.11 Job splitting fast-fail; RXLR tools supports HMMER2 from BioConda; Capture more version information; misc internal changes
peterjc
parents: 29
diff changeset
11 from __future__ import print_function
6d9d7cdf00fc v0.2.11 Job splitting fast-fail; RXLR tools supports HMMER2 from BioConda; Capture more version information; misc internal changes
peterjc
parents: 29
diff changeset
12
0
a2eeeaa6f75e Migrated tool version 0.0.1 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
13 import os
a2eeeaa6f75e Migrated tool version 0.0.1 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
14 import subprocess
30
6d9d7cdf00fc v0.2.11 Job splitting fast-fail; RXLR tools supports HMMER2 from BioConda; Capture more version information; misc internal changes
peterjc
parents: 29
diff changeset
15 import sys
6d9d7cdf00fc v0.2.11 Job splitting fast-fail; RXLR tools supports HMMER2 from BioConda; Capture more version information; misc internal changes
peterjc
parents: 29
diff changeset
16
0
a2eeeaa6f75e Migrated tool version 0.0.1 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
17 from time import sleep
a2eeeaa6f75e Migrated tool version 0.0.1 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
18
32
20da7f48b56f "Check this is up to date with all 2020 changes"
peterjc
parents: 30
diff changeset
# On Python 2 rebind ``range`` to the lazy ``xrange`` so the loops in this
# module do not build full lists.
if sys.version_info[0] < 3:
    range = xrange  # noqa: F821

# Version of this helper module.
__version__ = "0.0.4"
0
a2eeeaa6f75e Migrated tool version 0.0.1 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
23
7
5e62aefb2918 Uploaded v0.1.2 to Test Tool Shed
peterjc
parents: 6
diff changeset
try:
    from multiprocessing import cpu_count
except ImportError:
    # No multiprocessing module available (very old Python), so define
    # an equivalent locally - this is copied from multiprocessing:
    def cpu_count():
        """Return the number of CPUs in the system.

        Raises NotImplementedError if the count cannot be determined.
        """
        platform = sys.platform
        if platform == "win32":
            # Windows publishes the count as an environment variable.
            try:
                count = int(os.environ["NUMBER_OF_PROCESSORS"])
            except (ValueError, KeyError):
                count = 0
        elif platform == "darwin" or "bsd" in platform:
            # Ask sysctl; the binary lives under /usr on macOS.
            sysctl = "/sbin/sysctl -n hw.ncpu"
            if platform == "darwin":
                sysctl = "/usr" + sysctl
            try:
                with os.popen(sysctl) as pipe:
                    count = int(pipe.read())
            except ValueError:
                count = 0
        else:
            try:
                count = os.sysconf("SC_NPROCESSORS_ONLN")
            except (ValueError, OSError, AttributeError):
                count = 0

        if count < 1:
            raise NotImplementedError("cannot determine number of cpus")
        return count
7
5e62aefb2918 Uploaded v0.1.2 to Test Tool Shed
peterjc
parents: 6
diff changeset
54
5e62aefb2918 Uploaded v0.1.2 to Test Tool Shed
peterjc
parents: 6
diff changeset
55
5e62aefb2918 Uploaded v0.1.2 to Test Tool Shed
peterjc
parents: 6
diff changeset
def thread_count(command_line_arg, default=1):
    """Determine number of threads to use from the command line args.

    The argument is converted with ``int``; if that fails (for example a
    non-numeric string, or ``None`` because the argument was missing) the
    ``default`` is used instead. The value is then capped at the number of
    CPUs reported by ``cpu_count()``, when that can be determined.

    Exits via ``sys.exit`` with an error message if the resulting value
    is less than one.
    """
    try:
        num = int(command_line_arg)
    except (TypeError, ValueError):
        # Not something int() understands - fall back on the default.
        num = default
    if num < 1:
        sys.exit("Threads argument %r is not a positive integer" % command_line_arg)
    # Cap this with the physical limit of the machine,
    try:
        num = min(num, cpu_count())
    except NotImplementedError:
        # CPU count unknown, so trust the requested value as is.
        pass
    return num
5e62aefb2918 Uploaded v0.1.2 to Test Tool Shed
peterjc
parents: 6
diff changeset
73
5e62aefb2918 Uploaded v0.1.2 to Test Tool Shed
peterjc
parents: 6
diff changeset
74
0
a2eeeaa6f75e Migrated tool version 0.0.1 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
def _finish_record(title, seq, max_len, truncate):
    """Apply the optional truncation, then the optional length limit."""
    if truncate:
        seq = seq[:truncate]
    if max_len and len(seq) > max_len:
        raise ValueError(
            "Sequence %s is length %i, max length %i"
            % (title.split()[0], len(seq), max_len)
        )
    return seq


def fasta_iterator(filename, max_len=None, truncate=None):
    """Parse FASTA file yielding tuples of (name, sequence).

    The name is the full title line without the leading ">" and with
    trailing whitespace removed. The sequence has its lines joined after
    stripping surrounding whitespace from each.

    If truncate is given, each sequence is cut to that many letters. If
    max_len is given, a ValueError is raised for any sequence still longer
    than that after truncation.

    Blank lines and lines starting with "#" are ignored before the first
    record; any other text found there raises a ValueError.
    """
    title = ""
    parts = []
    # The with-statement ensures the file is closed even if we raise
    # part way through, or the caller abandons the generator early.
    with open(filename) as handle:
        for line in handle:
            if line.startswith(">"):
                if title:
                    seq = _finish_record(title, "".join(parts), max_len, truncate)
                    yield title, seq
                title = line[1:].rstrip()
                parts = []
            elif title:
                parts.append(line.strip())
            elif not line.strip() or line.startswith("#"):
                # Ignore blank lines, and any comment lines
                # between records (starting with hash).
                continue
            else:
                raise ValueError("Bad FASTA line %r" % line)
    if title:
        yield title, _finish_record(title, "".join(parts), max_len, truncate)
    # Just fall off the end here: an explicit "raise StopIteration" in a
    # generator is turned into a RuntimeError on Python 3.7+ (PEP 479).
a2eeeaa6f75e Migrated tool version 0.0.1 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
112
29
3cb02adf4326 v0.2.9 Python style improvements
peterjc
parents: 26
diff changeset
113
32
20da7f48b56f "Check this is up to date with all 2020 changes"
peterjc
parents: 30
diff changeset
def split_fasta(
    input_filename,
    output_filename_base,
    n=500,
    truncate=None,
    keep_descr=False,
    max_len=None,
):
    """Split FASTA file into sub-files each of at most n sequences.

    Returns a list of the filenames used, which are named
    "<output_filename_base>.<index>.tmp" with the index counting from zero.
    Each sequence can also be truncated (since we only need the start for
    SignalP), and have its description discarded (since we don't usually
    care about it and some tools don't like very long title lines).
    Sequences are written wrapped at 60 letters per line.

    If a max_len is given and any sequence exceeds it, then any sub-files
    already written are removed and the ValueError from the parser is
    re-raised.
    """
    iterator = fasta_iterator(input_filename, max_len, truncate)
    files = []
    try:
        while True:
            # Collect the next batch of up to n records
            records = []
            for _ in range(n):
                try:
                    records.append(next(iterator))
                except StopIteration:
                    break
            if not records:
                break
            new_filename = "%s.%i.tmp" % (output_filename_base, len(files))
            with open(new_filename, "w") as handle:
                for title, seq in records:
                    if not keep_descr:
                        # Keep just the identifier (first word)
                        title = title.split()[0]
                    handle.write(">%s\n" % title)
                    for start in range(0, len(seq), 60):
                        handle.write(seq[start : start + 60] + "\n")
            files.append(new_filename)
    except ValueError:
        # Max length failure from parser - clean up
        for f in files:
            if os.path.isfile(f):
                os.remove(f)
        raise
    for f in files:
        assert os.path.isfile(f), "Missing split file %r (!??)" % f
    return files
a2eeeaa6f75e Migrated tool version 0.0.1 from old tool shed archive to new tool shed repository
peterjc
parents:
diff changeset
172
29
3cb02adf4326 v0.2.9 Python style improvements
peterjc
parents: 26
diff changeset
173
30
6d9d7cdf00fc v0.2.11 Job splitting fast-fail; RXLR tools supports HMMER2 from BioConda; Capture more version information; misc internal changes
peterjc
parents: 29
diff changeset
def run_jobs(jobs, threads, pause=10, verbose=False, fast_fail=True):
    """Take list of cmd strings, return dict with error levels.

    Each command string is run via the shell. The returned dict maps each
    command string that was run to its return code.

    With threads equal to one the commands are simply run one after
    another; in that case pause, verbose and fast_fail are not used and
    every command is run.

    Otherwise up to threads commands are run at once, polling every pause
    seconds. If fast_fail is true then once any command has returned a
    non-zero code no further commands are started (those already running
    are still waited for), and the commands never started are absent from
    the returned dict.

    Raises ValueError if threads is less than one.
    """
    if threads < 1:
        # Otherwise the pool loop below could never start a job.
        raise ValueError("Need at least one thread, not %r" % threads)
    results = {}
    if threads == 1:
        # Special case this for speed, don't need the waits
        for cmd in jobs:
            results[cmd] = subprocess.call(cmd, shell=True)
        return results
    pending = jobs[:]
    running = []
    skipped = []
    failed = False
    while pending or running:
        # See if any have finished. Track this per process (not per
        # command string) so a repeated command is not dropped early.
        still_running = []
        for cmd, process in running:
            return_code = process.poll()  # non-blocking
            if return_code is None:
                still_running.append((cmd, process))
                continue
            results[cmd] = return_code
            if return_code:
                failed = True
        running = still_running
        if verbose:
            print(
                "%i jobs pending, %i running, %i completed"
                % (len(pending), len(running), len(results))
            )
        # See if we can start any new threads
        if pending and failed and fast_fail:
            # Don't start any more jobs
            if verbose:
                print("Failed, will not start remaining %i jobs" % len(pending))
            skipped = pending
            pending = []
        while pending and len(running) < threads:
            cmd = pending.pop(0)
            if verbose:
                print(cmd)
            process = subprocess.Popen(cmd, shell=True)
            running.append((cmd, process))
        # Loop... but only wait if there is still something to wait for
        if pending or running:
            sleep(pause)
    if verbose:
        print("%i jobs completed" % len(results))
    assert set(jobs) == set(results).union(skipped)
    return results