comparison tools/effectiveT3/effectiveT3.py @ 35:e2711662802b draft default tip

planemo upload for repository https://github.com/peterjc/pico_galaxy/tree/master/tools/effectiveT3 commit d67596914a7bbe183851437eaafe8c7305877e5a-dirty
author peterjc
date Fri, 22 Feb 2019 10:13:22 -0500
parents 011cfce866f1
children
comparison
equal deleted inserted replaced
34:011cfce866f1 35:e2711662802b
10 It then calls the standalone Effective T3 v1.0.1 program (not the 10 It then calls the standalone Effective T3 v1.0.1 program (not the
11 webservice), and reformats the semi-colon separated output into 11 webservice), and reformats the semi-colon separated output into
12 tab separated output for use in Galaxy. 12 tab separated output for use in Galaxy.
13 """ 13 """
14 import os 14 import os
15
15 # We want to be able to use shutil.which, but need Python 3.3+ 16 # We want to be able to use shutil.which, but need Python 3.3+
16 # import shutil 17 # import shutil
17 import subprocess 18 import subprocess
18 import sys 19 import sys
19 20
32 # TODO - Get version of the JAR file dynamically? 33 # TODO - Get version of the JAR file dynamically?
33 print("Wrapper v0.0.20, for %s" % effective_t3_jarname) 34 print("Wrapper v0.0.20, for %s" % effective_t3_jarname)
34 sys.exit(0) 35 sys.exit(0)
35 36
36 if len(sys.argv) != 5: 37 if len(sys.argv) != 5:
37 sys.exit("Require four arguments: model, threshold, input protein FASTA file & output tabular file") 38 sys.exit(
39 "Require four arguments: model, threshold, input protein "
40 "FASTA file & output tabular file"
41 )
38 42
39 model, threshold, fasta_file, tabular_file = sys.argv[1:] 43 model, threshold, fasta_file, tabular_file = sys.argv[1:]
40 44
41 if not os.path.isfile(fasta_file): 45 if not os.path.isfile(fasta_file):
42 sys.exit("Input FASTA file not found: %s" % fasta_file) 46 sys.exit("Input FASTA file not found: %s" % fasta_file)
43 47
44 if threshold not in ["selective", "sensitive"] and not threshold.startswith("cutoff="): 48 if threshold not in ["selective", "sensitive"] and not threshold.startswith("cutoff="):
45 sys.exit("Threshold should be selective, sensitive, or cutoff=..., not %r" % threshold) 49 sys.exit(
50 "Threshold should be selective, sensitive, or cutoff=..., not %r" % threshold
51 )
46 52
47 53
48 def clean_tabular(raw_handle, out_handle): 54 def clean_tabular(raw_handle, out_handle):
49 """Clean up Effective T3 output to make it tabular.""" 55 """Clean up Effective T3 output to make it tabular."""
50 count = 0 56 count = 0
51 positive = 0 57 positive = 0
52 errors = 0 58 errors = 0
53 for line in raw_handle: 59 for line in raw_handle:
54 if not line or line.startswith("#") or line.startswith("Id; Description; Score;"): 60 if (
61 not line
62 or line.startswith("#")
63 or line.startswith("Id; Description; Score;")
64 ):
55 continue 65 continue
56 assert line.count(";") >= 3, repr(line) 66 assert line.count(";") >= 3, repr(line)
57 # Normally there will just be three semi-colons, however the 67 # Normally there will just be three semi-colons, however the
58 # original FASTA file's ID or description might have had 68 # original FASTA file's ID or description might have had
59 # semi-colons in it as well, hence the following hackery: 69 # semi-colons in it as well, hence the following hackery:
80 def run(cmd): 90 def run(cmd):
81 """Run the command line string via subprocess.""" 91 """Run the command line string via subprocess."""
82 # Avoid using shell=True when we call subprocess to ensure if the Python 92 # Avoid using shell=True when we call subprocess to ensure if the Python
83 # script is killed, so too is the child process. 93 # script is killed, so too is the child process.
84 try: 94 try:
85 child = subprocess.Popen(cmd, universal_newlines=True, 95 child = subprocess.Popen(
86 stdout=subprocess.PIPE, stderr=subprocess.PIPE) 96 cmd, universal_newlines=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
97 )
87 except Exception as err: 98 except Exception as err:
88 sys.exit("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err)) 99 sys.exit("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err))
89 # Use .communicate as can get deadlocks with .wait(), 100 # Use .communicate as can get deadlocks with .wait(),
90 stdout, stderr = child.communicate() 101 stdout, stderr = child.communicate()
91 return_code = child.returncode 102 return_code = child.returncode
92 if return_code or stderr.startswith("Exception in thread"): 103 if return_code or stderr.startswith("Exception in thread"):
93 cmd_str = " ".join(cmd) # doesn't quote spaces etc 104 cmd_str = " ".join(cmd) # doesn't quote spaces etc
94 if stderr and stdout: 105 if stderr and stdout:
95 sys.exit("Return code %i from command:\n%s\n\n%s\n\n%s" % (return_code, cmd_str, stdout, stderr)) 106 sys.exit(
107 "Return code %i from command:\n%s\n\n%s\n\n%s"
108 % (return_code, cmd_str, stdout, stderr)
109 )
96 else: 110 else:
97 sys.exit("Return code %i from command:\n%s\n%s" % (return_code, cmd_str, stderr)) 111 sys.exit(
112 "Return code %i from command:\n%s\n%s" % (return_code, cmd_str, stderr)
113 )
98 114
99 115
100 try: 116 try:
101 from shutil import which 117 from shutil import which
102 except ImportError: 118 except ImportError:
114 """ 130 """
115 # Check that a given file can be accessed with the correct mode. 131 # Check that a given file can be accessed with the correct mode.
116 # Additionally check that `file` is not a directory, as on Windows 132 # Additionally check that `file` is not a directory, as on Windows
117 # directories pass the os.access check. 133 # directories pass the os.access check.
118 def _access_check(fn, mode): 134 def _access_check(fn, mode):
119 return (os.path.exists(fn) and os.access(fn, mode) and 135 return os.path.exists(fn) and os.access(fn, mode) and not os.path.isdir(fn)
120 not os.path.isdir(fn))
121 136
122 # Short circuit. If we're given a full path which matches the mode 137 # Short circuit. If we're given a full path which matches the mode
123 # and it exists, we're done here. 138 # and it exists, we're done here.
124 if _access_check(cmd, mode): 139 if _access_check(cmd, mode):
125 return cmd 140 return cmd
178 effective_t3_jar = None 193 effective_t3_jar = None
179 if not effective_t3_dir or not effective_t3_jar: 194 if not effective_t3_dir or not effective_t3_jar:
180 sys.exit("Effective T3 JAR file %r not found in %r" % (effective_t3_jarname, dirs)) 195 sys.exit("Effective T3 JAR file %r not found in %r" % (effective_t3_jarname, dirs))
181 196
182 if not os.path.isdir(os.path.join(effective_t3_dir, "module")): 197 if not os.path.isdir(os.path.join(effective_t3_dir, "module")):
183 sys.exit("Effective T3 module folder not found: %r" % os.path.join(effective_t3_dir, "module")) 198 sys.exit(
199 "Effective T3 module folder not found: %r"
200 % os.path.join(effective_t3_dir, "module")
201 )
184 202
185 effective_t3_model = os.path.join(effective_t3_dir, "module", model) 203 effective_t3_model = os.path.join(effective_t3_dir, "module", model)
186 if not os.path.isfile(effective_t3_model): 204 if not os.path.isfile(effective_t3_model):
187 sys.stderr.write("Contents of %r is %s\n" 205 sys.stderr.write(
188 % (os.path.join(effective_t3_dir, "module"), 206 "Contents of %r is %s\n"
189 ", ".join(repr(p) for p in os.listdir(os.path.join(effective_t3_dir, "module"))))) 207 % (
208 os.path.join(effective_t3_dir, "module"),
209 ", ".join(
210 repr(p) for p in os.listdir(os.path.join(effective_t3_dir, "module"))
211 ),
212 )
213 )
190 sys.stderr.write("Main JAR was found: %r\n" % effective_t3_jar) 214 sys.stderr.write("Main JAR was found: %r\n" % effective_t3_jar)
191 sys.exit("Effective T3 model JAR file not found: %r" % effective_t3_model) 215 sys.exit("Effective T3 model JAR file not found: %r" % effective_t3_model)
192 216
193 # We will have write access wherever the output should be, 217 # We will have write access wherever the output should be,
194 if tabular_file == "/dev/stdout": 218 if tabular_file == "/dev/stdout":
198 222
199 # Use absolute paths since will change current directory... 223 # Use absolute paths since will change current directory...
200 tabular_file = os.path.abspath(tabular_file) 224 tabular_file = os.path.abspath(tabular_file)
201 fasta_file = os.path.abspath(fasta_file) 225 fasta_file = os.path.abspath(fasta_file)
202 226
203 cmd = ["java", "-jar", effective_t3_jar, 227 cmd = [
204 "-f", fasta_file, 228 "java",
205 "-m", model, 229 "-jar",
206 "-t", threshold, 230 effective_t3_jar,
207 "-o", temp_file, 231 "-f",
208 "-q"] 232 fasta_file,
233 "-m",
234 model,
235 "-t",
236 threshold,
237 "-o",
238 temp_file,
239 "-q",
240 ]
209 241
210 try: 242 try:
211 # Must run from directory above the module subfolder: 243 # Must run from directory above the module subfolder:
212 os.chdir(effective_t3_dir) 244 os.chdir(effective_t3_dir)
213 except Exception: 245 except Exception:
226 out_handle.close() 258 out_handle.close()
227 259
228 os.remove(temp_file) 260 os.remove(temp_file)
229 261
230 if errors: 262 if errors:
231 print("%i sequences, %i positive, %i errors" 263 print("%i sequences, %i positive, %i errors" % (count, positive, errors))
232 % (count, positive, errors))
233 else: 264 else:
234 print("%i/%i sequences positive" % (positive, count)) 265 print("%i/%i sequences positive" % (positive, count))
235 266
236 if count and count == errors: 267 if count and count == errors:
237 # Galaxy will still allow them to see the output file 268 # Galaxy will still allow them to see the output file