Mercurial > repos > peterjc > mira4_assembler
comparison tools/mira4_0/mira4.py @ 32:56b421d59805 draft
planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
author | peterjc |
---|---|
date | Thu, 18 May 2017 13:36:08 -0400 |
parents | fd95aaef8818 |
children | 0785a6537f3e |
comparison
equal
deleted
inserted
replaced
31:fd95aaef8818 | 32:56b421d59805 |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 """A simple wrapper script to call MIRA and collect its output. | 2 """A simple wrapper script to call MIRA and collect its output. |
3 """ | 3 """ |
4 | |
5 from __future__ import print_function | |
6 | |
4 import os | 7 import os |
8 import shutil | |
9 import subprocess | |
5 import sys | 10 import sys |
6 import subprocess | 11 import tempfile |
7 import shutil | |
8 import time | 12 import time |
9 import tempfile | 13 |
10 from optparse import OptionParser | 14 from optparse import OptionParser |
11 | 15 |
12 #Do we need any PYTHONPATH magic? | 16 # Do we need any PYTHONPATH magic? |
13 from mira4_make_bam import make_bam | 17 from mira4_make_bam import make_bam |
14 | 18 |
15 WRAPPER_VER = "0.0.4" #Keep in sync with the XML file | 19 WRAPPER_VER = "0.0.10" # Keep in sync with the XML file |
16 | 20 |
17 | 21 |
18 def get_version(mira_binary): | 22 def get_version(mira_binary): |
19 """Run MIRA to find its version number""" | 23 """Run MIRA to find its version number.""" |
20 # At the commend line I would use: mira -v | head -n 1 | 24 # At the commend line I would use: mira -v | head -n 1 |
21 # however there is some pipe error when doing that here. | 25 # however there is some pipe error when doing that here. |
22 cmd = [mira_binary, "-v"] | 26 cmd = [mira_binary, "-v"] |
23 try: | 27 try: |
24 child = subprocess.Popen(cmd, | 28 child = subprocess.Popen(cmd, |
25 stdout=subprocess.PIPE, | 29 stdout=subprocess.PIPE, |
26 stderr=subprocess.STDOUT) | 30 stderr=subprocess.STDOUT) |
27 except Exception, err: | 31 except Exception as err: |
28 sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err)) | 32 sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err)) |
29 sys.exit(1) | 33 sys.exit(1) |
30 ver, tmp = child.communicate() | 34 ver, tmp = child.communicate() |
31 del child | 35 del child |
32 return ver.split("\n", 1)[0].strip() | 36 return ver.split("\n", 1)[0].strip() |
33 | 37 |
34 #Parse Command Line | 38 |
39 # Parse Command Line | |
35 usage = """Galaxy MIRA4 wrapper script v%s - use as follows: | 40 usage = """Galaxy MIRA4 wrapper script v%s - use as follows: |
36 | 41 |
37 $ python mira4.py ... | 42 $ python mira4.py ... |
38 | 43 |
39 This will run the MIRA binary and collect its output files as directed. | 44 This will run the MIRA binary and collect its output files as directed. |
82 sys.exit("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_ver, mira_binary)) | 87 sys.exit("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_ver, mira_binary)) |
83 mira_convert_ver = get_version(mira_convert) | 88 mira_convert_ver = get_version(mira_convert) |
84 if not mira_convert_ver.strip().startswith("4.0"): | 89 if not mira_convert_ver.strip().startswith("4.0"): |
85 sys.exit("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_ver, mira_convert)) | 90 sys.exit("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_ver, mira_convert)) |
86 if options.version: | 91 if options.version: |
87 print "%s, MIRA wrapper version %s" % (mira_ver, WRAPPER_VER) | 92 print("%s, MIRA wrapper version %s" % (mira_ver, WRAPPER_VER)) |
88 if mira_ver != mira_convert_ver: | 93 if mira_ver != mira_convert_ver: |
89 print "WARNING: miraconvert %s" % mira_convert_ver | 94 print("WARNING: miraconvert %s" % mira_convert_ver) |
90 sys.exit(0) | 95 sys.exit(0) |
91 | 96 |
92 if not manifest: | 97 if not manifest: |
93 sys.exit("Manifest is required") | 98 sys.exit("Manifest is required") |
94 elif not os.path.isfile(manifest): | 99 elif not os.path.isfile(manifest): |
119 """ | 124 """ |
120 handle = open(manifest, "r") | 125 handle = open(manifest, "r") |
121 text = handle.read() | 126 text = handle.read() |
122 handle.close() | 127 handle.close() |
123 | 128 |
124 #At time of writing, this is at the end of a file, | 129 # At time of writing, this is at the end of a file, |
125 #but could be followed by a space in future... | 130 # but could be followed by a space in future... |
126 text = text.replace("-DI:trt=/tmp", "-DI:trt=" + tempfile.gettempdir()) | 131 text = text.replace("-DI:trt=/tmp", "-DI:trt=" + tempfile.gettempdir()) |
127 | 132 |
128 #Want to try to ensure this gets written to disk before MIRA attempts | 133 # Want to try to ensure this gets written to disk before MIRA attempts |
129 #to open it - any networked file system may impose a delay... | 134 # to open it - any networked file system may impose a delay... |
130 handle = open(manifest, "w") | 135 handle = open(manifest, "w") |
131 handle.write(text) | 136 handle.write(text) |
132 handle.flush() | 137 handle.flush() |
133 os.fsync(handle.fileno()) | 138 os.fsync(handle.fileno()) |
134 handle.close() | 139 handle.close() |
135 | 140 |
136 | 141 |
137 def log_manifest(manifest): | 142 def log_manifest(manifest): |
138 """Write the manifest file to stderr.""" | 143 """Write the manifest file to stderr.""" |
139 sys.stderr.write("\n%s\nManifest file\n%s\n" % ("="*60, "="*60)) | 144 sys.stderr.write("\n%s\nManifest file\n%s\n" % ("=" * 60, "=" * 60)) |
140 with open(manifest) as h: | 145 with open(manifest) as h: |
141 for line in h: | 146 for line in h: |
142 sys.stderr.write(line) | 147 sys.stderr.write(line) |
143 sys.stderr.write("\n%s\nEnd of manifest\n%s\n" % ("="*60, "="*60)) | 148 sys.stderr.write("\n%s\nEnd of manifest\n%s\n" % ("=" * 60, "=" * 60)) |
144 | 149 |
145 | 150 |
146 def collect_output(temp, name, handle): | 151 def collect_output(temp, name, handle): |
147 """Moves files to the output filenames (global variables).""" | 152 """Moves files to the output filenames (global variables).""" |
148 f = "%s/%s_assembly/%s_d_results" % (temp, name, name) | 153 f = "%s/%s_assembly/%s_d_results" % (temp, name, name) |
154 sys.exit("Empty output folder") | 159 sys.exit("Empty output folder") |
155 missing = [] | 160 missing = [] |
156 | 161 |
157 old_maf = "%s/%s_out.maf" % (f, name) | 162 old_maf = "%s/%s_out.maf" % (f, name) |
158 if not os.path.isfile(old_maf): | 163 if not os.path.isfile(old_maf): |
159 #Triggered extractLargeContigs.sh? | 164 # Triggered extractLargeContigs.sh? |
160 old_maf = "%s/%s_LargeContigs_out.maf" % (f, name) | 165 old_maf = "%s/%s_LargeContigs_out.maf" % (f, name) |
161 | 166 |
162 #De novo or single strain mapping, | 167 # De novo or single strain mapping, |
163 old_fasta = "%s/%s_out.unpadded.fasta" % (f, name) | 168 old_fasta = "%s/%s_out.unpadded.fasta" % (f, name) |
164 ref_fasta = "%s/%s_out.padded.fasta" % (f, name) | 169 ref_fasta = "%s/%s_out.padded.fasta" % (f, name) |
165 if not os.path.isfile(old_fasta): | 170 if not os.path.isfile(old_fasta): |
166 #Mapping (StrainX versus reference) or de novo | 171 # Mapping (StrainX versus reference) or de novo |
167 old_fasta = "%s/%s_out_StrainX.unpadded.fasta" % (f, name) | 172 old_fasta = "%s/%s_out_StrainX.unpadded.fasta" % (f, name) |
168 ref_fasta = "%s/%s_out_StrainX.padded.fasta" % (f, name) | 173 ref_fasta = "%s/%s_out_StrainX.padded.fasta" % (f, name) |
169 if not os.path.isfile(old_fasta): | 174 if not os.path.isfile(old_fasta): |
170 old_fasta = "%s/%s_out_ReferenceStrain.unpadded.fasta" % (f, name) | 175 old_fasta = "%s/%s_out_ReferenceStrain.unpadded.fasta" % (f, name) |
171 ref_fasta = "%s/%s_out_ReferenceStrain.padded.fasta" % (f, name) | 176 ref_fasta = "%s/%s_out_ReferenceStrain.padded.fasta" % (f, name) |
172 | |
173 | 177 |
174 missing = False | 178 missing = False |
175 for old, new in [(old_maf, out_maf), | 179 for old, new in [(old_maf, out_maf), |
176 (old_fasta, out_fasta)]: | 180 (old_fasta, out_fasta)]: |
177 if not os.path.isfile(old): | 181 if not os.path.isfile(old): |
185 log_manifest(manifest) | 189 log_manifest(manifest) |
186 sys.stderr.write("Contents of %r:\n" % f) | 190 sys.stderr.write("Contents of %r:\n" % f) |
187 for filename in sorted(os.listdir(f)): | 191 for filename in sorted(os.listdir(f)): |
188 sys.stderr.write("%s\n" % filename) | 192 sys.stderr.write("%s\n" % filename) |
189 | 193 |
190 #For mapping mode, probably most people would expect a BAM file | 194 # For mapping mode, probably most people would expect a BAM file |
191 #using the reference FASTA file... | 195 # using the reference FASTA file... |
192 if out_bam and out_bam != "-": | 196 if out_bam and out_bam != "-": |
193 if out_maf and out_maf != "-": | 197 if out_maf and out_maf != "-": |
194 msg = make_bam(mira_convert, out_maf, ref_fasta, out_bam, handle) | 198 msg = make_bam(mira_convert, out_maf, ref_fasta, out_bam, handle) |
195 else: | 199 else: |
196 #Not collecting the MAF file, use original location | 200 # Not collecting the MAF file, use original location |
197 msg = make_bam(mira_convert, old_maf, ref_fasta, out_bam, handle) | 201 msg = make_bam(mira_convert, old_maf, ref_fasta, out_bam, handle) |
198 if msg: | 202 if msg: |
199 sys.exit(msg) | 203 sys.exit(msg) |
204 | |
200 | 205 |
201 def clean_up(temp, name): | 206 def clean_up(temp, name): |
202 folder = "%s/%s_assembly" % (temp, name) | 207 folder = "%s/%s_assembly" % (temp, name) |
203 if os.path.isdir(folder): | 208 if os.path.isdir(folder): |
204 shutil.rmtree(folder) | 209 shutil.rmtree(folder) |
205 | 210 |
206 #TODO - Run MIRA in /tmp or a configurable directory? | 211 |
207 #Currently Galaxy puts us somewhere safe like: | 212 # TODO - Run MIRA in /tmp or a configurable directory? |
208 #/opt/galaxy-dist/database/job_working_directory/846/ | 213 # Currently Galaxy puts us somewhere safe like: |
214 # /opt/galaxy-dist/database/job_working_directory/846/ | |
209 temp = "." | 215 temp = "." |
210 | 216 |
211 name = "MIRA" | 217 name = "MIRA" |
212 | 218 |
213 override_temp(manifest) | 219 override_temp(manifest) |
216 cmd_list = [mira_binary, "-t", str(threads), manifest] | 222 cmd_list = [mira_binary, "-t", str(threads), manifest] |
217 cmd = " ".join(cmd_list) | 223 cmd = " ".join(cmd_list) |
218 | 224 |
219 assert os.path.isdir(temp) | 225 assert os.path.isdir(temp) |
220 d = "%s_assembly" % name | 226 d = "%s_assembly" % name |
221 #This can fail on my development machine if stale folders exist | 227 # This can fail on my development machine if stale folders exist |
222 #under Galaxy's .../database/job_working_directory/ tree: | 228 # under Galaxy's .../database/job_working_directory/ tree: |
223 assert not os.path.isdir(d), "Path %r already exists:\n%s" % (d, os.path.abspath(d)) | 229 assert not os.path.isdir(d), "Path %r already exists:\n%s" % (d, os.path.abspath(d)) |
224 try: | 230 try: |
225 #Check path access | 231 # Check path access |
226 os.mkdir(d) | 232 os.mkdir(d) |
227 except Exception, err: | 233 except Exception as err: |
228 log_manifest(manifest) | 234 log_manifest(manifest) |
229 sys.stderr.write("Error making directory %s\n%s" % (d, err)) | 235 sys.stderr.write("Error making directory %s\n%s" % (d, err)) |
230 sys.exit(1) | 236 sys.exit(1) |
231 | 237 |
232 #print os.path.abspath(".") | 238 # print(os.path.abspath(".")) |
233 #print cmd | 239 # print(cmd) |
234 | 240 |
235 if out_log and out_log != "-": | 241 if out_log and out_log != "-": |
236 handle = open(out_log, "w") | 242 handle = open(out_log, "w") |
237 else: | 243 else: |
238 handle = open(os.devnull, "w") | 244 handle = open(os.devnull, "w") |
244 del m | 250 del m |
245 handle.write("\n") | 251 handle.write("\n") |
246 handle.write("============================ Starting MIRA now ===============================\n") | 252 handle.write("============================ Starting MIRA now ===============================\n") |
247 handle.flush() | 253 handle.flush() |
248 try: | 254 try: |
249 #Run MIRA | 255 # Run MIRA |
250 child = subprocess.Popen(cmd_list, | 256 child = subprocess.Popen(cmd_list, |
251 stdout=handle, | 257 stdout=handle, |
252 stderr=subprocess.STDOUT) | 258 stderr=subprocess.STDOUT) |
253 except Exception, err: | 259 except Exception as err: |
254 log_manifest(manifest) | 260 log_manifest(manifest) |
255 sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err)) | 261 sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err)) |
256 #TODO - call clean up? | 262 # TODO - call clean up? |
257 handle.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err)) | 263 handle.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err)) |
258 handle.close() | 264 handle.close() |
259 sys.exit(1) | 265 sys.exit(1) |
260 #Use .communicate as can get deadlocks with .wait(), | 266 # Use .communicate as can get deadlocks with .wait(), |
261 stdout, stderr = child.communicate() | 267 stdout, stderr = child.communicate() |
262 assert not stdout and not stderr #Should be empty as sent to handle | 268 assert not stdout and not stderr # Should be empty as sent to handle |
263 run_time = time.time() - start_time | 269 run_time = time.time() - start_time |
264 return_code = child.returncode | 270 return_code = child.returncode |
265 handle.write("\n") | 271 handle.write("\n") |
266 handle.write("============================ MIRA has finished ===============================\n") | 272 handle.write("============================ MIRA has finished ===============================\n") |
267 handle.write("MIRA took %0.2f hours\n" % (run_time / 3600.0)) | 273 handle.write("MIRA took %0.2f hours\n" % (run_time / 3600.0)) |
268 if return_code: | 274 if return_code: |
269 print "MIRA took %0.2f hours" % (run_time / 3600.0) | 275 print("MIRA took %0.2f hours" % (run_time / 3600.0)) |
270 handle.write("Return error code %i from command:\n" % return_code) | 276 handle.write("Return error code %i from command:\n" % return_code) |
271 handle.write(cmd + "\n") | 277 handle.write(cmd + "\n") |
272 handle.close() | 278 handle.close() |
273 clean_up(temp, name) | 279 clean_up(temp, name) |
274 log_manifest(manifest) | 280 log_manifest(manifest) |
275 sys.stderr.write("Return error code %i from command:\n" % return_code) | 281 sys.stderr.write("Return error code %i from command:\n" % return_code) |
276 sys.stderr.write(cmd + "\n") | 282 sys.stderr.write(cmd + "\n") |
277 sys.exit(eturn_code) | 283 sys.exit(return_code) |
278 handle.flush() | 284 handle.flush() |
279 | 285 |
280 if os.path.isfile("MIRA_assembly/MIRA_d_results/ec.log"): | 286 if os.path.isfile("MIRA_assembly/MIRA_d_results/ec.log"): |
281 handle.write("\n") | 287 handle.write("\n") |
282 handle.write("====================== Extract Large Contigs failed ==========================\n") | 288 handle.write("====================== Extract Large Contigs failed ==========================\n") |
285 handle.write(line) | 291 handle.write(line) |
286 e.close() | 292 e.close() |
287 handle.write("============================ (end of ec.log) =================================\n") | 293 handle.write("============================ (end of ec.log) =================================\n") |
288 handle.flush() | 294 handle.flush() |
289 | 295 |
290 #print "Collecting output..." | 296 # print("Collecting output...") |
291 start_time = time.time() | 297 start_time = time.time() |
292 collect_output(temp, name, handle) | 298 collect_output(temp, name, handle) |
293 collect_time = time.time() - start_time | 299 collect_time = time.time() - start_time |
294 handle.write("MIRA took %0.2f hours; collecting output %0.2f minutes\n" % (run_time / 3600.0, collect_time / 60.0)) | 300 handle.write("MIRA took %0.2f hours; collecting output %0.2f minutes\n" |
295 print("MIRA took %0.2f hours; collecting output %0.2f minutes\n" % (run_time / 3600.0, collect_time / 60.0)) | 301 % (run_time / 3600.0, collect_time / 60.0)) |
302 print("MIRA took %0.2f hours; collecting output %0.2f minutes\n" | |
303 % (run_time / 3600.0, collect_time / 60.0)) | |
296 | 304 |
297 if os.path.isfile("MIRA_assembly/MIRA_d_results/ec.log"): | 305 if os.path.isfile("MIRA_assembly/MIRA_d_results/ec.log"): |
298 #Treat as an error, but doing this AFTER collect_output | 306 # Treat as an error, but doing this AFTER collect_output |
299 sys.stderr.write("Extract Large Contigs failed\n") | 307 sys.stderr.write("Extract Large Contigs failed\n") |
300 handle.write("Extract Large Contigs failed\n") | 308 handle.write("Extract Large Contigs failed\n") |
301 handle.close() | 309 handle.close() |
302 sys.exit(1) | 310 sys.exit(1) |
303 | 311 |
304 #print "Cleaning up..." | 312 # print "Cleaning up..." |
305 clean_up(temp, name) | 313 clean_up(temp, name) |
306 | 314 |
307 handle.write("\nDone\n") | 315 handle.write("\nDone\n") |
308 handle.close() | 316 handle.close() |
309 print("Done") | 317 print("Done") |