comparison tools/mira4_0/mira4.py @ 32:56b421d59805 draft

planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
author peterjc
date Thu, 18 May 2017 13:36:08 -0400
parents fd95aaef8818
children 0785a6537f3e
comparison
equal deleted inserted replaced
31:fd95aaef8818 32:56b421d59805
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 """A simple wrapper script to call MIRA and collect its output. 2 """A simple wrapper script to call MIRA and collect its output.
3 """ 3 """
4
5 from __future__ import print_function
6
4 import os 7 import os
8 import shutil
9 import subprocess
5 import sys 10 import sys
6 import subprocess 11 import tempfile
7 import shutil
8 import time 12 import time
9 import tempfile 13
10 from optparse import OptionParser 14 from optparse import OptionParser
11 15
12 #Do we need any PYTHONPATH magic? 16 # Do we need any PYTHONPATH magic?
13 from mira4_make_bam import make_bam 17 from mira4_make_bam import make_bam
14 18
15 WRAPPER_VER = "0.0.4" #Keep in sync with the XML file 19 WRAPPER_VER = "0.0.10" # Keep in sync with the XML file
16 20
17 21
18 def get_version(mira_binary): 22 def get_version(mira_binary):
19 """Run MIRA to find its version number""" 23 """Run MIRA to find its version number."""
20 # At the commend line I would use: mira -v | head -n 1 24 # At the commend line I would use: mira -v | head -n 1
21 # however there is some pipe error when doing that here. 25 # however there is some pipe error when doing that here.
22 cmd = [mira_binary, "-v"] 26 cmd = [mira_binary, "-v"]
23 try: 27 try:
24 child = subprocess.Popen(cmd, 28 child = subprocess.Popen(cmd,
25 stdout=subprocess.PIPE, 29 stdout=subprocess.PIPE,
26 stderr=subprocess.STDOUT) 30 stderr=subprocess.STDOUT)
27 except Exception, err: 31 except Exception as err:
28 sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err)) 32 sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err))
29 sys.exit(1) 33 sys.exit(1)
30 ver, tmp = child.communicate() 34 ver, tmp = child.communicate()
31 del child 35 del child
32 return ver.split("\n", 1)[0].strip() 36 return ver.split("\n", 1)[0].strip()
33 37
34 #Parse Command Line 38
39 # Parse Command Line
35 usage = """Galaxy MIRA4 wrapper script v%s - use as follows: 40 usage = """Galaxy MIRA4 wrapper script v%s - use as follows:
36 41
37 $ python mira4.py ... 42 $ python mira4.py ...
38 43
39 This will run the MIRA binary and collect its output files as directed. 44 This will run the MIRA binary and collect its output files as directed.
82 sys.exit("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_ver, mira_binary)) 87 sys.exit("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_ver, mira_binary))
83 mira_convert_ver = get_version(mira_convert) 88 mira_convert_ver = get_version(mira_convert)
84 if not mira_convert_ver.strip().startswith("4.0"): 89 if not mira_convert_ver.strip().startswith("4.0"):
85 sys.exit("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_ver, mira_convert)) 90 sys.exit("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_ver, mira_convert))
86 if options.version: 91 if options.version:
87 print "%s, MIRA wrapper version %s" % (mira_ver, WRAPPER_VER) 92 print("%s, MIRA wrapper version %s" % (mira_ver, WRAPPER_VER))
88 if mira_ver != mira_convert_ver: 93 if mira_ver != mira_convert_ver:
89 print "WARNING: miraconvert %s" % mira_convert_ver 94 print("WARNING: miraconvert %s" % mira_convert_ver)
90 sys.exit(0) 95 sys.exit(0)
91 96
92 if not manifest: 97 if not manifest:
93 sys.exit("Manifest is required") 98 sys.exit("Manifest is required")
94 elif not os.path.isfile(manifest): 99 elif not os.path.isfile(manifest):
119 """ 124 """
120 handle = open(manifest, "r") 125 handle = open(manifest, "r")
121 text = handle.read() 126 text = handle.read()
122 handle.close() 127 handle.close()
123 128
124 #At time of writing, this is at the end of a file, 129 # At time of writing, this is at the end of a file,
125 #but could be followed by a space in future... 130 # but could be followed by a space in future...
126 text = text.replace("-DI:trt=/tmp", "-DI:trt=" + tempfile.gettempdir()) 131 text = text.replace("-DI:trt=/tmp", "-DI:trt=" + tempfile.gettempdir())
127 132
128 #Want to try to ensure this gets written to disk before MIRA attempts 133 # Want to try to ensure this gets written to disk before MIRA attempts
129 #to open it - any networked file system may impose a delay... 134 # to open it - any networked file system may impose a delay...
130 handle = open(manifest, "w") 135 handle = open(manifest, "w")
131 handle.write(text) 136 handle.write(text)
132 handle.flush() 137 handle.flush()
133 os.fsync(handle.fileno()) 138 os.fsync(handle.fileno())
134 handle.close() 139 handle.close()
135 140
136 141
137 def log_manifest(manifest): 142 def log_manifest(manifest):
138 """Write the manifest file to stderr.""" 143 """Write the manifest file to stderr."""
139 sys.stderr.write("\n%s\nManifest file\n%s\n" % ("="*60, "="*60)) 144 sys.stderr.write("\n%s\nManifest file\n%s\n" % ("=" * 60, "=" * 60))
140 with open(manifest) as h: 145 with open(manifest) as h:
141 for line in h: 146 for line in h:
142 sys.stderr.write(line) 147 sys.stderr.write(line)
143 sys.stderr.write("\n%s\nEnd of manifest\n%s\n" % ("="*60, "="*60)) 148 sys.stderr.write("\n%s\nEnd of manifest\n%s\n" % ("=" * 60, "=" * 60))
144 149
145 150
146 def collect_output(temp, name, handle): 151 def collect_output(temp, name, handle):
147 """Moves files to the output filenames (global variables).""" 152 """Moves files to the output filenames (global variables)."""
148 f = "%s/%s_assembly/%s_d_results" % (temp, name, name) 153 f = "%s/%s_assembly/%s_d_results" % (temp, name, name)
154 sys.exit("Empty output folder") 159 sys.exit("Empty output folder")
155 missing = [] 160 missing = []
156 161
157 old_maf = "%s/%s_out.maf" % (f, name) 162 old_maf = "%s/%s_out.maf" % (f, name)
158 if not os.path.isfile(old_maf): 163 if not os.path.isfile(old_maf):
159 #Triggered extractLargeContigs.sh? 164 # Triggered extractLargeContigs.sh?
160 old_maf = "%s/%s_LargeContigs_out.maf" % (f, name) 165 old_maf = "%s/%s_LargeContigs_out.maf" % (f, name)
161 166
162 #De novo or single strain mapping, 167 # De novo or single strain mapping,
163 old_fasta = "%s/%s_out.unpadded.fasta" % (f, name) 168 old_fasta = "%s/%s_out.unpadded.fasta" % (f, name)
164 ref_fasta = "%s/%s_out.padded.fasta" % (f, name) 169 ref_fasta = "%s/%s_out.padded.fasta" % (f, name)
165 if not os.path.isfile(old_fasta): 170 if not os.path.isfile(old_fasta):
166 #Mapping (StrainX versus reference) or de novo 171 # Mapping (StrainX versus reference) or de novo
167 old_fasta = "%s/%s_out_StrainX.unpadded.fasta" % (f, name) 172 old_fasta = "%s/%s_out_StrainX.unpadded.fasta" % (f, name)
168 ref_fasta = "%s/%s_out_StrainX.padded.fasta" % (f, name) 173 ref_fasta = "%s/%s_out_StrainX.padded.fasta" % (f, name)
169 if not os.path.isfile(old_fasta): 174 if not os.path.isfile(old_fasta):
170 old_fasta = "%s/%s_out_ReferenceStrain.unpadded.fasta" % (f, name) 175 old_fasta = "%s/%s_out_ReferenceStrain.unpadded.fasta" % (f, name)
171 ref_fasta = "%s/%s_out_ReferenceStrain.padded.fasta" % (f, name) 176 ref_fasta = "%s/%s_out_ReferenceStrain.padded.fasta" % (f, name)
172
173 177
174 missing = False 178 missing = False
175 for old, new in [(old_maf, out_maf), 179 for old, new in [(old_maf, out_maf),
176 (old_fasta, out_fasta)]: 180 (old_fasta, out_fasta)]:
177 if not os.path.isfile(old): 181 if not os.path.isfile(old):
185 log_manifest(manifest) 189 log_manifest(manifest)
186 sys.stderr.write("Contents of %r:\n" % f) 190 sys.stderr.write("Contents of %r:\n" % f)
187 for filename in sorted(os.listdir(f)): 191 for filename in sorted(os.listdir(f)):
188 sys.stderr.write("%s\n" % filename) 192 sys.stderr.write("%s\n" % filename)
189 193
190 #For mapping mode, probably most people would expect a BAM file 194 # For mapping mode, probably most people would expect a BAM file
191 #using the reference FASTA file... 195 # using the reference FASTA file...
192 if out_bam and out_bam != "-": 196 if out_bam and out_bam != "-":
193 if out_maf and out_maf != "-": 197 if out_maf and out_maf != "-":
194 msg = make_bam(mira_convert, out_maf, ref_fasta, out_bam, handle) 198 msg = make_bam(mira_convert, out_maf, ref_fasta, out_bam, handle)
195 else: 199 else:
196 #Not collecting the MAF file, use original location 200 # Not collecting the MAF file, use original location
197 msg = make_bam(mira_convert, old_maf, ref_fasta, out_bam, handle) 201 msg = make_bam(mira_convert, old_maf, ref_fasta, out_bam, handle)
198 if msg: 202 if msg:
199 sys.exit(msg) 203 sys.exit(msg)
204
200 205
201 def clean_up(temp, name): 206 def clean_up(temp, name):
202 folder = "%s/%s_assembly" % (temp, name) 207 folder = "%s/%s_assembly" % (temp, name)
203 if os.path.isdir(folder): 208 if os.path.isdir(folder):
204 shutil.rmtree(folder) 209 shutil.rmtree(folder)
205 210
206 #TODO - Run MIRA in /tmp or a configurable directory? 211
207 #Currently Galaxy puts us somewhere safe like: 212 # TODO - Run MIRA in /tmp or a configurable directory?
208 #/opt/galaxy-dist/database/job_working_directory/846/ 213 # Currently Galaxy puts us somewhere safe like:
214 # /opt/galaxy-dist/database/job_working_directory/846/
209 temp = "." 215 temp = "."
210 216
211 name = "MIRA" 217 name = "MIRA"
212 218
213 override_temp(manifest) 219 override_temp(manifest)
216 cmd_list = [mira_binary, "-t", str(threads), manifest] 222 cmd_list = [mira_binary, "-t", str(threads), manifest]
217 cmd = " ".join(cmd_list) 223 cmd = " ".join(cmd_list)
218 224
219 assert os.path.isdir(temp) 225 assert os.path.isdir(temp)
220 d = "%s_assembly" % name 226 d = "%s_assembly" % name
221 #This can fail on my development machine if stale folders exist 227 # This can fail on my development machine if stale folders exist
222 #under Galaxy's .../database/job_working_directory/ tree: 228 # under Galaxy's .../database/job_working_directory/ tree:
223 assert not os.path.isdir(d), "Path %r already exists:\n%s" % (d, os.path.abspath(d)) 229 assert not os.path.isdir(d), "Path %r already exists:\n%s" % (d, os.path.abspath(d))
224 try: 230 try:
225 #Check path access 231 # Check path access
226 os.mkdir(d) 232 os.mkdir(d)
227 except Exception, err: 233 except Exception as err:
228 log_manifest(manifest) 234 log_manifest(manifest)
229 sys.stderr.write("Error making directory %s\n%s" % (d, err)) 235 sys.stderr.write("Error making directory %s\n%s" % (d, err))
230 sys.exit(1) 236 sys.exit(1)
231 237
232 #print os.path.abspath(".") 238 # print(os.path.abspath("."))
233 #print cmd 239 # print(cmd)
234 240
235 if out_log and out_log != "-": 241 if out_log and out_log != "-":
236 handle = open(out_log, "w") 242 handle = open(out_log, "w")
237 else: 243 else:
238 handle = open(os.devnull, "w") 244 handle = open(os.devnull, "w")
244 del m 250 del m
245 handle.write("\n") 251 handle.write("\n")
246 handle.write("============================ Starting MIRA now ===============================\n") 252 handle.write("============================ Starting MIRA now ===============================\n")
247 handle.flush() 253 handle.flush()
248 try: 254 try:
249 #Run MIRA 255 # Run MIRA
250 child = subprocess.Popen(cmd_list, 256 child = subprocess.Popen(cmd_list,
251 stdout=handle, 257 stdout=handle,
252 stderr=subprocess.STDOUT) 258 stderr=subprocess.STDOUT)
253 except Exception, err: 259 except Exception as err:
254 log_manifest(manifest) 260 log_manifest(manifest)
255 sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err)) 261 sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err))
256 #TODO - call clean up? 262 # TODO - call clean up?
257 handle.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err)) 263 handle.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err))
258 handle.close() 264 handle.close()
259 sys.exit(1) 265 sys.exit(1)
260 #Use .communicate as can get deadlocks with .wait(), 266 # Use .communicate as can get deadlocks with .wait(),
261 stdout, stderr = child.communicate() 267 stdout, stderr = child.communicate()
262 assert not stdout and not stderr #Should be empty as sent to handle 268 assert not stdout and not stderr # Should be empty as sent to handle
263 run_time = time.time() - start_time 269 run_time = time.time() - start_time
264 return_code = child.returncode 270 return_code = child.returncode
265 handle.write("\n") 271 handle.write("\n")
266 handle.write("============================ MIRA has finished ===============================\n") 272 handle.write("============================ MIRA has finished ===============================\n")
267 handle.write("MIRA took %0.2f hours\n" % (run_time / 3600.0)) 273 handle.write("MIRA took %0.2f hours\n" % (run_time / 3600.0))
268 if return_code: 274 if return_code:
269 print "MIRA took %0.2f hours" % (run_time / 3600.0) 275 print("MIRA took %0.2f hours" % (run_time / 3600.0))
270 handle.write("Return error code %i from command:\n" % return_code) 276 handle.write("Return error code %i from command:\n" % return_code)
271 handle.write(cmd + "\n") 277 handle.write(cmd + "\n")
272 handle.close() 278 handle.close()
273 clean_up(temp, name) 279 clean_up(temp, name)
274 log_manifest(manifest) 280 log_manifest(manifest)
275 sys.stderr.write("Return error code %i from command:\n" % return_code) 281 sys.stderr.write("Return error code %i from command:\n" % return_code)
276 sys.stderr.write(cmd + "\n") 282 sys.stderr.write(cmd + "\n")
277 sys.exit(eturn_code) 283 sys.exit(return_code)
278 handle.flush() 284 handle.flush()
279 285
280 if os.path.isfile("MIRA_assembly/MIRA_d_results/ec.log"): 286 if os.path.isfile("MIRA_assembly/MIRA_d_results/ec.log"):
281 handle.write("\n") 287 handle.write("\n")
282 handle.write("====================== Extract Large Contigs failed ==========================\n") 288 handle.write("====================== Extract Large Contigs failed ==========================\n")
285 handle.write(line) 291 handle.write(line)
286 e.close() 292 e.close()
287 handle.write("============================ (end of ec.log) =================================\n") 293 handle.write("============================ (end of ec.log) =================================\n")
288 handle.flush() 294 handle.flush()
289 295
290 #print "Collecting output..." 296 # print("Collecting output...")
291 start_time = time.time() 297 start_time = time.time()
292 collect_output(temp, name, handle) 298 collect_output(temp, name, handle)
293 collect_time = time.time() - start_time 299 collect_time = time.time() - start_time
294 handle.write("MIRA took %0.2f hours; collecting output %0.2f minutes\n" % (run_time / 3600.0, collect_time / 60.0)) 300 handle.write("MIRA took %0.2f hours; collecting output %0.2f minutes\n"
295 print("MIRA took %0.2f hours; collecting output %0.2f minutes\n" % (run_time / 3600.0, collect_time / 60.0)) 301 % (run_time / 3600.0, collect_time / 60.0))
302 print("MIRA took %0.2f hours; collecting output %0.2f minutes\n"
303 % (run_time / 3600.0, collect_time / 60.0))
296 304
297 if os.path.isfile("MIRA_assembly/MIRA_d_results/ec.log"): 305 if os.path.isfile("MIRA_assembly/MIRA_d_results/ec.log"):
298 #Treat as an error, but doing this AFTER collect_output 306 # Treat as an error, but doing this AFTER collect_output
299 sys.stderr.write("Extract Large Contigs failed\n") 307 sys.stderr.write("Extract Large Contigs failed\n")
300 handle.write("Extract Large Contigs failed\n") 308 handle.write("Extract Large Contigs failed\n")
301 handle.close() 309 handle.close()
302 sys.exit(1) 310 sys.exit(1)
303 311
304 #print "Cleaning up..." 312 # print "Cleaning up..."
305 clean_up(temp, name) 313 clean_up(temp, name)
306 314
307 handle.write("\nDone\n") 315 handle.write("\nDone\n")
308 handle.close() 316 handle.close()
309 print("Done") 317 print("Done")