Mercurial > repos > peterjc > mira4_assembler
annotate tools/mira4_0/mira4_convert.py @ 38:cee8f9005e43 draft
planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit 206259620376b322fc8ed99a6efdd3712f38764b
author | peterjc |
---|---|
date | Wed, 11 Jul 2018 12:35:35 -0400 |
parents | 259891fce7fd |
children | bbf14bb9607b |
rev | line source |
---|---|
25 | 1 #!/usr/bin/env python |
2 """A simple wrapper script to call MIRA and collect its output. | |
3 | |
4 This focuses on the miraconvert binary. | |
5 """ | |
32
56b421d59805
planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents:
31
diff
changeset
|
6 |
56b421d59805
planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents:
31
diff
changeset
|
7 from __future__ import print_function |
56b421d59805
planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents:
31
diff
changeset
|
8 |
25 | 9 import os |
32
56b421d59805
planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents:
31
diff
changeset
|
10 import shutil |
25 | 11 import subprocess |
32
56b421d59805
planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents:
31
diff
changeset
|
12 import sys |
56b421d59805
planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents:
31
diff
changeset
|
13 |
25 | 14 from optparse import OptionParser |
32
56b421d59805
planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents:
31
diff
changeset
|
15 |
25 | 16 try: |
17 from io import BytesIO | |
18 except ImportError: | |
32
56b421d59805
planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents:
31
diff
changeset
|
19 # Should we worry about Python 2.5 or older? |
25 | 20 from StringIO import StringIO as BytesIO |
21 | |
32
56b421d59805
planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents:
31
diff
changeset
|
22 # Do we need any PYTHONPATH magic? |
25 | 23 from mira4_make_bam import depad |
24 | |
35
259891fce7fd
planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit e4c56df75150c82d3e9c4ac487c4209108e52412
peterjc
parents:
34
diff
changeset
|
25 WRAPPER_VER = "0.0.11" # Keep in sync with the XML file |
25 | 26 |
27 | |
def run(cmd):
    """Run an external command, aborting the script if it fails.

    ``cmd`` is a list of strings (program followed by its arguments) which
    is passed directly to ``subprocess.Popen``.

    Returns ``None`` when the command exits with return code zero.
    Calls ``sys.exit`` with a descriptive message if the command cannot be
    launched, or if it finishes with a non-zero return code.
    """
    # Avoid using shell=True when we call subprocess to ensure if the Python
    # script is killed, so too is the child process.
    try:
        child = subprocess.Popen(cmd, universal_newlines=True,
                                 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except Exception as err:
        sys.exit("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err))
    # Use .communicate as can get deadlocks with .wait(),
    stdout, stderr = child.communicate()
    return_code = child.returncode
    if not return_code:
        return
    cmd_str = " ".join(cmd)  # doesn't quote spaces etc
    if stderr and stdout:
        sys.exit("Return code %i from command:\n%s\n\n%s\n\n%s"
                 % (return_code, cmd_str, stdout, stderr))
    # Only one stream (or neither) has content; report whichever it is so
    # that diagnostics written solely to stdout are not silently dropped.
    sys.exit("Return code %i from command:\n%s\n%s"
             % (return_code, cmd_str, stderr or stdout))
25 | 45 |
32
56b421d59805
planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents:
31
diff
changeset
|
46 |
def get_version(mira_binary):
    """Run MIRA to find its version number.

    Executes ``mira_binary -v`` with stderr merged into stdout and returns
    the first line of the combined output, stripped of surrounding
    whitespace, as a text string.

    Writes a message to stderr and exits with status 1 if the binary
    cannot be launched.
    """
    # At the command line I would use: mira -v | head -n 1
    # however there is some pipe error when doing that here.
    cmd = [mira_binary, "-v"]
    try:
        # universal_newlines=True gives text (not bytes) under Python 3,
        # which the string split below requires.
        child = subprocess.Popen(cmd, universal_newlines=True,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT)
    except Exception as err:
        sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err))
        sys.exit(1)
    # stderr was redirected into stdout, so the second item is always None.
    ver, _ = child.communicate()
    del child
    return ver.split("\n", 1)[0].strip()
62 | |
32
56b421d59805
planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents:
31
diff
changeset
|
63 |
56b421d59805
planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents:
31
diff
changeset
|
# Parse Command Line
usage = """Galaxy MIRA4 wrapper script v%s - use as follows:

$ python mira4_convert.py ...

This will run the MIRA miraconvert binary and collect its output files as directed.
""" % WRAPPER_VER


def _build_option_parser(usage_text):
    """Return an OptionParser configured with all of the wrapper's options."""
    cli = OptionParser(usage=usage_text)
    cli.add_option("--input", dest="input",
                   default=None, metavar="FILE",
                   help="MIRA input filename")
    # Contig filters; kept as strings here and validated later.
    for short_flag, dest_name, description in (
        ("-x", "min_length", "Minimum contig length"),
        ("-y", "min_cover", "Minimum average contig coverage"),
        ("-z", "min_reads", "Minimum reads per contig"),
    ):
        cli.add_option(short_flag, "--" + dest_name, dest=dest_name,
                       default="0",
                       help=description)
    # Optional output files; an empty string means "not requested".
    for dest_name, description in (
        ("maf", "MIRA MAF output filename"),
        ("ace", "ACE output filename"),
        ("bam", "Unpadded BAM output filename"),
        ("fasta", "Unpadded FASTA output filename"),
        ("cstats", "Contig statistics filename"),
    ):
        cli.add_option("--" + dest_name, dest=dest_name,
                       default="", metavar="FILE",
                       help=description)
    cli.add_option("-v", "--version", dest="version",
                   default=False, action="store_true",
                   help="Show version and quit")
    return cli


parser = _build_option_parser(usage)
options, args = parser.parse_args()
if args:
    sys.exit("Expected options (e.g. --input example.maf), not arguments")
25 | 105 |
# Pull the requested filenames out of the parsed options.
input_maf = options.input
out_maf = options.maf
out_bam = options.bam
out_fasta = options.fasta
out_ace = options.ace
out_cstats = options.cstats

# Locate the miraconvert binary: under $MIRA4 if set, otherwise via $PATH.
mira_path = os.environ.get("MIRA4")
if mira_path is None:
    sys.stderr.write("DEBUG: Since $MIRA4 is not set, assuming mira binaries are on $PATH.\n")
    mira_convert = "miraconvert"
else:
    mira_convert = os.path.join(mira_path, "miraconvert")
    if not os.path.isfile(mira_convert):
        # Listing the folder is only a debugging aid; if $MIRA4 points at
        # something which cannot be listed, still give the intended error
        # rather than an unrelated traceback.
        try:
            folder_listing = ", ".join(os.listdir(mira_path))
        except OSError as err:
            folder_listing = "(could not list folder: %s)" % err
        sys.exit("Missing miraconvert under $MIRA4, %r\nFolder contained: %s"
                 % (mira_convert, folder_listing))

# Confirm the binary is from the MIRA 4.0 series before doing anything else.
mira_convert_ver = get_version(mira_convert)
if not mira_convert_ver.strip().startswith("4.0"):
    sys.exit("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_convert_ver, mira_convert))
if options.version:
    print("%s, MIRA wrapper version %s" % (mira_convert_ver, WRAPPER_VER))
    sys.exit(0)

# An existing input file and at least one output are mandatory.
if not input_maf:
    sys.exit("Input MIRA file is required")
elif not os.path.isfile(input_maf):
    sys.exit("Missing input MIRA file: %r" % input_maf)

if not (out_maf or out_bam or out_fasta or out_ace or out_cstats):
    sys.exit("No output requested")
25 | 138 |
139 | |
def check_min_int(value, name):
    """Convert a threshold setting to a non-negative integer.

    ``value`` is the raw setting (normally a string from the command line)
    and ``name`` is a human readable label used in error messages.

    Returns the integer value. Calls ``sys.exit`` with an explanatory
    message if ``value`` cannot be converted to an integer or is negative.
    """
    try:
        i = int(value)
    except (TypeError, ValueError):
        # TypeError covers non-string input such as None.
        sys.exit("Bad %s setting, %r" % (name, value))
    if i < 0:
        sys.exit("Negative %s setting, %r" % (name, value))
    return i
148 | |
32
56b421d59805
planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents:
31
diff
changeset
|
149 |
# Validate the three contig filter thresholds.
min_length = check_min_int(options.min_length, "minimum length")
min_cover = check_min_int(options.min_cover, "minimum cover")
min_reads = check_min_int(options.min_reads, "minimum reads")

# TODO - Run MIRA in /tmp or a configurable directory?
# Currently Galaxy puts us somewhere safe like:
# /opt/galaxy-dist/database/job_working_directory/846/
temp = "."

# Assemble the miraconvert command: filters first (only when non-zero),
# then the input format and files, then the requested output formats.
cmd_list = [mira_convert]
for filter_flag, threshold in (("-x", min_length),
                               ("-y", min_cover),
                               ("-z", min_reads)):
    if threshold:
        cmd_list += [filter_flag, str(threshold)]
cmd_list += ["-f", "maf", input_maf, os.path.join(temp, "converted")]

if out_bam and not out_fasta:
    # Need this for samtools depad
    out_fasta = os.path.join(temp, "depadded.fasta")

cmd_list.extend(
    format_name
    for wanted, format_name in ((out_maf, "maf"),
                                (out_bam, "samnbb"),
                                (out_fasta, "fasta"),
                                (out_ace, "ace"),
                                (out_cstats, "cstats"))
    if wanted
)
run(cmd_list)
182 | |
32
56b421d59805
planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents:
31
diff
changeset
|
183 |
def collect(old, new):
    """Move the generated file ``old`` to its final location ``new``.

    Calls ``sys.exit`` with an error message if ``old`` is not an
    existing file.
    """
    if os.path.isfile(old):
        shutil.move(old, new)
        return
    sys.exit("Missing expected output file %s" % old)
188 | |
32
56b421d59805
planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents:
31
diff
changeset
|
189 |
# Move each requested output from the working directory to where the
# caller asked for it.
if out_maf:
    collect(os.path.join(temp, "converted.maf"), out_maf)
if out_fasta:
    # Can we look at the MAF file to see if there are multiple strains?
    old = os.path.join(temp, "converted_AllStrains.unpadded.fasta")
    if os.path.isfile(old):
        collect(old, out_fasta)
    else:
        # Might the output be filtered down to zero contigs?
        old = os.path.join(temp, "converted.fasta")
        if not os.path.isfile(old):
            sys.exit("Missing expected output FASTA file")
        elif os.path.getsize(old) == 0:
            print("Warning - no contigs (harsh filters?)")
            collect(old, out_fasta)
        else:
            sys.exit("Missing expected output FASTA file (only generic file present)")
if out_ace:
    # The ACE output has its own file; the MAF file may already have been
    # moved above and is in any case not ACE formatted.
    collect(os.path.join(temp, "converted.ace"), out_ace)
if out_cstats:
    collect(os.path.join(temp, "converted_info_contigstats.txt"), out_cstats)

if out_bam:
    # The FASTA file is an input to the depad step, so it must exist.
    # (Explicit check rather than assert, which is skipped under -O.)
    if not os.path.isfile(out_fasta):
        sys.exit("Missing expected output file %s" % out_fasta)
    old = os.path.join(temp, "converted.samnbb")
    if not os.path.isfile(old):
        old = os.path.join(temp, "converted.sam")
    if not os.path.isfile(old):
        sys.exit("Missing expected intermediate file %s" % old)
    h = BytesIO()
    try:
        msg = depad(out_fasta, old, out_bam, h)
        if msg:
            # A non-empty message is treated as failure; show it together
            # with whatever was captured in the handle.
            print(msg)
            print(h.getvalue())
            sys.exit(1)
    finally:
        h.close()
    if out_fasta == os.path.join(temp, "depadded.fasta"):
        # Not asked for by Galaxy, no longer needed
        os.remove(out_fasta)

if min_length or min_cover or min_reads:
    print("Filtered.")
else:
    print("Converted.")