Mercurial > repos > peterjc > mira4_assembler
annotate tools/mira4_0/mira4_convert.py @ 31:fd95aaef8818 draft
planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit bc3d484c5cd68ddcf456db2fff489d584aa2034c
author | peterjc |
---|---|
date | Wed, 10 Feb 2016 09:07:39 -0500 |
parents | 55ae131c5862 |
children | 56b421d59805 |
rev | line source |
---|---|
25 | 1 #!/usr/bin/env python |
2 """A simple wrapper script to call MIRA and collect its output. | |
3 | |
4 This focuses on the miraconvert binary. | |
5 """ | |
6 import os | |
7 import sys | |
8 import subprocess | |
9 import shutil | |
10 from optparse import OptionParser | |
11 try: | |
12 from io import BytesIO | |
13 except ImportError: | |
14 #Should we worry about Python 2.5 or older? | |
15 from StringIO import StringIO as BytesIO | |
16 | |
17 #Do we need any PYTHONPATH magic? | |
18 from mira4_make_bam import depad | |
19 | |
20 WRAPPER_VER = "0.0.7" # Keep in sync with the XML file | |
21 | |
22 | |
def run(cmd):
    """Run an external command, aborting the script if it fails.

    Arguments:
     - cmd - list of strings, the program followed by its arguments
       (handed to subprocess.Popen directly, without a shell).

    Returns None once the command has finished with a zero return code.

    Calls sys.exit with a descriptive message if the command cannot be
    started, or if it finishes with a non-zero return code (the message
    then includes the return code, the command and its captured output).
    """
    # Avoid using shell=True when we call subprocess to ensure if the Python
    # script is killed, so too is the child process.
    try:
        # universal_newlines=True gives text (not bytes) output on Python 3,
        # so the captured output can be embedded in the error messages.
        child = subprocess.Popen(cmd,
                                 universal_newlines=True,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
    except Exception as err:
        sys.exit("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err))
    # Use .communicate as can get deadlocks with .wait(),
    stdout, stderr = child.communicate()
    return_code = child.returncode
    if return_code:
        cmd_str = " ".join(cmd)  # doesn't quote spaces etc
        if stderr and stdout:
            sys.exit("Return code %i from command:\n%s\n\n%s\n\n%s"
                     % (return_code, cmd_str, stdout, stderr))
        else:
            # At most one stream has content - report whichever it is, so
            # output written only to stdout is not silently dropped.
            sys.exit("Return code %i from command:\n%s\n%s"
                     % (return_code, cmd_str, stderr or stdout))
25 | 39 |
def get_version(mira_binary):
    """Run MIRA to find its version number.

    Arguments:
     - mira_binary - path of the MIRA executable, which is run with
       the single argument -v.

    Returns the first line of the combined stdout/stderr output, with
    leading and trailing whitespace removed.

    Writes a message to stderr and calls sys.exit(1) if the binary
    cannot be started.
    """
    # At the command line I would use: mira -v | head -n 1
    # however there is some pipe error when doing that here.
    cmd = [mira_binary, "-v"]
    try:
        # universal_newlines=True gives text (not bytes) on Python 3,
        # which the string split below requires.
        child = subprocess.Popen(cmd,
                                 universal_newlines=True,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT)
    except Exception as err:
        sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err))
        sys.exit(1)
    # stderr is merged into stdout, so only the first item is of interest
    ver = child.communicate()[0]
    return ver.split("\n", 1)[0].strip()
55 | |
# Parse Command Line
usage = """Galaxy MIRA4 wrapper script v%s - use as follows:

$ python mira4_convert.py ...

This will run the MIRA miraconvert binary and collect its output files as directed.
""" % WRAPPER_VER
parser = OptionParser(usage=usage)
parser.add_option("--input", dest="input",
                  default=None, metavar="FILE",
                  help="MIRA input filename")
# Contig filter thresholds, kept as strings here and validated later
for _flag, _dest, _help in [
        ("-x", "min_length", "Minimum contig length"),
        ("-y", "min_cover", "Minimum average contig coverage"),
        ("-z", "min_reads", "Minimum reads per contig"),
]:
    parser.add_option(_flag, "--" + _dest, dest=_dest,
                      default="0",
                      help=_help)
# Optional output files, an empty string means not requested
for _dest, _help in [
        ("maf", "MIRA MAF output filename"),
        ("ace", "ACE output filename"),
        ("bam", "Unpadded BAM output filename"),
        ("fasta", "Unpadded FASTA output filename"),
        ("cstats", "Contig statistics filename"),
]:
    parser.add_option("--" + _dest, dest=_dest,
                      default="", metavar="FILE",
                      help=_help)
parser.add_option("-v", "--version", dest="version",
                  default=False, action="store_true",
                  help="Show version and quit")
options, args = parser.parse_args()
if args:
    sys.exit("Expected options (e.g. --input example.maf), not arguments")

input_maf = options.input
out_maf = options.maf
out_bam = options.bam
out_fasta = options.fasta
out_ace = options.ace
out_cstats = options.cstats

# Locate the miraconvert binary via the $MIRA4 environment variable
mira_path = os.environ.get("MIRA4")
if mira_path is None:
    sys.exit("Environment variable $MIRA4 not set")
mira_convert = os.path.join(mira_path, "miraconvert")
if not os.path.isfile(mira_convert):
    sys.exit("Missing miraconvert under $MIRA4, %r\nFolder contained: %s"
             % (mira_convert, ", ".join(os.listdir(mira_path))))

# The binary's version is checked even when only --version was requested
mira_convert_ver = get_version(mira_convert)
if not mira_convert_ver.strip().startswith("4.0"):
    sys.exit("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_convert_ver, mira_convert))
if options.version:
    print("%s, MIRA wrapper version %s" % (mira_convert_ver, WRAPPER_VER))
    sys.exit(0)

if not input_maf:
    sys.exit("Input MIRA file is required")
elif not os.path.isfile(input_maf):
    sys.exit("Missing input MIRA file: %r" % input_maf)

if not any([out_maf, out_bam, out_fasta, out_ace, out_cstats]):
    sys.exit("No output requested")
25 | 128 |
129 | |
def check_min_int(value, name):
    """Convert a threshold setting to a non-negative integer.

    Arguments:
     - value - the setting to convert with int(...)
     - name - description of the setting, used in the error messages

    Returns the setting as an integer. Calls sys.exit with a message
    if int(...) raises ValueError or the resulting number is negative.
    """
    try:
        number = int(value)
    except ValueError:
        sys.exit("Bad %s setting, %r" % (name, value))
    if number >= 0:
        return number
    sys.exit("Negative %s setting, %r" % (name, value))
138 | |
min_length = check_min_int(options.min_length, "minimum length")
min_cover = check_min_int(options.min_cover, "minimum cover")
min_reads = check_min_int(options.min_reads, "minimum reads")

# TODO - Run MIRA in /tmp or a configurable directory?
# Currently Galaxy puts us somewhere safe like:
# /opt/galaxy-dist/database/job_working_directory/846/
temp = "."

# Build the miraconvert command line, a zero threshold means no filter
cmd_list = [mira_convert]
for _switch, _threshold in (("-x", min_length),
                            ("-y", min_cover),
                            ("-z", min_reads)):
    if _threshold:
        cmd_list.extend([_switch, str(_threshold)])
cmd_list.extend(["-f", "maf", input_maf, os.path.join(temp, "converted")])
if out_maf:
    cmd_list.append("maf")
if out_bam:
    cmd_list.append("samnbb")
    if not out_fasta:
        # Need this for samtools depad
        out_fasta = os.path.join(temp, "depadded.fasta")
# Tested after the BAM branch, which may have just set out_fasta
if out_fasta:
    cmd_list.append("fasta")
if out_ace:
    cmd_list.append("ace")
if out_cstats:
    cmd_list.append("cstats")
run(cmd_list)
171 | |
def collect(old, new):
    """Move an expected output file to its requested location.

    Arguments:
     - old - path of the file which should already exist
     - new - destination path, passed to shutil.move

    Calls sys.exit with a message naming the file if old is missing.
    """
    if os.path.isfile(old):
        shutil.move(old, new)
    else:
        sys.exit("Missing expected output file %s" % old)
176 | |
# Move the files written by miraconvert to the requested output filenames
if out_maf:
    collect(os.path.join(temp, "converted.maf"), out_maf)
if out_fasta:
    # Can we look at the MAF file to see if there are multiple strains?
    old = os.path.join(temp, "converted_AllStrains.unpadded.fasta")
    if os.path.isfile(old):
        collect(old, out_fasta)
    else:
        # Might the output be filtered down to zero contigs?
        old = os.path.join(temp, "converted.fasta")
        if not os.path.isfile(old):
            sys.exit("Missing expected output FASTA file")
        elif os.path.getsize(old) == 0:
            print("Warning - no contigs (harsh filters?)")
            collect(old, out_fasta)
        else:
            sys.exit("Missing expected output FASTA file (only generic file present)")
if out_ace:
    # Collect the ACE file itself, not the MAF file (which may not have been
    # requested, or may already have been moved to out_maf above).
    collect(os.path.join(temp, "converted.ace"), out_ace)
if out_cstats:
    collect(os.path.join(temp, "converted_info_contigstats.txt"), out_cstats)

if out_bam:
    # Explicit check rather than an assert, which python -O would strip
    if not os.path.isfile(out_fasta):
        sys.exit("Missing FASTA file %s needed to make the BAM file" % out_fasta)
    old = os.path.join(temp, "converted.samnbb")
    if not os.path.isfile(old):
        old = os.path.join(temp, "converted.sam")
    if not os.path.isfile(old):
        sys.exit("Missing expected intermediate file %s" % old)
    h = BytesIO()
    try:
        # A non-empty return value from depad is treated as an error message,
        # reported together with whatever was written to the handle.
        msg = depad(out_fasta, old, out_bam, h)
        if msg:
            print(msg)
            print(h.getvalue())
            sys.exit(1)
    finally:
        h.close()
    if out_fasta == os.path.join(temp, "depadded.fasta"):
        # Not asked for by Galaxy, no longer needed
        os.remove(out_fasta)

if min_length or min_cover or min_reads:
    print("Filtered.")
else:
    print("Converted.")