comparison tools/mira4/mira4.py @ 0:32f693f6e741 draft

Uploaded v0.0.1 preview0, very much a work in progress, primarily checking mira_datatypes dependency
author peterjc
date Thu, 26 Sep 2013 12:23:42 -0400
parents
children df86ed992a1b
comparison
equal deleted inserted replaced
-1:000000000000 0:32f693f6e741
1 #!/usr/bin/env python
2 """A simple wrapper script to call MIRA and collect its output.
3 """
4 import os
5 import sys
6 import subprocess
7 import shutil
8 import time
9
10 WRAPPER_VER = "0.0.1" #Keep in sync with the XML file
11
12 def stop_err(msg, err=1):
13 sys.stderr.write(msg+"\n")
14 sys.exit(err)
15
16
17 def get_version(mira_binary):
18 """Run MIRA to find its version number"""
19 # At the commend line I would use: mira -v | head -n 1
20 # however there is some pipe error when doing that here.
21 cmd = [mira_binary, "-v"]
22 try:
23 child = subprocess.Popen(cmd,
24 stdout=subprocess.PIPE,
25 stderr=subprocess.STDOUT)
26 except Exception, err:
27 sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err))
28 sys.exit(1)
29 ver, tmp = child.communicate()
30 del child
31 return ver.split("\n", 1)[0]
32
33
34 os.environ["PATH"] = "/mnt/galaxy/downloads/mira_4.0rc2_linux-gnu_x86_64_static/bin/:%s" % os.environ["PATH"]
35 mira_binary = "mira"
36 mira_ver = get_version(mira_binary)
37 if not mira_ver.strip().startswith("4.0"):
38 stop_err("This wrapper is for MIRA V4.0, not:\n%s" % mira_ver)
39 if "-v" in sys.argv:
40 print "MIRA wrapper version %s," % WRAPPER_VER
41 print mira_ver
42 sys.exit(0)
43
44
45 def log_manifest(manifest):
46 """Write the manifest file to stderr."""
47 sys.stderr.write("\n%s\nManifest file\n%s\n" % ("="*60, "="*60))
48 with open(manifest) as h:
49 for line in h:
50 sys.stderr.write(line)
51 sys.stderr.write("\n%s\nEnd of manifest\n%s\n" % ("="*60, "="*60))
52
53
54 def massage_symlinks(manifest):
55 """Create FASTQ aliases and edit the manifest to use them.
56
57 Short term measure for MIRA 4.0RC2 which depends on data file
58 extensions to decide the file format, and doesn't like *.dat
59 as used in Galaxy.
60 """
61 base = os.path.split(manifest)[0]
62 with open(manifest) as h:
63 lines = h.readlines()
64 f = 0
65 for i, line in enumerate(lines):
66 if not line.startswith("data ="):
67 continue
68 #Assumes no spaces in filename, would they have to be escaped?
69 new_line = "data ="
70 for filename in line[6:].strip().split():
71 if not filename:
72 continue
73 assert os.path.isfile(filename), filename
74 f += 1
75 alias = os.path.join(base, "input%i.fastq" % f)
76 new_line += " " + alias
77 cmd = "ln -s %s %s" % (filename, alias)
78 if os.system(cmd):
79 stop_err("Problem creating FASTQ alias:\n%s" % cmd)
80 lines[i] = new_line + "\n"
81 with open(manifest, "w") as h:
82 for line in lines:
83 #sys.stderr.write(line)
84 h.write(line)
85 return True
86
87
88 def collect_output(temp, name):
89 n3 = (temp, name, name, name)
90 f = "%s/%s_assembly/%s_d_results" % (temp, name, name)
91 if not os.path.isdir(f):
92 log_manifest(manifest)
93 stop_err("Missing output folder")
94 if not os.listdir(f):
95 log_manifest(manifest)
96 stop_err("Empty output folder")
97 missing = []
98 for old, new in [("%s/%s_out.maf" % (f, name), out_maf),
99 ("%s/%s_out.unpadded.fasta" % (f, name), out_fasta)]:
100 if not os.path.isfile(old):
101 missing.append(os.path.splitext(old)[-1])
102 else:
103 shutil.move(old, new)
104 if missing:
105 log_manifest(manifest)
106 sys.stderr.write("Contents of %r: %r\n" % (f, os.listdir(f)))
107 stop_err("Missing output files: %s" % ", ".join(missing))
108
109 def clean_up(temp, name):
110 folder = "%s/%s_assembly" % (temp, name)
111 if os.path.isdir(folder):
112 shutil.rmtree(folder)
113
114 #TODO - Run MIRA in /tmp or a configurable directory?
115 #Currently Galaxy puts us somewhere safe like:
116 #/opt/galaxy-dist/database/job_working_directory/846/
117 temp = "."
118 #name, out_fasta, out_qual, out_ace, out_caf, out_wig, out_log = sys.argv[1:8]
119 name = "MIRA"
120 manifest, out_maf, out_fasta, out_log = sys.argv[1:5]
121
122 #Hack until MIRA v4 lets us specify file format explicitly,
123 massage_symlinks(manifest)
124
125 start_time = time.time()
126 #cmd_list =sys.argv[8:]
127 cmd_list = [mira_binary, manifest]
128 cmd = " ".join(cmd_list)
129
130 assert os.path.isdir(temp)
131 d = "%s_assembly" % name
132 assert not os.path.isdir(d), "Path %s already exists" % d
133 try:
134 #Check path access
135 os.mkdir(d)
136 except Exception, err:
137 log_manifest(manifest)
138 sys.stderr.write("Error making directory %s\n%s" % (d, err))
139 sys.exit(1)
140
141 #print os.path.abspath(".")
142 #print cmd
143
144 handle = open(out_log, "w")
145 try:
146 #Run MIRA
147 child = subprocess.Popen(cmd_list,
148 stdout=handle,
149 stderr=subprocess.STDOUT)
150 except Exception, err:
151 log_manifest(manifest)
152 sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err))
153 #TODO - call clean up?
154 handle.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err))
155 handle.close()
156 sys.exit(1)
157 #Use .communicate as can get deadlocks with .wait(),
158 stdout, stderr = child.communicate()
159 assert not stdout and not stderr #Should be empty as sent to handle
160 run_time = time.time() - start_time
161 return_code = child.returncode
162 handle.write("\n\nMIRA took %0.2f minutes\n" % (run_time / 60.0))
163 print "MIRA took %0.2f minutes" % (run_time / 60.0)
164 if return_code:
165 handle.write("Return error code %i from command:\n" % return_code)
166 handle.write(cmd + "\n")
167 handle.close()
168 clean_up(temp, name)
169 log_manifest(manifest)
170 stop_err("Return error code %i from command:\n%s" % (return_code, cmd),
171 return_code)
172 handle.close()
173
174 #print "Collecting output..."
175 collect_output(temp, name)
176
177 #print "Cleaning up..."
178 clean_up(temp, name)
179
180 print "Done"