comparison msconvert_win/msconvert_wrapper.py @ 6:6b6bba73eadb draft

planemo upload commit d56659dd48f8c554a832787e71aca6ae65c90848
author galaxyp
date Tue, 14 Mar 2017 16:52:39 -0400
parents
children
comparison
equal deleted inserted replaced
5:637e309295cf 6:6b6bba73eadb
1 #!/usr/bin/env python
2 import optparse
3 import os
4 import sys
5 import tempfile
6 import shutil
7 import subprocess
8 import re
9 import logging
10 import shlex
11
# The wrapper needs at least Python 2.6.
assert sys.version_info[:2] >= (2, 6)

log = logging.getLogger(__name__)
working_directory = os.getcwd()


def _reserve_temp_name(suffix):
    """Return a fresh, currently unused file name in the working directory.

    A NamedTemporaryFile is created only to obtain a unique name. It is
    closed explicitly (which also deletes it) rather than left to the garbage
    collector, so no handle stays open and the name can be re-opened later.
    """
    handle = tempfile.NamedTemporaryFile(dir=working_directory, suffix=suffix)
    try:
        return handle.name
    finally:
        handle.close()


# Files that execute() redirects the child process' stderr/stdout into.
tmp_stderr_name = _reserve_temp_name('.stderr')
tmp_stdout_name = _reserve_temp_name('.stdout')
18
19
def stop_err(msg):
    """Report a fatal error and terminate the script.

    Writes *msg* followed by a newline to standard error, then raises
    SystemExit with status 1 so the failure is visible to the calling
    process (a bare ``sys.exit()`` would report success).
    """
    sys.stderr.write("%s\n" % msg)
    sys.exit(1)
23
24
def read_stderr():
    """Return the contents of the captured stderr file as text.

    Returns an empty string when the file named by ``tmp_stderr_name`` does
    not exist. The file is read in 1 MiB chunks until an empty chunk signals
    end of file, so a file whose size is an exact multiple of the chunk size
    terminates correctly. An OverflowError during reading is tolerated and
    whatever was read so far is returned.
    """
    if not os.path.exists(tmp_stderr_name):
        return ''
    buffsize = 1048576
    chunks = []
    with open(tmp_stderr_name, 'rb') as tmp_stderr:
        try:
            while True:
                chunk = tmp_stderr.read(buffsize)
                if not chunk:
                    break
                chunks.append(chunk)
        except OverflowError:
            pass
    data = b''.join(chunks)
    # On Python 3 the binary read yields bytes; decode leniently so that
    # building an error message never fails on odd tool output.
    if not isinstance(data, str):
        data = data.decode('utf-8', 'replace')
    return data
38
39
def execute(command, stdin=None):
    """Run *command* and echo its captured output.

    The command string is split with shlex (so quoted arguments stay intact)
    and run without a shell. The child's stderr and stdout are redirected to
    the files named by ``tmp_stderr_name`` and ``tmp_stdout_name``. Whatever
    was captured is printed afterwards, stderr first, whether or not the
    command succeeded.

    Raises:
        Exception: if the process exits with a non-zero status; the message
            contains the exit code and the captured stderr.
    """
    try:
        with open(tmp_stderr_name, 'wb') as tmp_stderr:
            with open(tmp_stdout_name, 'wb') as tmp_stdout:
                args = shlex.split(command)  # handle proper splitting of quoted args
                proc = subprocess.Popen(args=args, shell=False, stderr=tmp_stderr.fileno(), stdout=tmp_stdout.fileno(), stdin=stdin, env=os.environ)
                returncode = proc.wait()
        if returncode != 0:
            raise Exception("Program returned with non-zero exit code %d. stderr: %s" % (returncode, read_stderr()))
    finally:
        # Echo the captured streams. Skip a file that was never created so a
        # failure here cannot mask the original error, and close each handle.
        for captured_name in (tmp_stderr_name, tmp_stdout_name):
            if os.path.exists(captured_name):
                with open(captured_name, "r") as captured:
                    print(captured.read())
52
53
def delete_file(path):
    """Remove the file at *path* on a best-effort basis.

    A missing path is a no-op. Operating-system errors raised by the removal
    (e.g. permission problems, or *path* being a directory) are ignored.
    Other exceptions such as KeyboardInterrupt are no longer swallowed.
    """
    if os.path.exists(path):
        try:
            os.remove(path)
        except OSError:
            pass
60
61
def delete_directory(directory):
    """Recursively remove *directory* on a best-effort basis.

    A missing path is a no-op. Operating-system errors raised while removing
    the tree are ignored. Other exceptions such as KeyboardInterrupt are no
    longer swallowed.
    """
    if os.path.exists(directory):
        try:
            shutil.rmtree(directory)
        except OSError:
            pass
68
69
def symlink(source, link_name):
    """Make *source* available under *link_name*.

    On non-Windows platforms this is a plain ``os.symlink``. On Windows a
    symbolic link is attempted through pywin32; if that fails for any reason
    (module missing, insufficient privileges, ...) the file is copied
    instead.
    """
    import platform
    if platform.system() != 'Windows':
        os.symlink(source, link_name)
        return
    try:
        import win32file
        # NOTE(review): pywin32 documents CreateSymbolicLink as
        # (SymlinkFileName, TargetFileName, Flags), i.e. link name first, and
        # flag 1 as "directory". The arguments here look swapped, which would
        # make this call fail and always fall through to the copy below.
        # Confirm before changing; behaviour is deliberately left as it was.
        win32file.CreateSymbolicLink(source, link_name, 1)
    except Exception:
        shutil.copy(source, link_name)
80
81
def copy_to_working_directory(data_file, relative_path):
    """Expose *data_file* under *relative_path* and return *relative_path*.

    Nothing is linked when both arguments already resolve to the same
    absolute path; otherwise the work is delegated to symlink().
    """
    already_in_place = os.path.abspath(data_file) == os.path.abspath(relative_path)
    if not already_in_place:
        symlink(data_file, relative_path)
    return relative_path
86
87
def __main__():
    """Entry point of the script template: hand over to run_script().

    A second definition with the same name further down in this file
    replaces this one when the module is loaded.
    """
    run_script()
90
91 #ENDTEMPLATE
92
# Values accepted by the --toextension option. The "unindexed_" variants are
# turned into the plain format plus --noindex by _build_base_cmd().
to_extensions = [
    'mzML',
    'mzXML',
    'unindexed_mzML',
    'unindexed_mzXML',
    'mgf',
    'mz5',
    'txt',
    'ms2',
    'cms2',
]
94
95
def str_to_bool(v):
    """Interpret the string *v* as a boolean flag.

    Case-insensitively, "yes", "true", "t" and "1" are true; every other
    string is false.
    Based on http://stackoverflow.com/questions/715417/converting-from-a-string-to-boolean-in-python
    """
    truthy_spellings = ["yes", "true", "t", "1"]
    normalized = v.lower()
    return normalized in truthy_spellings
99
100
101 def _add_filter(filters_file, contents):
102 filters_file.write("filter=%s\n" % contents)
103
104
105 def _skip_line(options, file_num, line_parts):
106 file_num_column = options.filter_table_file_column
107 if not file_num_column:
108 return False
109 else:
110 target_file_num_val = str(file_num).strip()
111 query_file_num_val = line_parts[int(file_num_column) - 1].strip()
112 #print "target %s, query %s" % (target_file_num_val, query_file_num_val)
113 return target_file_num_val != query_file_num_val
114
115
116 def _read_table_numbers(path, options, file_num=None):
117 unique_numbers = set([])
118 column_num = options.filter_table_column
119 input = open(path, "r")
120 first_line = True
121 for line in input:
122 if not line:
123 continue
124 line = line.strip()
125 if line.startswith("#"):
126 first_line = False
127 continue
128 if column_num == None:
129 column = line
130 else:
131 line_parts = line.split("\t")
132 if _skip_line(options, file_num, line_parts):
133 continue
134 column = line_parts[int(column_num) - 1]
135 match = re.match("\d+", column)
136 if match:
137 unique_numbers.add(int(match.group()))
138 first_line = False
139 return unique_numbers
140
141
def shellquote(s):
    """Wrap *s* in double quotes so shlex.split() returns it as one argument.

    Backslashes are escaped before double quotes. Without that, a value
    ending in a backslash would escape the closing quote, and a doubled
    backslash would collapse to one when the command line is split again.
    """
    escaped = s.replace('\\', '\\\\').replace('"', '\\"')
    return '"' + escaped + '"'
144
145
146 def _add_filter_line_from_file(filter_file, options, file_num=None):
147 file = options.filter_table
148 if not file:
149 return
150 numbers = _read_table_numbers(file, options, file_num)
151 msconvert_int_set = " ".join([str(number) for number in numbers])
152 filter_type = options.filter_table_type
153 if filter_type == 'number':
154 filter_prefix = 'scanNumber'
155 else:
156 filter_prefix = 'index'
157 _add_filter(filter_file, "%s %s" % (filter_prefix, msconvert_int_set))
158
159
def _create_filters_file(options, file_num=None, debug=False):
    """Write the msconvert filters config file and return its path.

    The file is named "filters" plus *file_num* (when given and non-zero)
    and is created relative to the current directory. It receives, in order:
    the contents of ``options.filters_file`` (if set), one line per entry of
    ``options.filter``, and the table-derived line added by
    _add_filter_line_from_file(). With *debug* the finished file is printed.
    """
    suffix = str(file_num) if file_num else ""
    filters_file_path = "filters%s" % suffix
    with open(filters_file_path, "w") as filters_file:
        if options.filters_file:
            with open(options.filters_file, "r") as preset_file:
                preset = preset_file.read()
            filters_file.write(preset)
            # Keep the next "filter=" entry from being glued onto the last
            # preset line when the preset lacks a trailing newline.
            if preset and not preset.endswith("\n"):
                filters_file.write("\n")
        for filter_spec in options.filter:
            _add_filter(filters_file, filter_spec)
        _add_filter_line_from_file(filters_file, options, file_num=file_num)
    if debug:
        with open(filters_file_path, "r") as written:
            print(written.read())
    return filters_file_path
174
175
def _build_base_cmd(options, args=None):
    """Assemble the msconvert command line shared by every invocation.

    The output format comes from ``options.toextension``; an "unindexed_"
    prefix is stripped and replaced by --noindex. Extra positional *args*
    are appended verbatim, followed by --zlib and the binary / m/z /
    intensity encoding switches when the matching options are set.
    """
    unindexed_prefix = "unindexed_"
    target_format = options.toextension
    index_switch = ""
    if target_format.startswith(unindexed_prefix):
        target_format = target_format[len(unindexed_prefix):]
        index_switch = "--noindex"
    cmd = "msconvert --%s %s" % (target_format, index_switch)
    if args:
        cmd = cmd + " " + ' '.join(args)
    if str_to_bool(options.zlib):
        cmd = cmd + " --zlib"
    if options.binaryencoding:
        cmd = cmd + " --%s" % options.binaryencoding
    if options.mzencoding:
        cmd = cmd + " --mz%s" % options.mzencoding
    if options.intensityencoding:
        cmd = cmd + " --inten%s" % options.intensityencoding
    return cmd
195
196
def _run(base_cmd, output_dir='output', inputs=None, debug=False):
    """Run msconvert on *inputs* and return the path of its single output.

    *output_dir* is created (it must not exist yet), appended to *base_cmd*
    as the ``-o`` target together with the quoted input paths, and the
    command is run through execute(). With *debug* the command line is
    printed first.

    Raises:
        Exception: if the output directory does not contain exactly one file
            afterwards. This is an explicit check rather than an ``assert``
            so it still runs under ``python -O``.
    """
    if inputs is None:
        inputs = []
    inputs_as_str = " ".join(shellquote(input_path) for input_path in inputs)
    os.mkdir(output_dir)
    cmd = "%s -o %s %s" % (base_cmd, shellquote(output_dir), inputs_as_str)
    if debug:
        print(cmd)
    execute(cmd)
    output_files = os.listdir(output_dir)
    if len(output_files) != 1:
        raise Exception("Expected exactly one output file in %s but found %d: %s" % (output_dir, len(output_files), ", ".join(output_files)))
    return os.path.join(output_dir, output_files[0])
208
209
def _build_option_parser():
    """Create the optparse parser describing the wrapper's command line."""
    parser = optparse.OptionParser()
    parser.add_option('--input', dest='inputs', action='append', default=[])
    parser.add_option('--input_name', dest='input_names', action='append', default=[])
    parser.add_option('--implicit', dest='implicits', action='append', default=[], help='input files that should NOT be on the msconvert command line.')
    parser.add_option('--ident', dest='idents', action='append', default=[])
    parser.add_option('--ident_name', dest='ident_names', action='append', default=[])
    parser.add_option('--output', dest='output')
    parser.add_option('--refinement', dest='refinement')
    parser.add_option('--fromextension', dest='fromextension')
    parser.add_option('--toextension', dest='toextension', default='mzML', choices=to_extensions)
    parser.add_option('--binaryencoding', dest='binaryencoding', choices=['32', '64'])
    parser.add_option('--mzencoding', dest='mzencoding', choices=['32', '64'])
    parser.add_option('--intensityencoding', dest='intensityencoding', choices=['32', '64'])
    parser.add_option('--zlib', dest='zlib', default="false")
    parser.add_option('--filter', dest='filter', action='append', default=[])
    parser.add_option('--filters_file', dest='filters_file', default=None)
    parser.add_option('--filter_table', default=None)
    parser.add_option('--filter_table_type', default='index', choices=['index', 'number'])
    parser.add_option('--filter_table_column', default=None)
    parser.add_option('--filter_table_file_column', default=None)
    parser.add_option('--debug', dest='debug', action='store_true', default=False)
    return parser


def _working_name_for_input(options, position, input_path):
    """Pick the working-directory file name for the input at *position*.

    The supplied input name (with quote characters removed) is used when
    available, otherwise "input<position>". The --fromextension suffix is
    appended unless the name already ends with it (case-insensitively) or
    the input is listed as implicit.
    """
    input_base = None
    if len(options.input_names) > position:
        input_base = options.input_names[position].replace("'", "").replace("\"", "")
    if not input_base:
        input_base = 'input%s' % position
    expected_suffix = '.%s' % options.fromextension.lower()
    if input_base.lower().endswith(expected_suffix) or input_path in options.implicits:
        return input_base
    return '%s.%s' % (input_base, options.fromextension)


def run_script():
    """Parse the command line, stage the inputs and drive msconvert.

    Inputs and ident files are linked into the working directory under
    their display names. Without --filter_table_file_column one filters file
    is shared by a single msconvert run; with it, every input is first
    converted with its own filters file and the results become the inputs of
    the final run. --merge is added when more than one input remains. The
    result is copied to --output and, if --refinement is given, the first
    "*.mzRefinement.tsv" found in the current directory is copied there.

    Invalid command lines end the script through stop_err(). The per-input
    name tracing is printed only with --debug.
    """
    parser = _build_option_parser()
    (options, args) = parser.parse_args()
    if len(options.inputs) < 1:
        stop_err("No input files to msconvert specified")
    if len(options.input_names) > 0 and len(options.input_names) != len(options.inputs):
        stop_err("Number(s) of supplied input names and input files do not match")
    if not options.output:
        stop_err("Must specify output location")
    if options.fromextension is None:
        # Every input name is compared against this extension below.
        stop_err("Must specify the extension of the input files (--fromextension)")
    if len(options.ident_names) < len(options.idents):
        stop_err("Number(s) of supplied ident names and ident files do not match")

    input_files = []
    for position, input_path in enumerate(options.inputs):
        input_file = _working_name_for_input(options, position, input_path)
        if options.debug:
            print("input %s staged as: %s" % (input_path, input_file))
        copy_to_working_directory(input_path, input_file)
        # Implicit inputs are staged but kept off the msconvert command line.
        if input_path not in options.implicits:
            input_files.append(input_file)
    for position, ident in enumerate(options.idents):
        copy_to_working_directory(ident, options.ident_names[position])

    cmd = _build_base_cmd(options, args=args)
    if not options.filter_table_file_column:
        # Apply the same filters to all files: create one universal filters
        # file and run msconvert once.
        filters_file_path = _create_filters_file(options, debug=options.debug)
        cmd = "%s -c %s" % (cmd, filters_file_path)
    else:
        # Dispatching on a column to filter different files differently: each
        # input is filtered once with msconvert, then everything is merged.
        filtered_files = []
        for index, input_file in enumerate(input_files):
            filters_file_path = _create_filters_file(options, index + 1, debug=options.debug)
            filter_cmd = "%s -c %s" % (cmd, filters_file_path)
            filtered_files.append(_run(filter_cmd, output_dir='output%d' % index, inputs=[input_file], debug=options.debug))
        input_files = filtered_files
    if len(input_files) > 1:
        cmd = "%s --merge" % cmd
    output_file = _run(cmd, output_dir='output', inputs=input_files, debug=options.debug)
    shutil.copy(output_file, options.output)
    if options.refinement:
        # Look for the .mzRefinement.tsv file in the current directory.
        for fname in os.listdir(os.getcwd()):
            if fname.endswith('.mzRefinement.tsv'):
                shutil.copy(fname, options.refinement)
                break
295
def __main__():
    """Command-line entry point: run the msconvert wrapper."""
    run_script()


# Only start the conversion when the file is executed as a script.
if __name__ == '__main__':
    __main__()