Mercurial > repos > galaxyp > msconvert
comparison msconvert_win/msconvert_wrapper.py @ 6:6b6bba73eadb draft
planemo upload commit d56659dd48f8c554a832787e71aca6ae65c90848
author | galaxyp |
---|---|
date | Tue, 14 Mar 2017 16:52:39 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
5:637e309295cf | 6:6b6bba73eadb |
---|---|
1 #!/usr/bin/env python | |
2 import optparse | |
3 import os | |
4 import sys | |
5 import tempfile | |
6 import shutil | |
7 import subprocess | |
8 import re | |
9 import logging | |
10 import shlex | |
11 | |
# The wrapper relies on features that first appeared in Python 2.6.
assert sys.version_info[:2] >= (2, 6)

log = logging.getLogger(__name__)
working_directory = os.getcwd()


def _reserve_temp_name(suffix):
    """Return a fresh, currently unused file name inside ``working_directory``.

    The temporary file is created only to obtain a unique name; it is closed
    (and therefore removed) before the name is returned.
    """
    with tempfile.NamedTemporaryFile(dir=working_directory, suffix=suffix) as handle:
        return handle.name


# Paths used to capture the child process' stderr / stdout.
tmp_stderr_name = _reserve_temp_name('.stderr')
tmp_stdout_name = _reserve_temp_name('.stdout')
18 | |
19 | |
def stop_err(msg):
    """Report a fatal error and terminate the script.

    Writes ``msg`` followed by a newline to stderr and exits with status 1,
    so that the failure is visible through the exit code as well as through
    the error stream.

    :param msg: text describing why the script cannot continue.
    :raises SystemExit: always.
    """
    sys.stderr.write("%s\n" % msg)
    sys.exit(1)
23 | |
24 | |
def read_stderr():
    """Return the captured stderr of the last command as text.

    Reads the file named by the module-level ``tmp_stderr_name`` in 1 MiB
    chunks. Returns an empty string when that file does not exist.

    :returns: the file contents as ``str``; undecodable bytes are replaced.
    """
    if not os.path.exists(tmp_stderr_name):
        return ''
    buffsize = 1048576
    chunks = []
    with open(tmp_stderr_name, 'rb') as tmp_stderr:
        try:
            while True:
                chunk = tmp_stderr.read(buffsize)
                # An empty read is the only reliable end-of-file signal; it
                # also terminates correctly when the file size is an exact
                # multiple of the buffer size.
                if not chunk:
                    break
                chunks.append(chunk)
        except OverflowError:
            # Keep whatever was read so far, as the original code did.
            pass
    data = b''.join(chunks)
    if not isinstance(data, str):
        # Python 3: the file was read as bytes, callers expect text.
        data = data.decode('utf-8', 'replace')
    return data
38 | |
39 | |
def execute(command, stdin=None):
    """Run ``command`` and capture its stdout / stderr in the temp files.

    The command string is split with ``shlex.split`` and run without a shell.
    Its output is redirected to the files named by ``tmp_stdout_name`` and
    ``tmp_stderr_name``; both captures are echoed to this process' stdout
    afterwards, whether or not the command succeeded.

    :param command: command line as a single string.
    :param stdin: optional stdin passed through to ``subprocess.Popen``.
    :raises Exception: when the command exits with a non-zero status; the
        message contains the exit code and the captured stderr.
    """
    try:
        with open(tmp_stderr_name, 'wb') as tmp_stderr:
            with open(tmp_stdout_name, 'wb') as tmp_stdout:
                args = shlex.split(command)  # handle proper splitting of quoted args
                proc = subprocess.Popen(args=args, shell=False, stderr=tmp_stderr.fileno(), stdout=tmp_stdout.fileno(), stdin=stdin, env=os.environ)
                returncode = proc.wait()
        # Checked after the capture files are closed so their content is
        # complete when read_stderr() is called.
        if returncode != 0:
            raise Exception("Program returned with non-zero exit code %d. stderr: %s" % (returncode, read_stderr()))
    finally:
        for captured_path in (tmp_stderr_name, tmp_stdout_name):
            # A missing capture file must not mask the real error.
            if os.path.exists(captured_path):
                with open(captured_path, "r") as captured:
                    print(captured.read())
52 | |
53 | |
def delete_file(path):
    """Remove the file at ``path`` if it exists (best effort).

    Failures to remove the file are deliberately ignored; only ``OSError``
    is swallowed so that interrupts and programming errors still surface.

    :param path: path of the file to remove.
    """
    if os.path.exists(path):
        try:
            os.remove(path)
        except OSError:
            # Best-effort cleanup: a file that cannot be removed is not fatal.
            pass
60 | |
61 | |
def delete_directory(directory):
    """Recursively remove ``directory`` if it exists (best effort).

    Removal failures are deliberately ignored; only filesystem errors are
    swallowed so that interrupts and programming errors still surface.

    :param directory: path of the directory tree to remove.
    """
    if os.path.exists(directory):
        try:
            shutil.rmtree(directory)
        except (OSError, shutil.Error):
            # Best-effort cleanup: a leftover directory is not fatal.
            pass
68 | |
69 | |
def symlink(source, link_name):
    """Make ``link_name`` refer to ``source``.

    On Windows a symbolic link is attempted through ``win32file``; if that
    module is missing or the call fails, the file is copied instead. On
    every other platform ``os.symlink`` is used.

    :param source: existing path the link should point to.
    :param link_name: path of the link (or copy) to create.
    """
    import platform
    if platform.system() == 'Windows':
        try:
            import win32file
            # CreateSymbolicLink takes the link path first and the target
            # second; flag 1 marks a directory link, 0 a file link.
            flags = 1 if os.path.isdir(source) else 0
            win32file.CreateSymbolicLink(link_name, source, flags)
        except Exception:
            # pywin32 unavailable or link creation refused: fall back to a copy.
            shutil.copy(source, link_name)
    else:
        os.symlink(source, link_name)
80 | |
81 | |
def copy_to_working_directory(data_file, relative_path):
    """Expose ``data_file`` under ``relative_path`` and return that path.

    Nothing is done when both arguments already resolve to the same absolute
    path; otherwise ``symlink`` is asked to create ``relative_path``.
    """
    already_in_place = os.path.abspath(data_file) == os.path.abspath(relative_path)
    if not already_in_place:
        symlink(data_file, relative_path)
    return relative_path
86 | |
87 | |
def __main__():
    """Template entry point: hand control to ``run_script``.

    An identical definition at the end of the file rebinds this name.
    """
    run_script()
90 | |
91 #ENDTEMPLATE | |
92 | |
# Accepted values for --toextension. An "unindexed_" prefix is stripped by
# _build_base_cmd and turned into msconvert's --noindex flag.
to_extensions = [
    'mzML',
    'mzXML',
    'unindexed_mzML',
    'unindexed_mzXML',
    'mgf',
    'mz5',
    'txt',
    'ms2',
    'cms2',
]
94 | |
95 | |
def str_to_bool(v):
    """Return True when ``v`` is "yes", "true", "t" or "1", ignoring case.

    From http://stackoverflow.com/questions/715417/converting-from-a-string-to-boolean-in-python
    """
    truthy_spellings = ("yes", "true", "t", "1")
    normalized = v.lower()
    return normalized in truthy_spellings
99 | |
100 | |
101 def _add_filter(filters_file, contents): | |
102 filters_file.write("filter=%s\n" % contents) | |
103 | |
104 | |
105 def _skip_line(options, file_num, line_parts): | |
106 file_num_column = options.filter_table_file_column | |
107 if not file_num_column: | |
108 return False | |
109 else: | |
110 target_file_num_val = str(file_num).strip() | |
111 query_file_num_val = line_parts[int(file_num_column) - 1].strip() | |
112 #print "target %s, query %s" % (target_file_num_val, query_file_num_val) | |
113 return target_file_num_val != query_file_num_val | |
114 | |
115 | |
116 def _read_table_numbers(path, options, file_num=None): | |
117 unique_numbers = set([]) | |
118 column_num = options.filter_table_column | |
119 input = open(path, "r") | |
120 first_line = True | |
121 for line in input: | |
122 if not line: | |
123 continue | |
124 line = line.strip() | |
125 if line.startswith("#"): | |
126 first_line = False | |
127 continue | |
128 if column_num == None: | |
129 column = line | |
130 else: | |
131 line_parts = line.split("\t") | |
132 if _skip_line(options, file_num, line_parts): | |
133 continue | |
134 column = line_parts[int(column_num) - 1] | |
135 match = re.match("\d+", column) | |
136 if match: | |
137 unique_numbers.add(int(match.group())) | |
138 first_line = False | |
139 return unique_numbers | |
140 | |
141 | |
def shellquote(s):
    """Wrap ``s`` in double quotes so ``shlex.split`` returns it unchanged.

    Inside double quotes ``shlex`` treats a backslash as an escape for ``"``
    and ``\\``, so backslashes are doubled before quotes are escaped.
    Without that, a value ending in a backslash or containing ``\\"`` cannot
    be split back into the original string.

    :param s: the argument to quote.
    :returns: the quoted argument.
    """
    escaped = s.replace('\\', '\\\\').replace('"', '\\"')
    return '"' + escaped + '"'
144 | |
145 | |
146 def _add_filter_line_from_file(filter_file, options, file_num=None): | |
147 file = options.filter_table | |
148 if not file: | |
149 return | |
150 numbers = _read_table_numbers(file, options, file_num) | |
151 msconvert_int_set = " ".join([str(number) for number in numbers]) | |
152 filter_type = options.filter_table_type | |
153 if filter_type == 'number': | |
154 filter_prefix = 'scanNumber' | |
155 else: | |
156 filter_prefix = 'index' | |
157 _add_filter(filter_file, "%s %s" % (filter_prefix, msconvert_int_set)) | |
158 | |
159 | |
def _create_filters_file(options, file_num=None, debug=False):
    """Write the msconvert filters config file and return its path.

    The file is named ``filters`` plus ``file_num`` (when given and
    non-zero) and is created in the current directory. It contains, in
    order: the contents of ``options.filters_file`` if set, one ``filter=``
    line per entry of ``options.filter``, and the table-derived filter line.

    :param options: parsed command-line options.
    :param file_num: number of the input file this config is for.
    :param debug: when true, echo the generated file to stdout.
    :returns: the path of the written file.
    """
    suffix = "" if not file_num else str(file_num)
    filters_file_path = "filters%s" % suffix
    with open(filters_file_path, "w") as filters_file:
        if options.filters_file:
            with open(options.filters_file, "r") as preset:
                preset_contents = preset.read()
            filters_file.write(preset_contents)
            # Keep the next filter on its own line even if the preset file
            # does not end with a newline.
            if preset_contents and not preset_contents.endswith("\n"):
                filters_file.write("\n")
        for filter_spec in options.filter:
            _add_filter(filters_file, filter_spec)
        _add_filter_line_from_file(filters_file, options, file_num=file_num)

    if debug:
        with open(filters_file_path, "r") as written:
            print(written.read())
    return filters_file_path
174 | |
175 | |
def _build_base_cmd(options,args=None):
    """Assemble the msconvert command line shared by every invocation.

    Starts from ``msconvert --<format>`` (adding ``--noindex`` for the
    ``unindexed_`` formats), then appends, in order: the extra ``args``,
    ``--zlib`` when enabled, and the binary / m/z / intensity encoding
    flags that were supplied.
    """
    unindexed_prefix = "unindexed_"
    target_format = options.toextension
    index_flag = ""
    if target_format.startswith(unindexed_prefix):
        target_format = target_format[len(unindexed_prefix):]
        index_flag = "--noindex"

    pieces = ["msconvert --%s %s" % (target_format, index_flag)]
    if args:
        pieces.append(' '.join(args))
    if str_to_bool(options.zlib):
        pieces.append("--zlib")
    if options.binaryencoding:
        pieces.append("--%s" % options.binaryencoding)
    if options.mzencoding:
        pieces.append("--mz%s" % options.mzencoding)
    if options.intensityencoding:
        pieces.append("--inten%s" % options.intensityencoding)
    return " ".join(pieces)
195 | |
196 | |
def _run(base_cmd, output_dir='output', inputs=None, debug=False):
    """Run msconvert on ``inputs`` and return the path of its single output.

    Creates ``output_dir``, appends ``-o <output_dir>`` and the quoted
    inputs to ``base_cmd``, executes the result and expects exactly one
    file to appear in ``output_dir``.

    :param base_cmd: command line prefix without output dir and inputs.
    :param output_dir: directory to create for the output; must not exist.
    :param inputs: paths of the input files (defaults to none).
    :param debug: when true, print the full command before running it.
    :returns: path of the produced file, inside ``output_dir``.
    :raises Exception: when the command fails or the output directory does
        not contain exactly one file.
    """
    if inputs is None:
        inputs = []
    inputs_as_str = " ".join([shellquote(input_path) for input_path in inputs])
    os.mkdir(output_dir)
    cmd = "%s -o %s %s" % (base_cmd, shellquote(output_dir), inputs_as_str)
    if debug:
        print(cmd)
    execute(cmd)
    output_files = os.listdir(output_dir)
    # Explicit check rather than assert, which is stripped under -O.
    if len(output_files) != 1:
        raise Exception("Expected exactly one output file in %s but found %d" % (output_dir, len(output_files)))
    return os.path.join(output_dir, output_files[0])
208 | |
209 | |
def run_script():
    """Parse the command line, stage the inputs and drive msconvert.

    Steps:

    1. Parse and validate the options (at least one ``--input``, matching
       ``--input_name`` / ``--ident_name`` counts, an ``--output``).
    2. Link every input into the working directory under a name ending in
       ``.<fromextension>`` (when one was given); inputs listed as
       ``--implicit`` are staged but kept off the msconvert command line.
       Ident files are linked under their ``--ident_name``.
    3. Without ``--filter_table_file_column`` one shared filters file is
       used; with it, each input is first filtered on its own and the
       filtered files become the inputs of the final run.
    4. Run msconvert (with ``--merge`` for several inputs), copy the result
       to ``--output`` and, if requested, copy the first
       ``*.mzRefinement.tsv`` file found in the working directory to
       ``--refinement``.

    The staging trace is printed only when ``--debug`` is given.
    """
    parser = optparse.OptionParser()
    parser.add_option('--input', dest='inputs', action='append', default=[])
    parser.add_option('--input_name', dest='input_names', action='append', default=[])
    parser.add_option('--implicit', dest='implicits', action='append', default=[], help='input files that should NOT be on the msconvert command line.')
    parser.add_option('--ident', dest='idents', action='append', default=[])
    parser.add_option('--ident_name', dest='ident_names', action='append', default=[])
    parser.add_option('--output', dest='output')
    parser.add_option('--refinement', dest='refinement')
    parser.add_option('--fromextension', dest='fromextension')
    parser.add_option('--toextension', dest='toextension', default='mzML', choices=to_extensions)
    parser.add_option('--binaryencoding', dest='binaryencoding', choices=['32', '64'])
    parser.add_option('--mzencoding', dest='mzencoding', choices=['32', '64'])
    parser.add_option('--intensityencoding', dest='intensityencoding', choices=['32', '64'])
    parser.add_option('--zlib', dest='zlib', default="false")
    parser.add_option('--filter', dest='filter', action='append', default=[])
    parser.add_option('--filters_file', dest='filters_file', default=None)
    parser.add_option('--filter_table', default=None)
    parser.add_option('--filter_table_type', default='index', choices=['index', 'number'])
    parser.add_option('--filter_table_column', default=None)
    parser.add_option('--filter_table_file_column', default=None)
    parser.add_option('--debug', dest='debug', action='store_true', default=False)

    (options, args) = parser.parse_args()
    if len(options.inputs) < 1:
        stop_err("No input files to msconvert specified")
    if len(options.input_names) > 0 and len(options.input_names) != len(options.inputs):
        stop_err("Number(s) of supplied input names and input files do not match")
    if not options.output:
        stop_err("Must specify output location")
    # Every ident file needs a name to be linked under.
    if len(options.ident_names) < len(options.idents):
        stop_err("Number(s) of supplied ident names and ident files do not match")

    from_extension = options.fromextension
    input_files = []
    for i, input_path in enumerate(options.inputs):
        is_implicit = input_path in options.implicits
        input_base = None
        if len(options.input_names) > i:
            # Quotes in the display name would break the command line.
            input_base = options.input_names[i].replace("'", "").replace("\"", "")
        if not input_base:
            input_base = 'input%s' % i
        # Append the source extension only when one was supplied, the name
        # lacks it, and the file is an explicit msconvert input.
        needs_extension = (
            bool(from_extension)
            and not is_implicit
            and not input_base.lower().endswith('.%s' % from_extension.lower())
        )
        if needs_extension:
            input_file = '%s.%s' % (input_base, from_extension)
        else:
            input_file = input_base
        if options.debug:
            print("input_base: %s" % input_base)
            print("input_file: %s" % input_file)
        copy_to_working_directory(input_path, input_file)
        if is_implicit:
            continue
        input_files.append(input_file)
    for ident, ident_file in zip(options.idents, options.ident_names):
        copy_to_working_directory(ident, ident_file)

    cmd = _build_base_cmd(options, args=args)
    file_column = options.filter_table_file_column
    if not file_column:
        # Apply same filters to all files, just create a universal filter file
        # and run msconvert once.
        filters_file_path = _create_filters_file(options, debug=options.debug)
        cmd = "%s -c %s" % (cmd, filters_file_path)
    else:
        # Dispatching on a column to filter different files differently, need to filter
        # each input once with msconvert and then merge once.
        filtered_files = []
        for index, input_file in enumerate(input_files):
            filters_file_path = _create_filters_file(options, index + 1, debug=options.debug)
            filter_cmd = "%s -c %s" % (cmd, filters_file_path)
            filtered_output_file = _run(filter_cmd, output_dir='output%d' % index, inputs=[input_file], debug=options.debug)
            filtered_files.append(filtered_output_file)
        input_files = filtered_files
    if len(input_files) > 1:
        cmd = "%s --merge" % cmd
    output_file = _run(cmd, output_dir='output', inputs=input_files, debug=options.debug)
    shutil.copy(output_file, options.output)
    if options.refinement:
        # msconvert writes refinement data to a *.mzRefinement.tsv file in
        # the working directory; export the first one found.
        for fname in os.listdir(os.getcwd()):
            if fname.endswith('.mzRefinement.tsv'):
                shutil.copy(fname, options.refinement)
                break
295 | |
def __main__():
    """Script entry point: run the msconvert wrapper."""
    run_script()


# Only run when executed as a script, not when imported.
if __name__ == '__main__':
    __main__()