Mercurial > repos > jmchilton > johnstestrepo
diff galaxyp-galaxyp-toolshed-msconvert-9663ae998499/msconvert_wrapper.py @ 0:942cec8d04c7 draft
Initial commit.
author | jmchilton |
---|---|
date | Wed, 19 Sep 2012 00:07:16 -0400 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/env python
"""Command-line wrapper that builds and runs an ``msconvert`` invocation.

The script copies the input file into the working directory, writes an
msconvert configuration file containing the requested filters, runs
``msconvert`` into an ``output`` directory and copies the single resulting
file to the path given by ``--output``.
"""
# Recovered from the diff of
# galaxyp-galaxyp-toolshed-msconvert-9663ae998499/msconvert_wrapper.py
# (new file, 182 lines).
import optparse
import os
import sys
import tempfile
import shutil
import subprocess
import re
from os.path import basename
import logging

assert sys.version_info[:2] >= (2, 6)

log = logging.getLogger(__name__)
working_directory = os.getcwd()


def _reserve_temp_name(suffix):
    """Return a unique path in ``working_directory`` ending in *suffix*.

    Only the *name* is kept: the placeholder file is removed when the
    handle is closed. Closing explicitly (rather than relying on garbage
    collection) makes that removal happen at a well-defined moment.
    """
    handle = tempfile.NamedTemporaryFile(dir=working_directory, suffix=suffix)
    try:
        return handle.name
    finally:
        handle.close()


tmp_stderr_name = _reserve_temp_name('.stderr')
tmp_stdout_name = _reserve_temp_name('.stdout')


def stop_err(msg):
    """Write *msg* to stderr and terminate with a non-zero exit status."""
    sys.stderr.write("%s\n" % msg)
    # A bare sys.exit() reports success (status 0) despite the error.
    sys.exit(1)


def _read_text(path):
    """Return the contents of *path* as text, or ``''`` if it is missing.

    The file is read as bytes and decoded leniently so that unexpected
    bytes in the program output cannot raise while reporting an error.
    """
    if not os.path.exists(path):
        return ''
    with open(path, 'rb') as handle:
        data = handle.read()
    if not isinstance(data, str):
        # Python 3: bytes -> str. On Python 2 ``bytes`` already is ``str``.
        data = data.decode('utf-8', 'replace')
    return data


def read_stderr():
    """Return everything the last executed command wrote to stderr.

    Returns an empty string when the capture file does not exist.
    """
    return _read_text(tmp_stderr_name)


def execute(command, stdin=None):
    """Run *command*, capturing its stdout/stderr in the temp files.

    Parameters:
        command: either a shell command string (run with ``shell=True``,
            as before) or a list/tuple of arguments (run directly, with
            no shell interpretation of the arguments).
        stdin: optional file object or descriptor passed to the process.

    Raises:
        Exception: if the process exits with a non-zero status; the
            message contains the exit code and the captured stderr.

    The captured stderr and stdout are always printed afterwards, whether
    or not the command succeeded.
    """
    use_shell = not isinstance(command, (list, tuple))
    try:
        with open(tmp_stderr_name, 'wb') as tmp_stderr:
            with open(tmp_stdout_name, 'wb') as tmp_stdout:
                proc = subprocess.Popen(args=command,
                                        shell=use_shell,
                                        stderr=tmp_stderr.fileno(),
                                        stdout=tmp_stdout.fileno(),
                                        stdin=stdin,
                                        env=os.environ)
                returncode = proc.wait()
        if returncode != 0:
            raise Exception("Program returned with non-zero exit code %d. stderr: %s"
                            % (returncode, read_stderr()))
    finally:
        print(_read_text(tmp_stderr_name))
        print(_read_text(tmp_stdout_name))


def delete_file(path):
    """Remove the file at *path*; a missing or undeletable file is ignored."""
    try:
        os.remove(path)
    except OSError:
        # Best-effort cleanup: nothing useful can be done on failure.
        pass


def delete_directory(directory):
    """Recursively remove *directory*; failures are ignored (best effort)."""
    if os.path.exists(directory):
        shutil.rmtree(directory, ignore_errors=True)


def symlink(source, link_name):
    """Create a symbolic link *link_name* pointing at *source*.

    Uses ``os.symlink`` where the interpreter provides it and falls back
    to pywin32 otherwise (e.g. Python 2 on Windows).
    """
    if hasattr(os, 'symlink'):
        os.symlink(source, link_name)
        return
    import win32file
    # pywin32 expects (link name, target, flags); flag 1 marks a
    # directory link and must only be set when the target is a directory.
    flags = 1 if os.path.isdir(source) else 0
    win32file.CreateSymbolicLink(link_name, source, flags)


def copy_to_working_directory(data_file, relative_path):
    """Copy *data_file* to *relative_path* unless both are the same file.

    Returns *relative_path* in either case.
    """
    if os.path.abspath(data_file) != os.path.abspath(relative_path):
        shutil.copy(data_file, relative_path)
    return relative_path


def __main__():
    """Script entry point."""
    run_script()

#ENDTEMPLATE

# Values accepted by --toextension. The "unindexed_" variants select the
# same msconvert format flag combined with --noindex.
to_extensions = ['mzML', 'mzXML', 'unindexed_mzML', 'unindexed_mzXML', 'mgf', 'txt', 'ms2', 'cms2']


def str_to_bool(v):
    """ From http://stackoverflow.com/questions/715417/converting-from-a-string-to-boolean-in-python """
    return v.lower() in ("yes", "true", "t", "1")


def _add_filter(filters_file, contents):
    """Append one ``filter="..."`` line to the open *filters_file*."""
    filters_file.write("filter=\"%s\"\n" % contents)


def _read_table_numbers(path):
    """Return the set of integers that start the lines of the file *path*.

    Blank lines, lines starting with ``#`` and lines that do not begin
    with a digit are skipped.
    """
    unique_numbers = set()
    with open(path, "r") as table:
        for line in table:
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            match = re.match(r"\d+", line)
            if match:
                unique_numbers.add(int(match.group()))
    return unique_numbers


def _add_filter_line_from_file(file, filter_file, filter_prefix):
    """Add a ``<filter_prefix> n1 n2 ...`` filter built from a table file.

    Does nothing when *file* is empty or ``None``. The numbers are written
    in ascending order so the generated file is deterministic.
    """
    if not file:
        return
    numbers = _read_table_numbers(file)
    msconvert_int_set = " ".join(str(number) for number in sorted(numbers))
    _add_filter(filter_file, "%s %s" % (filter_prefix, msconvert_int_set))


def _create_filters_file(options):
    """Write the msconvert config file ``filters`` and return its path.

    The file holds, in order: the verbatim contents of
    ``options.filters_file`` (if given), one line per ``options.filter``
    entry, then the index and scan-number filters derived from the
    optional table files. The resulting file is echoed to stdout.
    """
    filters_file_path = "filters"
    with open(filters_file_path, "w") as filters_file:
        if options.filters_file:
            with open(options.filters_file, "r") as user_filters:
                contents = user_filters.read()
            filters_file.write(contents)
            # Keep the next filter from being glued onto the last user line.
            if contents and not contents.endswith("\n"):
                filters_file.write("\n")
        for filter_spec in options.filter:
            _add_filter(filters_file, filter_spec)
        _add_filter_line_from_file(options.filter_indices_table, filters_file, "index")
        _add_filter_line_from_file(options.filter_numbers_table, filters_file, "scanNumber")

    print(_read_text(filters_file_path))
    return filters_file_path


def _build_command(options, input_file, filters_file_path):
    """Return the msconvert argument list for the parsed *options*."""
    to_extension = options.toextension
    cmd = ["msconvert"]
    if to_extension.startswith("unindexed_"):
        cmd.append("--%s" % to_extension[len("unindexed_"):])
        cmd.append("--noindex")
    else:
        cmd.append("--%s" % to_extension)
    cmd.extend(["-o", "output"])
    if str_to_bool(options.zlib):
        cmd.append("--zlib")
    if options.binaryencoding:
        cmd.append("--%s" % options.binaryencoding)
    if options.mzencoding:
        cmd.append("--mz%s" % options.mzencoding)
    if options.intensityencoding:
        cmd.append("--inten%s" % options.intensityencoding)
    cmd.append(input_file)
    cmd.extend(["-c", filters_file_path])
    return cmd


def run_script():
    """Parse the command line, run msconvert and publish its output.

    Raises:
        Exception: if msconvert fails, or if it does not leave exactly
            one file in the ``output`` directory.
    """
    parser = optparse.OptionParser()
    parser.add_option('--input', dest='input')
    parser.add_option('--output', dest='output')
    parser.add_option('--fromextension', dest='fromextension')
    parser.add_option('--toextension', dest='toextension', default='mzML', choices=to_extensions)
    parser.add_option('--binaryencoding', dest='binaryencoding', choices=['32', '64'])
    parser.add_option('--mzencoding', dest='mzencoding', choices=['32', '64'])
    parser.add_option('--intensityencoding', dest='intensityencoding', choices=['32', '64'])
    parser.add_option('--zlib', dest='zlib', default="false")
    parser.add_option('--filter', dest='filter', action='append', default=[])
    parser.add_option('--filters_file', dest='filters_file', default=None)
    parser.add_option('--filter_indices_table', default=None)
    parser.add_option('--filter_numbers_table', default=None)

    (options, args) = parser.parse_args()

    # Fail early with a usage message instead of a TypeError further down.
    if not options.input:
        parser.error("--input is required")
    if not options.output:
        parser.error("--output is required")

    input_file = 'input.%s' % options.fromextension
    copy_to_working_directory(options.input, input_file)
    os.mkdir('output')
    filters_file_path = _create_filters_file(options)
    # An argument list (no shell) keeps option values from being
    # interpreted as shell syntax.
    cmd = _build_command(options, input_file, filters_file_path)
    print(" ".join(cmd))
    execute(cmd)
    output_files = os.listdir('output')
    # Explicit check: ``assert`` would be stripped when running with -O.
    if len(output_files) != 1:
        raise Exception("Expected exactly one msconvert output file, found %d: %s"
                        % (len(output_files), ", ".join(sorted(output_files))))
    output_file = output_files[0]
    shutil.copy(os.path.join('output', output_file), options.output)


if __name__ == '__main__':
    __main__()