# HG changeset patch # User peterjc # Date 1414499399 14400 # Node ID 4abe8d59a438648095ffe3687c7beb6338da2679 # Parent aeb3e35f823676c6cdaf34b04e8273b22b6d83df Uploaded v0.0.4 preview 1; fix getting BAM without MAF diff -r aeb3e35f8236 -r 4abe8d59a438 test-data/header.mira --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/header.mira Tue Oct 28 08:29:59 2014 -0400 @@ -0,0 +1,2 @@ +@Version 2 0 +@Program MIRALIB diff -r aeb3e35f8236 -r 4abe8d59a438 tools/mira4/README.rst --- a/tools/mira4/README.rst Tue Jun 10 10:11:58 2014 -0400 +++ b/tools/mira4/README.rst Tue Oct 28 08:29:59 2014 -0400 @@ -44,9 +44,11 @@ * ``mira4_de_novo.xml`` (the Galaxy tool definition for de novo usage) * ``mira4_mapping.xml`` (the Galaxy tool definition for mapping usage) +* ``mira4_convert.xml`` (the Galaxy tool definition for converting MIRA files) * ``mira4_bait.xml`` (the Galaxy tool definition for mirabait) * ``mira4.py`` (the Python wrapper script) -* ``mira_bait.py`` (the Python wrapper script for mirabait) +* ``mira4_convert.py`` (the Python wrapper script for miraconvert) +* ``mira4_bait.py`` (the Python wrapper script for mirabait) * ``mira4_validator.py`` (the XML parameter validation script) The suggested location is a new ``tools/mira4`` folder. You will also need to @@ -75,6 +77,7 @@ $ ./run_functional_tests.sh -id mira_4_0_bait $ ./run_functional_tests.sh -id mira_4_0_de_novo $ ./run_functional_tests.sh -id mira_4_0_mapping + $ ./run_functional_tests.sh -id mira_4_0_convert History @@ -93,6 +96,7 @@ v0.0.3 - Updated to target MIRA 4.0.2 v0.0.4 - Using optparse for the Python wrapper script API - Made MAF and BAM outputs optional + - Include wrapper for ``miraconvert`` ======= ====================================================================== @@ -105,7 +109,7 @@ For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball use the following command from the Galaxy root folder:: - $ tar -czf mira4_wrapper.tar.gz tools/mira4/README.rst tools/mira4/mira4_de_novo.xml tools/mira4/mira4_mapping.xml tools/mira4/mira4_bait.xml tools/mira4/mira4.py tools/mira4/mira4_make_bam.py tools/mira4/mira4_validator.py tools/mira4/mira4_bait.py tools/mira4/tool_dependencies.xml tools/mira4/repository_dependencies.xml test-data/U13small_m.fastq test-data/U13small_m.mira4_de_novo.fasta test-data/tvc_mini.fastq test-data/tvc_contigs.fasta test-data/tvc_map_ref_strain.fasta test-data/tvc_map_same_strain.fasta test-data/tvc_bait.fasta test-data/tvc_mini_bait_pos.fastq test-data/tvc_mini_bait_strict.fastq test-data/tvc_mini_bait_neg.fastq test-data/ecoli.fastq test-data/ecoli.mira4_de_novo.fasta test-data/empty_file.dat + $ tar -czf mira4_wrapper.tar.gz tools/mira4/README.rst tools/mira4/mira4_de_novo.xml tools/mira4/mira4_mapping.xml tools/mira4/mira4_bait.xml tools/mira4/mira4_convert.xml tools/mira4/mira4.py tools/mira4/mira4_make_bam.py tools/mira4/mira4_validator.py tools/mira4/mira4_convert.py tools/mira4/mira4_bait.py tools/mira4/tool_dependencies.xml tools/mira4/repository_dependencies.xml test-data/U13small_m.fastq test-data/U13small_m.mira4_de_novo.fasta test-data/tvc_mini.fastq test-data/tvc_contigs.fasta test-data/tvc_map_ref_strain.fasta test-data/tvc_map_same_strain.fasta test-data/tvc_bait.fasta test-data/tvc_mini_bait_pos.fastq test-data/tvc_mini_bait_strict.fastq test-data/tvc_mini_bait_neg.fastq test-data/ecoli.fastq test-data/ecoli.mira4_de_novo.fasta test-data/header.mira test-data/empty_file.dat Check this worked:: @@ -114,9 +118,11 @@ tools/mira4/mira4_de_novo.xml tools/mira4/mira4_mapping.xml tools/mira4/mira4_bait.xml + tools/mira4/mira4_convert.xml tools/mira4/mira4.py tools/mira4/mira4_make_bam.py tools/mira4/mira4_validator.py + tools/mira4/mira4_convert.py tools/mira4/mira4_bait.py tools/mira4/tool_dependencies.xml tools/mira4/repository_dependencies.xml @@ -132,6 +138,7 @@ test-data/tvc_mini_bait_neg.fastq test-data/ecoli.fastq test-data/ecoli.mira4_de_novo.fasta + test-data/header.mira test-data/empty_file.dat diff -r aeb3e35f8236 -r 4abe8d59a438 tools/mira4/mira4.py --- a/tools/mira4/mira4.py Tue Jun 10 10:11:58 2014 -0400 +++ b/tools/mira4/mira4.py Tue Oct 28 08:29:59 2014 -0400 @@ -58,6 +58,9 @@ parser.add_option("--log", dest="log", default="-", metavar="FILE", help="MIRA logging output filename") +parser.add_option("-v", "--version", dest="version", + default=False, action="store_true", + help="Show version and quit") options, args = parser.parse_args() manifest = options.manifest out_maf = options.maf @@ -84,7 +87,7 @@ mira_convert_ver = get_version(mira_convert) if not mira_convert_ver.strip().startswith("4.0"): stop_err("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_ver, mira_convert)) -if "-v" in sys.argv or "--version" in sys.argv: +if options.version: print "%s, MIRA wrapper version %s" % (mira_ver, WRAPPER_VER) if mira_ver != mira_convert_ver: print "WARNING: miraconvert %s" % mira_convert_ver @@ -192,7 +195,11 @@ #For mapping mode, probably most people would expect a BAM file #using the reference FASTA file... if out_bam and out_bam != "-": - msg = make_bam(mira_convert, out_maf, ref_fasta, out_bam, handle) + if out_maf and out_maf != "-": + msg = make_bam(mira_convert, out_maf, ref_fasta, out_bam, handle) + else: + #Not collecting the MAF file, use original location + msg = make_bam(mira_convert, old_maf, ref_fasta, out_bam, handle) if msg: stop_err(msg) @@ -216,7 +223,9 @@ assert os.path.isdir(temp) d = "%s_assembly" % name -assert not os.path.isdir(d), "Path %s already exists" % d +#This can fail on my development machine if stale folders exist +#under Galaxy's .../database/job_working_directory/ tree: +assert not os.path.isdir(d), "Path %r already exists:\n%s" % (d, os.path.abspath(d)) try: #Check path access os.mkdir(d) diff -r aeb3e35f8236 -r 4abe8d59a438 tools/mira4/mira4_convert.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/mira4/mira4_convert.py Tue Oct 28 08:29:59 2014 -0400 @@ -0,0 +1,225 @@ +#!/usr/bin/env python +"""A simple wrapper script to call MIRA and collect its output. + +This focuses on the miraconvert binary. +""" +import os +import sys +import subprocess +import shutil +import time +import tempfile +from optparse import OptionParser +try: + from io import BytesIO +except ImportError: + #Should we worry about Python 2.5 or older? + from StringIO import StringIO as BytesIO + +#Do we need any PYTHONPATH magic? +from mira4_make_bam import depad + +WRAPPER_VER = "0.0.5" #Keep in sync with the XML file + +def stop_err(msg, err=1): + sys.stderr.write(msg+"\n") + sys.exit(err) + +def run(cmd): + #Avoid using shell=True when we call subprocess to ensure if the Python + #script is killed, so too is the child process. + try: + child = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + except Exception, err: + stop_err("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err)) + #Use .communicate as can get deadlocks with .wait(), + stdout, stderr = child.communicate() + return_code = child.returncode + if return_code: + if stderr and stdout: + stop_err("Return code %i from command:\n%s\n\n%s\n\n%s" % (return_code, err, stdout, stderr)) + else: + stop_err("Return code %i from command:\n%s\n%s" % (return_code, err, stderr)) + +def get_version(mira_binary): + """Run MIRA to find its version number""" + # At the commend line I would use: mira -v | head -n 1 + # however there is some pipe error when doing that here. + cmd = [mira_binary, "-v"] + try: + child = subprocess.Popen(cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + except Exception, err: + sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err)) + sys.exit(1) + ver, tmp = child.communicate() + del child + return ver.split("\n", 1)[0].strip() + +#Parse Command Line +usage = """Galaxy MIRA4 wrapper script v%s - use as follows: + +$ python mira4_convert.py ... + +This will run the MIRA miraconvert binary and collect its output files as directed. +""" % WRAPPER_VER +parser = OptionParser(usage=usage) +parser.add_option("--input", dest="input", + default=None, metavar="FILE", + help="MIRA input filename") +parser.add_option("-x", "--min_length", dest="min_length", + default="0", + help="Minimum contig length") +parser.add_option("-y", "--min_cover", dest="min_cover", + default="0", + help="Minimum average contig coverage") +parser.add_option("-z", "--min_reads", dest="min_reads", + default="0", + help="Minimum reads per contig") +parser.add_option("--maf", dest="maf", + default="", metavar="FILE", + help="MIRA MAF output filename") +parser.add_option("--ace", dest="ace", + default="", metavar="FILE", + help="ACE output filename") +parser.add_option("--bam", dest="bam", + default="", metavar="FILE", + help="Unpadded BAM output filename") +parser.add_option("--fasta", dest="fasta", + default="", metavar="FILE", + help="Unpadded FASTA output filename") +parser.add_option("--cstats", dest="cstats", + default="", metavar="FILE", + help="Contig statistics filename") +parser.add_option("-v", "--version", dest="version", + default=False, action="store_true", + help="Show version and quit") +options, args = parser.parse_args() +if args: + stop_err("Expected options (e.g. --input example.maf), not arguments") + +input_maf = options.input +out_maf = options.maf +out_bam = options.bam +out_fasta = options.fasta +out_ace = options.ace +out_cstats = options.cstats + +try: + mira_path = os.environ["MIRA4"] +except KeyError: + stop_err("Environment variable $MIRA4 not set") +mira_convert = os.path.join(mira_path, "miraconvert") +if not os.path.isfile(mira_convert): + stop_err("Missing miraconvert under $MIRA4, %r\nFolder contained: %s" + % (mira_convert, ", ".join(os.listdir(mira_path)))) + +mira_convert_ver = get_version(mira_convert) +if not mira_convert_ver.strip().startswith("4.0"): + stop_err("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_ver, mira_convert)) +if options.version: + print "%s, MIRA wrapper version %s" % (mira_convert_ver, WRAPPER_VER) + sys.exit(0) + +if not input_maf: + stop_err("Input MIRA file is required") +elif not os.path.isfile(input_maf): + stop_err("Missing input MIRA file: %r" % input_maf) + +if not (out_maf or out_bam or out_fasta or out_ace or out_cstats): + stop_err("No output requested") + + +def check_min_int(value, name): + try: + i = int(value) + except: + stop_err("Bad %s setting, %r" % (name, value)) + if i < 0: + stop_err("Negative %s setting, %r" % (name, value)) + return i + +min_length = check_min_int(options.min_length, "minimum length") +min_cover = check_min_int(options.min_cover, "minimum cover") +min_reads = check_min_int(options.min_reads, "minimum reads") + +#TODO - Run MIRA in /tmp or a configurable directory? +#Currently Galaxy puts us somewhere safe like: +#/opt/galaxy-dist/database/job_working_directory/846/ +temp = "." + + +cmd_list = [mira_convert] +if min_length: + cmd_list.extend(["-x", str(min_length)]) +if min_cover: + cmd_list.extend(["-y", str(min_cover)]) +if min_reads: + cmd_list.extend(["-z", str(min_reads)]) +cmd_list.extend(["-f", "maf", input_maf, os.path.join(temp, "converted")]) +if out_maf: + cmd_list.append("maf") +if out_bam: + cmd_list.append("samnbb") + if not out_fasta: + #Need this for samtools depad + out_fasta = os.path.join(temp, "depadded.fasta") +if out_fasta: + cmd_list.append("fasta") +if out_ace: + cmd_list.append("ace") +if out_cstats: + cmd_list.append("cstats") +run(cmd_list) + +def collect(old, new): + if not os.path.isfile(old): + stop_err("Missing expected output file %s" % old) + shutil.move(old, new) + +if out_maf: + collect(os.path.join(temp, "converted.maf"), out_maf) +if out_fasta: + #Can we look at the MAF file to see if there are multiple strains? + old = os.path.join(temp, "converted_AllStrains.unpadded.fasta") + if os.path.isfile(old): + collect(old, out_fasta) + else: + #Might the output be filtered down to zero contigs? + old = os.path.join(temp, "converted.fasta") + if not os.path.isfile(old): + stop_err("Missing expected output FASTA file") + elif os.path.getsize(old) == 0: + print("Warning - no contigs (harsh filters?)") + collect(old, out_fasta) + else: + stop_err("Missing expected output FASTA file (only generic file present)") +if out_ace: + collect(os.path.join(temp, "converted.maf"), out_ace) +if out_cstats: + collect(os.path.join(temp, "converted_info_contigstats.txt"), out_cstats) + +if out_bam: + assert os.path.isfile(out_fasta) + old = os.path.join(temp, "converted.samnbb") + if not os.path.isfile(old): + old = os.path.join(temp, "converted.sam") + if not os.path.isfile(old): + stop_err("Missing expected intermediate file %s" % old) + h = BytesIO() + msg = depad(out_fasta, old, out_bam, h) + if msg: + print(msg) + print(h.getvalue()) + h.close() + sys.exit(1) + h.close() + if out_fasta == os.path.join(temp, "depadded.fasta"): + #Not asked for by Galaxy, no longer needed + os.remove(out_fasta) + +if min_length or min_cover or min_reads: + print("Filtered.") +else: + print("Converted.") diff -r aeb3e35f8236 -r 4abe8d59a438 tools/mira4/mira4_convert.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/mira4/mira4_convert.xml Tue Oct 28 08:29:59 2014 -0400 @@ -0,0 +1,114 @@ + + Convert MIRA assembly to FASTA/SAM/BAM + + miraconvert + MIRA + + mira4_convert.py --version + +mira4_convert.py +--input "$mira_file" +--min_length $min_length +--min_cover $min_cover +--min_reads $min_reads +#if str($maf_wanted)=="true": +--maf "$out_maf" +#end if +#if str($fasta_wanted)=="true": +--fasta "$out_fasta" +#end if +#if str($bam_wanted)=="true": +--bam "$out_bam" +#end if +##Don't yet have a Galaxy datatype defined for ace: +## #if str($ace_wanted)=="true": +## --ace "$out_ace" +## #end if +#if str($cstats_wanted)=="true": +--cstats "$out_cstats" +#end if + + + + + + + + + + + + + + + + + + + + + maf_wanted is True + + + fasta_wanted is True + + + bam_wanted is True + + + + cstats_wanted is True + + + + + + +**What it does** + +Runs the ``miraconvert`` utility from MIRA v4.0 to filter and/or convert +a MIRA Assembly Format file produced by a *mapping* or *de novo* assembly. + +**Example Usage** + +You want to remove all the low coverage contigs from a transcriptome +assembly to focus on those with higher coverage. + +You want to convert your MIRA assembly into SAM/BAM to run a standard +SNP finding tool. + +You've lost the FASTA consensus from your MIRA assembly and need to +regenerate it. + + +**Citation** + +If you use this Galaxy tool in work leading to a scientific publication please +cite the following papers: + +Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013). +Galaxy tools and workflows for sequence analysis with applications +in molecular plant pathology. PeerJ 1:e167 +http://dx.doi.org/10.7717/peerj.167 + +Bastien Chevreux, Thomas Wetter and Sándor Suhai (1999). +Genome Sequence Assembly Using Trace Signals and Additional Sequence Information. +Computer Science and Biology: Proceedings of the German Conference on Bioinformatics (GCB) 99, pp. 45-56. +http://www.bioinfo.de/isb/gcb99/talks/chevreux/main.html + +This wrapper is available to install into other Galaxy Instances via the Galaxy +Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/mira4_assembler + + diff -r aeb3e35f8236 -r 4abe8d59a438 tools/mira4/mira4_de_novo.xml --- a/tools/mira4/mira4_de_novo.xml Tue Jun 10 10:11:58 2014 -0400 +++ b/tools/mira4/mira4_de_novo.xml Tue Oct 28 08:29:59 2014 -0400 @@ -180,7 +180,8 @@ - + + + @@ -196,11 +197,9 @@ - - + + - - diff -r aeb3e35f8236 -r 4abe8d59a438 tools/mira4/tool_dependencies.xml --- a/tools/mira4/tool_dependencies.xml Tue Jun 10 10:11:58 2014 -0400 +++ b/tools/mira4/tool_dependencies.xml Tue Oct 28 08:29:59 2014 -0400 @@ -1,7 +1,7 @@ - +