# HG changeset patch
# User peterjc
# Date 1414499399 14400
# Node ID 4abe8d59a438648095ffe3687c7beb6338da2679
# Parent aeb3e35f823676c6cdaf34b04e8273b22b6d83df
Uploaded v0.0.4 preview 1; fix getting BAM without MAF
diff -r aeb3e35f8236 -r 4abe8d59a438 test-data/header.mira
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/header.mira Tue Oct 28 08:29:59 2014 -0400
@@ -0,0 +1,2 @@
+@Version 2 0
+@Program MIRALIB
diff -r aeb3e35f8236 -r 4abe8d59a438 tools/mira4/README.rst
--- a/tools/mira4/README.rst Tue Jun 10 10:11:58 2014 -0400
+++ b/tools/mira4/README.rst Tue Oct 28 08:29:59 2014 -0400
@@ -44,9 +44,11 @@
* ``mira4_de_novo.xml`` (the Galaxy tool definition for de novo usage)
* ``mira4_mapping.xml`` (the Galaxy tool definition for mapping usage)
+* ``mira4_convert.xml`` (the Galaxy tool definition for converting MIRA files)
* ``mira4_bait.xml`` (the Galaxy tool definition for mirabait)
* ``mira4.py`` (the Python wrapper script)
-* ``mira_bait.py`` (the Python wrapper script for mirabait)
+* ``mira4_convert.py`` (the Python wrapper script for miraconvert)
+* ``mira4_bait.py`` (the Python wrapper script for mirabait)
* ``mira4_validator.py`` (the XML parameter validation script)
The suggested location is a new ``tools/mira4`` folder. You will also need to
@@ -75,6 +77,7 @@
$ ./run_functional_tests.sh -id mira_4_0_bait
$ ./run_functional_tests.sh -id mira_4_0_de_novo
$ ./run_functional_tests.sh -id mira_4_0_mapping
+ $ ./run_functional_tests.sh -id mira_4_0_convert
History
@@ -93,6 +96,7 @@
v0.0.3 - Updated to target MIRA 4.0.2
v0.0.4 - Using optparse for the Python wrapper script API
- Made MAF and BAM outputs optional
+ - Include wrapper for ``miraconvert``
======= ======================================================================
@@ -105,7 +109,7 @@
For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball use
the following command from the Galaxy root folder::
- $ tar -czf mira4_wrapper.tar.gz tools/mira4/README.rst tools/mira4/mira4_de_novo.xml tools/mira4/mira4_mapping.xml tools/mira4/mira4_bait.xml tools/mira4/mira4.py tools/mira4/mira4_make_bam.py tools/mira4/mira4_validator.py tools/mira4/mira4_bait.py tools/mira4/tool_dependencies.xml tools/mira4/repository_dependencies.xml test-data/U13small_m.fastq test-data/U13small_m.mira4_de_novo.fasta test-data/tvc_mini.fastq test-data/tvc_contigs.fasta test-data/tvc_map_ref_strain.fasta test-data/tvc_map_same_strain.fasta test-data/tvc_bait.fasta test-data/tvc_mini_bait_pos.fastq test-data/tvc_mini_bait_strict.fastq test-data/tvc_mini_bait_neg.fastq test-data/ecoli.fastq test-data/ecoli.mira4_de_novo.fasta test-data/empty_file.dat
+ $ tar -czf mira4_wrapper.tar.gz tools/mira4/README.rst tools/mira4/mira4_de_novo.xml tools/mira4/mira4_mapping.xml tools/mira4/mira4_bait.xml tools/mira4/mira4_convert.xml tools/mira4/mira4.py tools/mira4/mira4_make_bam.py tools/mira4/mira4_validator.py tools/mira4/mira4_convert.py tools/mira4/mira4_bait.py tools/mira4/tool_dependencies.xml tools/mira4/repository_dependencies.xml test-data/U13small_m.fastq test-data/U13small_m.mira4_de_novo.fasta test-data/tvc_mini.fastq test-data/tvc_contigs.fasta test-data/tvc_map_ref_strain.fasta test-data/tvc_map_same_strain.fasta test-data/tvc_bait.fasta test-data/tvc_mini_bait_pos.fastq test-data/tvc_mini_bait_strict.fastq test-data/tvc_mini_bait_neg.fastq test-data/ecoli.fastq test-data/ecoli.mira4_de_novo.fasta test-data/header.mira test-data/empty_file.dat
Check this worked::
@@ -114,9 +118,11 @@
tools/mira4/mira4_de_novo.xml
tools/mira4/mira4_mapping.xml
tools/mira4/mira4_bait.xml
+ tools/mira4/mira4_convert.xml
tools/mira4/mira4.py
tools/mira4/mira4_make_bam.py
tools/mira4/mira4_validator.py
+ tools/mira4/mira4_convert.py
tools/mira4/mira4_bait.py
tools/mira4/tool_dependencies.xml
tools/mira4/repository_dependencies.xml
@@ -132,6 +138,7 @@
test-data/tvc_mini_bait_neg.fastq
test-data/ecoli.fastq
test-data/ecoli.mira4_de_novo.fasta
+ test-data/header.mira
test-data/empty_file.dat
diff -r aeb3e35f8236 -r 4abe8d59a438 tools/mira4/mira4.py
--- a/tools/mira4/mira4.py Tue Jun 10 10:11:58 2014 -0400
+++ b/tools/mira4/mira4.py Tue Oct 28 08:29:59 2014 -0400
@@ -58,6 +58,9 @@
parser.add_option("--log", dest="log",
default="-", metavar="FILE",
help="MIRA logging output filename")
+parser.add_option("-v", "--version", dest="version",
+ default=False, action="store_true",
+ help="Show version and quit")
options, args = parser.parse_args()
manifest = options.manifest
out_maf = options.maf
@@ -84,7 +87,7 @@
mira_convert_ver = get_version(mira_convert)
if not mira_convert_ver.strip().startswith("4.0"):
stop_err("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_ver, mira_convert))
-if "-v" in sys.argv or "--version" in sys.argv:
+if options.version:
print "%s, MIRA wrapper version %s" % (mira_ver, WRAPPER_VER)
if mira_ver != mira_convert_ver:
print "WARNING: miraconvert %s" % mira_convert_ver
@@ -192,7 +195,11 @@
#For mapping mode, probably most people would expect a BAM file
#using the reference FASTA file...
if out_bam and out_bam != "-":
- msg = make_bam(mira_convert, out_maf, ref_fasta, out_bam, handle)
+ if out_maf and out_maf != "-":
+ msg = make_bam(mira_convert, out_maf, ref_fasta, out_bam, handle)
+ else:
+ #Not collecting the MAF file, use original location
+ msg = make_bam(mira_convert, old_maf, ref_fasta, out_bam, handle)
if msg:
stop_err(msg)
@@ -216,7 +223,9 @@
assert os.path.isdir(temp)
d = "%s_assembly" % name
-assert not os.path.isdir(d), "Path %s already exists" % d
+#This can fail on my development machine if stale folders exist
+#under Galaxy's .../database/job_working_directory/ tree:
+assert not os.path.isdir(d), "Path %r already exists:\n%s" % (d, os.path.abspath(d))
try:
#Check path access
os.mkdir(d)
diff -r aeb3e35f8236 -r 4abe8d59a438 tools/mira4/mira4_convert.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/mira4/mira4_convert.py Tue Oct 28 08:29:59 2014 -0400
@@ -0,0 +1,225 @@
+#!/usr/bin/env python
+"""A simple wrapper script to call MIRA and collect its output.
+
+This focuses on the miraconvert binary.
+"""
+import os
+import sys
+import subprocess
+import shutil
+import time
+import tempfile
+from optparse import OptionParser
+try:
+ from io import BytesIO
+except ImportError:
+ #Should we worry about Python 2.5 or older?
+ from StringIO import StringIO as BytesIO
+
+#Do we need any PYTHONPATH magic?
+from mira4_make_bam import depad
+
+WRAPPER_VER = "0.0.5" #Keep in sync with the XML file
+
+def stop_err(msg, err=1):
+ sys.stderr.write(msg+"\n")
+ sys.exit(err)
+
+def run(cmd):
+ #Avoid using shell=True when we call subprocess to ensure if the Python
+ #script is killed, so too is the child process.
+ try:
+ child = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ except Exception, err:
+ stop_err("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err))
+ #Use .communicate as can get deadlocks with .wait(),
+ stdout, stderr = child.communicate()
+ return_code = child.returncode
+ if return_code:
+ if stderr and stdout:
+ stop_err("Return code %i from command:\n%s\n\n%s\n\n%s" % (return_code, err, stdout, stderr))
+ else:
+ stop_err("Return code %i from command:\n%s\n%s" % (return_code, err, stderr))
+
+def get_version(mira_binary):
+ """Run MIRA to find its version number"""
+ # At the commend line I would use: mira -v | head -n 1
+ # however there is some pipe error when doing that here.
+ cmd = [mira_binary, "-v"]
+ try:
+ child = subprocess.Popen(cmd,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.STDOUT)
+ except Exception, err:
+ sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err))
+ sys.exit(1)
+ ver, tmp = child.communicate()
+ del child
+ return ver.split("\n", 1)[0].strip()
+
+#Parse Command Line
+usage = """Galaxy MIRA4 wrapper script v%s - use as follows:
+
+$ python mira4_convert.py ...
+
+This will run the MIRA miraconvert binary and collect its output files as directed.
+""" % WRAPPER_VER
+parser = OptionParser(usage=usage)
+parser.add_option("--input", dest="input",
+ default=None, metavar="FILE",
+ help="MIRA input filename")
+parser.add_option("-x", "--min_length", dest="min_length",
+ default="0",
+ help="Minimum contig length")
+parser.add_option("-y", "--min_cover", dest="min_cover",
+ default="0",
+ help="Minimum average contig coverage")
+parser.add_option("-z", "--min_reads", dest="min_reads",
+ default="0",
+ help="Minimum reads per contig")
+parser.add_option("--maf", dest="maf",
+ default="", metavar="FILE",
+ help="MIRA MAF output filename")
+parser.add_option("--ace", dest="ace",
+ default="", metavar="FILE",
+ help="ACE output filename")
+parser.add_option("--bam", dest="bam",
+ default="", metavar="FILE",
+ help="Unpadded BAM output filename")
+parser.add_option("--fasta", dest="fasta",
+ default="", metavar="FILE",
+ help="Unpadded FASTA output filename")
+parser.add_option("--cstats", dest="cstats",
+ default="", metavar="FILE",
+ help="Contig statistics filename")
+parser.add_option("-v", "--version", dest="version",
+ default=False, action="store_true",
+ help="Show version and quit")
+options, args = parser.parse_args()
+if args:
+ stop_err("Expected options (e.g. --input example.maf), not arguments")
+
+input_maf = options.input
+out_maf = options.maf
+out_bam = options.bam
+out_fasta = options.fasta
+out_ace = options.ace
+out_cstats = options.cstats
+
+try:
+ mira_path = os.environ["MIRA4"]
+except KeyError:
+ stop_err("Environment variable $MIRA4 not set")
+mira_convert = os.path.join(mira_path, "miraconvert")
+if not os.path.isfile(mira_convert):
+ stop_err("Missing miraconvert under $MIRA4, %r\nFolder contained: %s"
+ % (mira_convert, ", ".join(os.listdir(mira_path))))
+
+mira_convert_ver = get_version(mira_convert)
+if not mira_convert_ver.strip().startswith("4.0"):
+ stop_err("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_ver, mira_convert))
+if options.version:
+ print "%s, MIRA wrapper version %s" % (mira_convert_ver, WRAPPER_VER)
+ sys.exit(0)
+
+if not input_maf:
+ stop_err("Input MIRA file is required")
+elif not os.path.isfile(input_maf):
+ stop_err("Missing input MIRA file: %r" % input_maf)
+
+if not (out_maf or out_bam or out_fasta or out_ace or out_cstats):
+ stop_err("No output requested")
+
+
+def check_min_int(value, name):
+ try:
+ i = int(value)
+ except:
+ stop_err("Bad %s setting, %r" % (name, value))
+ if i < 0:
+ stop_err("Negative %s setting, %r" % (name, value))
+ return i
+
+min_length = check_min_int(options.min_length, "minimum length")
+min_cover = check_min_int(options.min_cover, "minimum cover")
+min_reads = check_min_int(options.min_reads, "minimum reads")
+
+#TODO - Run MIRA in /tmp or a configurable directory?
+#Currently Galaxy puts us somewhere safe like:
+#/opt/galaxy-dist/database/job_working_directory/846/
+temp = "."
+
+
+cmd_list = [mira_convert]
+if min_length:
+ cmd_list.extend(["-x", str(min_length)])
+if min_cover:
+ cmd_list.extend(["-y", str(min_cover)])
+if min_reads:
+ cmd_list.extend(["-z", str(min_reads)])
+cmd_list.extend(["-f", "maf", input_maf, os.path.join(temp, "converted")])
+if out_maf:
+ cmd_list.append("maf")
+if out_bam:
+ cmd_list.append("samnbb")
+ if not out_fasta:
+ #Need this for samtools depad
+ out_fasta = os.path.join(temp, "depadded.fasta")
+if out_fasta:
+ cmd_list.append("fasta")
+if out_ace:
+ cmd_list.append("ace")
+if out_cstats:
+ cmd_list.append("cstats")
+run(cmd_list)
+
+def collect(old, new):
+ if not os.path.isfile(old):
+ stop_err("Missing expected output file %s" % old)
+ shutil.move(old, new)
+
+if out_maf:
+ collect(os.path.join(temp, "converted.maf"), out_maf)
+if out_fasta:
+ #Can we look at the MAF file to see if there are multiple strains?
+ old = os.path.join(temp, "converted_AllStrains.unpadded.fasta")
+ if os.path.isfile(old):
+ collect(old, out_fasta)
+ else:
+ #Might the output be filtered down to zero contigs?
+ old = os.path.join(temp, "converted.fasta")
+ if not os.path.isfile(old):
+ stop_err("Missing expected output FASTA file")
+ elif os.path.getsize(old) == 0:
+ print("Warning - no contigs (harsh filters?)")
+ collect(old, out_fasta)
+ else:
+ stop_err("Missing expected output FASTA file (only generic file present)")
+if out_ace:
+ collect(os.path.join(temp, "converted.maf"), out_ace)
+if out_cstats:
+ collect(os.path.join(temp, "converted_info_contigstats.txt"), out_cstats)
+
+if out_bam:
+ assert os.path.isfile(out_fasta)
+ old = os.path.join(temp, "converted.samnbb")
+ if not os.path.isfile(old):
+ old = os.path.join(temp, "converted.sam")
+ if not os.path.isfile(old):
+ stop_err("Missing expected intermediate file %s" % old)
+ h = BytesIO()
+ msg = depad(out_fasta, old, out_bam, h)
+ if msg:
+ print(msg)
+ print(h.getvalue())
+ h.close()
+ sys.exit(1)
+ h.close()
+ if out_fasta == os.path.join(temp, "depadded.fasta"):
+ #Not asked for by Galaxy, no longer needed
+ os.remove(out_fasta)
+
+if min_length or min_cover or min_reads:
+ print("Filtered.")
+else:
+ print("Converted.")
diff -r aeb3e35f8236 -r 4abe8d59a438 tools/mira4/mira4_convert.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/mira4/mira4_convert.xml Tue Oct 28 08:29:59 2014 -0400
@@ -0,0 +1,114 @@
+
+ Convert MIRA assembly to FASTA/SAM/BAM
+
+ miraconvert
+ MIRA
+
+ mira4_convert.py --version
+
+mira4_convert.py
+--input "$mira_file"
+--min_length $min_length
+--min_cover $min_cover
+--min_reads $min_reads
+#if str($maf_wanted)=="true":
+--maf "$out_maf"
+#end if
+#if str($fasta_wanted)=="true":
+--fasta "$out_fasta"
+#end if
+#if str($bam_wanted)=="true":
+--bam "$out_bam"
+#end if
+##Don't yet have a Galaxy datatype defined for ace:
+## #if str($ace_wanted)=="true":
+## --ace "$out_ace"
+## #end if
+#if str($cstats_wanted)=="true":
+--cstats "$out_cstats"
+#end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ maf_wanted is True
+
+
+ fasta_wanted is True
+
+
+ bam_wanted is True
+
+
+
+ cstats_wanted is True
+
+
+
+
+
+
+**What it does**
+
+Runs the ``miraconvert`` utility from MIRA v4.0 to filter and/or convert
+a MIRA Assembly Format file produced by a *mapping* or *de novo* assembly.
+
+**Example Usage**
+
+You want to remove all the low coverage contigs from a transcriptome
+assembly to focus on those with higher coverage.
+
+You want to convert your MIRA assembly into SAM/BAM to run a standard
+SNP finding tool.
+
+You've lost the FASTA consensus from your MIRA assembly and need to
+regenerate it.
+
+
+**Citation**
+
+If you use this Galaxy tool in work leading to a scientific publication please
+cite the following papers:
+
+Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
+Galaxy tools and workflows for sequence analysis with applications
+in molecular plant pathology. PeerJ 1:e167
+http://dx.doi.org/10.7717/peerj.167
+
+Bastien Chevreux, Thomas Wetter and Sándor Suhai (1999).
+Genome Sequence Assembly Using Trace Signals and Additional Sequence Information.
+Computer Science and Biology: Proceedings of the German Conference on Bioinformatics (GCB) 99, pp. 45-56.
+http://www.bioinfo.de/isb/gcb99/talks/chevreux/main.html
+
+This wrapper is available to install into other Galaxy Instances via the Galaxy
+Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/mira4_assembler
+
+
diff -r aeb3e35f8236 -r 4abe8d59a438 tools/mira4/mira4_de_novo.xml
--- a/tools/mira4/mira4_de_novo.xml Tue Jun 10 10:11:58 2014 -0400
+++ b/tools/mira4/mira4_de_novo.xml Tue Oct 28 08:29:59 2014 -0400
@@ -180,7 +180,8 @@
-
+
+
+
@@ -196,11 +197,9 @@
-
-
+
+
-
-
diff -r aeb3e35f8236 -r 4abe8d59a438 tools/mira4/tool_dependencies.xml
--- a/tools/mira4/tool_dependencies.xml Tue Jun 10 10:11:58 2014 -0400
+++ b/tools/mira4/tool_dependencies.xml Tue Oct 28 08:29:59 2014 -0400
@@ -1,7 +1,7 @@
-
+