# HG changeset patch
# User peterjc
# Date 1414499399 14400
# Node ID 4abe8d59a438648095ffe3687c7beb6338da2679
# Parent  aeb3e35f823676c6cdaf34b04e8273b22b6d83df
Uploaded v0.0.4 preview 1; fix getting BAM without MAF
diff -r aeb3e35f8236 -r 4abe8d59a438 test-data/header.mira
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/header.mira	Tue Oct 28 08:29:59 2014 -0400
@@ -0,0 +1,2 @@
+@Version	2	0
+@Program	MIRALIB
diff -r aeb3e35f8236 -r 4abe8d59a438 tools/mira4/README.rst
--- a/tools/mira4/README.rst	Tue Jun 10 10:11:58 2014 -0400
+++ b/tools/mira4/README.rst	Tue Oct 28 08:29:59 2014 -0400
@@ -44,9 +44,11 @@
 
 * ``mira4_de_novo.xml`` (the Galaxy tool definition for de novo usage)
 * ``mira4_mapping.xml`` (the Galaxy tool definition for mapping usage)
+* ``mira4_convert.xml`` (the Galaxy tool definition for converting MIRA files)
 * ``mira4_bait.xml`` (the Galaxy tool definition for mirabait)
 * ``mira4.py`` (the Python wrapper script)
-* ``mira_bait.py`` (the Python wrapper script for mirabait)
+* ``mira4_convert.py`` (the Python wrapper script for miraconvert)
+* ``mira4_bait.py`` (the Python wrapper script for mirabait)
 * ``mira4_validator.py`` (the XML parameter validation script)
 
 The suggested location is a new ``tools/mira4`` folder. You will also need to
@@ -75,6 +77,7 @@
     $ ./run_functional_tests.sh -id mira_4_0_bait
     $ ./run_functional_tests.sh -id mira_4_0_de_novo
     $ ./run_functional_tests.sh -id mira_4_0_mapping
+    $ ./run_functional_tests.sh -id mira_4_0_convert
 
 
 History
@@ -93,6 +96,7 @@
 v0.0.3  - Updated to target MIRA 4.0.2
 v0.0.4  - Using optparse for the Python wrapper script API
         - Made MAF and BAM outputs optional
+        - Include wrapper for ``miraconvert``
 ======= ======================================================================
 
 
@@ -105,7 +109,7 @@
 For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball use
 the following command from the Galaxy root folder::
 
-    $ tar -czf mira4_wrapper.tar.gz tools/mira4/README.rst tools/mira4/mira4_de_novo.xml tools/mira4/mira4_mapping.xml tools/mira4/mira4_bait.xml tools/mira4/mira4.py tools/mira4/mira4_make_bam.py tools/mira4/mira4_validator.py tools/mira4/mira4_bait.py tools/mira4/tool_dependencies.xml tools/mira4/repository_dependencies.xml test-data/U13small_m.fastq test-data/U13small_m.mira4_de_novo.fasta test-data/tvc_mini.fastq test-data/tvc_contigs.fasta test-data/tvc_map_ref_strain.fasta test-data/tvc_map_same_strain.fasta test-data/tvc_bait.fasta test-data/tvc_mini_bait_pos.fastq test-data/tvc_mini_bait_strict.fastq test-data/tvc_mini_bait_neg.fastq test-data/ecoli.fastq test-data/ecoli.mira4_de_novo.fasta test-data/empty_file.dat
+    $ tar -czf mira4_wrapper.tar.gz tools/mira4/README.rst tools/mira4/mira4_de_novo.xml tools/mira4/mira4_mapping.xml tools/mira4/mira4_bait.xml tools/mira4/mira4_convert.xml tools/mira4/mira4.py tools/mira4/mira4_make_bam.py tools/mira4/mira4_validator.py tools/mira4/mira4_convert.py tools/mira4/mira4_bait.py tools/mira4/tool_dependencies.xml tools/mira4/repository_dependencies.xml test-data/U13small_m.fastq test-data/U13small_m.mira4_de_novo.fasta test-data/tvc_mini.fastq test-data/tvc_contigs.fasta test-data/tvc_map_ref_strain.fasta test-data/tvc_map_same_strain.fasta test-data/tvc_bait.fasta test-data/tvc_mini_bait_pos.fastq test-data/tvc_mini_bait_strict.fastq test-data/tvc_mini_bait_neg.fastq test-data/ecoli.fastq test-data/ecoli.mira4_de_novo.fasta test-data/header.mira test-data/empty_file.dat
 
 Check this worked::
 
@@ -114,9 +118,11 @@
     tools/mira4/mira4_de_novo.xml
     tools/mira4/mira4_mapping.xml
     tools/mira4/mira4_bait.xml
+    tools/mira4/mira4_convert.xml
     tools/mira4/mira4.py
     tools/mira4/mira4_make_bam.py
     tools/mira4/mira4_validator.py
+    tools/mira4/mira4_convert.py
     tools/mira4/mira4_bait.py
     tools/mira4/tool_dependencies.xml
     tools/mira4/repository_dependencies.xml
@@ -132,6 +138,7 @@
     test-data/tvc_mini_bait_neg.fastq
     test-data/ecoli.fastq
     test-data/ecoli.mira4_de_novo.fasta
+    test-data/header.mira
     test-data/empty_file.dat
 
 
diff -r aeb3e35f8236 -r 4abe8d59a438 tools/mira4/mira4.py
--- a/tools/mira4/mira4.py	Tue Jun 10 10:11:58 2014 -0400
+++ b/tools/mira4/mira4.py	Tue Oct 28 08:29:59 2014 -0400
@@ -58,6 +58,9 @@
 parser.add_option("--log", dest="log",
                   default="-", metavar="FILE",
                   help="MIRA logging output filename")
+parser.add_option("-v", "--version", dest="version",
+                  default=False, action="store_true",
+                  help="Show version and quit")
 options, args = parser.parse_args()
 manifest = options.manifest
 out_maf = options.maf
@@ -84,7 +87,7 @@
 mira_convert_ver = get_version(mira_convert)
 if not mira_convert_ver.strip().startswith("4.0"):
     stop_err("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_ver, mira_convert))
-if "-v" in sys.argv or "--version" in sys.argv:
+if options.version:
     print "%s, MIRA wrapper version %s" % (mira_ver, WRAPPER_VER)
     if mira_ver != mira_convert_ver:
         print "WARNING: miraconvert %s" % mira_convert_ver
@@ -192,7 +195,11 @@
     #For mapping mode, probably most people would expect a BAM file
     #using the reference FASTA file...
     if out_bam and out_bam != "-":
-        msg = make_bam(mira_convert, out_maf, ref_fasta, out_bam, handle)
+        if out_maf and out_maf != "-":
+            msg = make_bam(mira_convert, out_maf, ref_fasta, out_bam, handle)
+        else:
+            #Not collecting the MAF file, use original location        
+            msg = make_bam(mira_convert, old_maf, ref_fasta, out_bam, handle)
         if msg:
             stop_err(msg)
 
@@ -216,7 +223,9 @@
 
 assert os.path.isdir(temp)
 d = "%s_assembly" % name
-assert not os.path.isdir(d), "Path %s already exists" % d
+#This can fail on my development machine if stale folders exist
+#under Galaxy's .../database/job_working_directory/ tree:
+assert not os.path.isdir(d), "Path %r already exists:\n%s" % (d, os.path.abspath(d))
 try:
     #Check path access
     os.mkdir(d)
diff -r aeb3e35f8236 -r 4abe8d59a438 tools/mira4/mira4_convert.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/mira4/mira4_convert.py	Tue Oct 28 08:29:59 2014 -0400
@@ -0,0 +1,225 @@
+#!/usr/bin/env python
+"""A simple wrapper script to call MIRA and collect its output.
+
+This focuses on the miraconvert binary.
+"""
+import os
+import sys
+import subprocess
+import shutil
+import time
+import tempfile
+from optparse import OptionParser
+try:
+    from io import BytesIO
+except ImportError:
+    #Should we worry about Python 2.5 or older?
+    from StringIO import StringIO as BytesIO
+
+#Do we need any PYTHONPATH magic?
+from mira4_make_bam import depad
+
+WRAPPER_VER = "0.0.5" #Keep in sync with the XML file
+
+def stop_err(msg, err=1):
+    sys.stderr.write(msg+"\n")
+    sys.exit(err)
+
+def run(cmd):
+    #Avoid using shell=True when we call subprocess to ensure if the Python
+    #script is killed, so too is the child process.
+    try:
+        child = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    except Exception, err:
+        stop_err("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err))
+    #Use .communicate as can get deadlocks with .wait(),
+    stdout, stderr = child.communicate()
+    return_code = child.returncode
+    if return_code:
+        if stderr and stdout:
+            stop_err("Return code %i from command:\n%s\n\n%s\n\n%s" % (return_code, err, stdout, stderr))
+        else:
+            stop_err("Return code %i from command:\n%s\n%s" % (return_code, err, stderr))
+
+def get_version(mira_binary):
+    """Run MIRA to find its version number"""
+    # At the commend line I would use: mira -v | head -n 1
+    # however there is some pipe error when doing that here.
+    cmd = [mira_binary, "-v"]
+    try:
+        child = subprocess.Popen(cmd,
+                                 stdout=subprocess.PIPE,
+                                 stderr=subprocess.STDOUT)
+    except Exception, err:
+        sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err))
+        sys.exit(1)
+    ver, tmp = child.communicate()
+    del child
+    return ver.split("\n", 1)[0].strip()
+
+#Parse Command Line
+usage = """Galaxy MIRA4 wrapper script v%s - use as follows:
+
+$ python mira4_convert.py ...
+
+This will run the MIRA miraconvert binary and collect its output files as directed.
+""" % WRAPPER_VER
+parser = OptionParser(usage=usage)
+parser.add_option("--input", dest="input",
+                  default=None, metavar="FILE",
+                  help="MIRA input filename")
+parser.add_option("-x", "--min_length", dest="min_length",
+                  default="0",
+                  help="Minimum contig length")
+parser.add_option("-y", "--min_cover", dest="min_cover",
+                  default="0",
+                  help="Minimum average contig coverage")
+parser.add_option("-z", "--min_reads", dest="min_reads",
+                  default="0",
+                  help="Minimum reads per contig")
+parser.add_option("--maf", dest="maf",
+                  default="", metavar="FILE",
+                  help="MIRA MAF output filename")
+parser.add_option("--ace", dest="ace",
+                  default="", metavar="FILE",
+                  help="ACE output filename")
+parser.add_option("--bam", dest="bam",
+                  default="", metavar="FILE",
+                  help="Unpadded BAM output filename")
+parser.add_option("--fasta", dest="fasta",
+                  default="", metavar="FILE",
+                  help="Unpadded FASTA output filename")
+parser.add_option("--cstats", dest="cstats",
+                  default="", metavar="FILE",
+                  help="Contig statistics filename")
+parser.add_option("-v", "--version", dest="version",
+                  default=False, action="store_true",
+                  help="Show version and quit")
+options, args = parser.parse_args()
+if args:
+    stop_err("Expected options (e.g. --input example.maf), not arguments")
+
+input_maf = options.input
+out_maf = options.maf
+out_bam = options.bam
+out_fasta = options.fasta
+out_ace = options.ace
+out_cstats = options.cstats
+
+try:
+    mira_path = os.environ["MIRA4"]
+except KeyError:
+    stop_err("Environment variable $MIRA4 not set")
+mira_convert = os.path.join(mira_path, "miraconvert")
+if not os.path.isfile(mira_convert):
+    stop_err("Missing miraconvert under $MIRA4, %r\nFolder contained: %s"
+             % (mira_convert, ", ".join(os.listdir(mira_path))))
+
+mira_convert_ver = get_version(mira_convert)
+if not mira_convert_ver.strip().startswith("4.0"):
+    stop_err("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_ver, mira_convert))
+if options.version:
+    print "%s, MIRA wrapper version %s" % (mira_convert_ver, WRAPPER_VER)
+    sys.exit(0)
+
+if not input_maf:
+    stop_err("Input MIRA file is required")
+elif not os.path.isfile(input_maf):
+    stop_err("Missing input MIRA file: %r" % input_maf)
+
+if not (out_maf or out_bam or out_fasta or out_ace or out_cstats):
+    stop_err("No output requested")
+
+
+def check_min_int(value, name):
+    try:
+        i = int(value)
+    except:
+        stop_err("Bad %s setting, %r" % (name, value))
+    if i < 0:
+        stop_err("Negative %s setting, %r" % (name, value))
+    return i
+
+min_length = check_min_int(options.min_length, "minimum length")
+min_cover = check_min_int(options.min_cover, "minimum cover")
+min_reads = check_min_int(options.min_reads, "minimum reads")
+
+#TODO - Run MIRA in /tmp or a configurable directory?
+#Currently Galaxy puts us somewhere safe like:
+#/opt/galaxy-dist/database/job_working_directory/846/
+temp = "."
+
+
+cmd_list = [mira_convert]
+if min_length:
+    cmd_list.extend(["-x", str(min_length)])
+if min_cover:
+    cmd_list.extend(["-y", str(min_cover)])
+if min_reads:
+    cmd_list.extend(["-z", str(min_reads)])
+cmd_list.extend(["-f", "maf", input_maf, os.path.join(temp, "converted")])
+if out_maf:
+    cmd_list.append("maf")
+if out_bam:
+    cmd_list.append("samnbb")
+    if not out_fasta:
+        #Need this for samtools depad
+        out_fasta = os.path.join(temp, "depadded.fasta")
+if out_fasta:
+    cmd_list.append("fasta")
+if out_ace:
+    cmd_list.append("ace")
+if out_cstats:
+    cmd_list.append("cstats")
+run(cmd_list)
+
+def collect(old, new):
+    if not os.path.isfile(old):
+        stop_err("Missing expected output file %s" % old)
+    shutil.move(old, new)
+
+if out_maf:
+    collect(os.path.join(temp, "converted.maf"), out_maf)
+if out_fasta:
+    #Can we look at the MAF file to see if there are multiple strains?
+    old = os.path.join(temp, "converted_AllStrains.unpadded.fasta")
+    if os.path.isfile(old):
+        collect(old, out_fasta)
+    else:
+        #Might the output be filtered down to zero contigs?
+        old = os.path.join(temp, "converted.fasta")
+        if not os.path.isfile(old):
+            stop_err("Missing expected output FASTA file")
+        elif os.path.getsize(old) == 0:
+            print("Warning - no contigs (harsh filters?)")
+            collect(old, out_fasta)
+        else:
+            stop_err("Missing expected output FASTA file (only generic file present)")
+if out_ace:
+    collect(os.path.join(temp, "converted.maf"), out_ace)
+if out_cstats:
+    collect(os.path.join(temp, "converted_info_contigstats.txt"), out_cstats)
+
+if out_bam:
+    assert os.path.isfile(out_fasta)
+    old = os.path.join(temp, "converted.samnbb")
+    if not os.path.isfile(old):
+        old = os.path.join(temp, "converted.sam")
+    if not os.path.isfile(old):
+        stop_err("Missing expected intermediate file %s" % old)
+    h = BytesIO()
+    msg = depad(out_fasta, old, out_bam, h)
+    if msg:
+        print(msg)
+        print(h.getvalue())
+        h.close()
+        sys.exit(1)
+    h.close()
+    if out_fasta == os.path.join(temp, "depadded.fasta"):
+        #Not asked for by Galaxy, no longer needed
+        os.remove(out_fasta)
+
+if min_length or min_cover or min_reads:
+    print("Filtered.")
+else:
+    print("Converted.")
diff -r aeb3e35f8236 -r 4abe8d59a438 tools/mira4/mira4_convert.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/mira4/mira4_convert.xml	Tue Oct 28 08:29:59 2014 -0400
@@ -0,0 +1,114 @@
+
+    Convert MIRA assembly to FASTA/SAM/BAM
+    
+        miraconvert
+        MIRA
+    
+    mira4_convert.py --version
+    
+mira4_convert.py
+--input "$mira_file"
+--min_length $min_length
+--min_cover $min_cover
+--min_reads $min_reads
+#if str($maf_wanted)=="true":
+--maf "$out_maf"
+#end if
+#if str($fasta_wanted)=="true":
+--fasta "$out_fasta"
+#end if
+#if str($bam_wanted)=="true":
+--bam "$out_bam"
+#end if
+##Don't yet have a Galaxy datatype defined for ace:
+## #if str($ace_wanted)=="true":
+## --ace "$out_ace"
+## #end if
+#if str($cstats_wanted)=="true":
+--cstats "$out_cstats"
+#end if
+    
+    
+        
+        
+        
+    
+    
+        
+        
+        
+        
+        
+        
+        
+        
+        
+        
+    
+    
+        
+              maf_wanted is True
+        
+        
+              fasta_wanted is True
+        
+        
+              bam_wanted is True
+        
+        
+        
+              cstats_wanted is True
+        
+    
+    
+        
+    
+    
+**What it does**
+
+Runs the ``miraconvert`` utility from MIRA v4.0 to filter and/or convert
+a MIRA Assembly Format file produced by a *mapping* or *de novo* assembly.
+
+**Example Usage**
+
+You want to remove all the low coverage contigs from a transcriptome
+assembly to focus on those with higher coverage.
+
+You want to convert your MIRA assembly into SAM/BAM to run a standard
+SNP finding tool.
+
+You've lost the FASTA consensus from your MIRA assembly and need to
+regenerate it.
+
+
+**Citation**
+
+If you use this Galaxy tool in work leading to a scientific publication please
+cite the following papers:
+
+Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
+Galaxy tools and workflows for sequence analysis with applications
+in molecular plant pathology. PeerJ 1:e167
+http://dx.doi.org/10.7717/peerj.167
+
+Bastien Chevreux, Thomas Wetter and Sándor Suhai (1999).
+Genome Sequence Assembly Using Trace Signals and Additional Sequence Information.
+Computer Science and Biology: Proceedings of the German Conference on Bioinformatics (GCB) 99, pp. 45-56.
+http://www.bioinfo.de/isb/gcb99/talks/chevreux/main.html
+
+This wrapper is available to install into other Galaxy Instances via the Galaxy
+Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/mira4_assembler
+    
+
diff -r aeb3e35f8236 -r 4abe8d59a438 tools/mira4/mira4_de_novo.xml
--- a/tools/mira4/mira4_de_novo.xml	Tue Jun 10 10:11:58 2014 -0400
+++ b/tools/mira4/mira4_de_novo.xml	Tue Oct 28 08:29:59 2014 -0400
@@ -180,7 +180,8 @@
             
             
             
-            
+            
+            
             
         
         
+            
             
         
         
@@ -196,11 +197,9 @@
             
             
             
-            
-            
+            
+            
             
-            
-            
             
         
     
diff -r aeb3e35f8236 -r 4abe8d59a438 tools/mira4/tool_dependencies.xml
--- a/tools/mira4/tool_dependencies.xml	Tue Jun 10 10:11:58 2014 -0400
+++ b/tools/mira4/tool_dependencies.xml	Tue Oct 28 08:29:59 2014 -0400
@@ -1,7 +1,7 @@
 
 
     
-        
+