changeset 13:7fcabeeca5df draft

Uploaded v0.0.2 preview 5, fixes for MIRA 4.0 (final), more verbose error if $MIRA4 path wrong
author peterjc
date Sun, 16 Feb 2014 16:30:48 -0500
parents a6a56440567c
children 133b863a8a40
files test-data/empty_file.dat tools/mira4/README.rst tools/mira4/mira4.py tools/mira4/mira4_bait.py tools/mira4/mira4_de_novo.xml tools/mira4/mira4_mapping.xml
diffstat 5 files changed, 39 insertions(+), 19 deletions(-) [+]
line wrap: on
line diff
--- a/tools/mira4/README.rst	Mon Feb 03 06:25:35 2014 -0500
+++ b/tools/mira4/README.rst	Sun Feb 16 16:30:48 2014 -0500
@@ -1,7 +1,7 @@
 Galaxy wrapper for the MIRA assembly program (v4.0)
 ===================================================
 
-This tool is copyright 2011-2013 by Peter Cock, The James Hutton Institute
+This tool is copyright 2011-2014 by Peter Cock, The James Hutton Institute
 (formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved.
 See the licence text below (MIT licence).
 
@@ -98,7 +98,7 @@
 For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball use
 the following command from the Galaxy root folder::
 
-    $ tar -czf mira4_wrapper.tar.gz tools/mira4/README.rst tools/mira4/mira4_de_novo.xml tools/mira4/mira4_mapping.xml tools/mira4/mira4_bait.xml tools/mira4/mira4.py tools/mira4/mira4_make_bam.py tools/mira4/mira4_validator.py tools/mira4/mira4_bait.py tools/mira4/tool_dependencies.xml test-data/tvc_mini.fastq test-data/tvc_contigs.fasta test-data/tvc_map_ref_strain.fasta test-data/tvc_map_same_strain.fasta test-data/tvc_bait.fasta test-data/tvc_mini_bait_pos.fastq test-data/tvc_mini_bait_strict.fastq test-data/tvc_mini_bait_neg.fastq
+    $ tar -czf mira4_wrapper.tar.gz tools/mira4/README.rst tools/mira4/mira4_de_novo.xml tools/mira4/mira4_mapping.xml tools/mira4/mira4_bait.xml tools/mira4/mira4.py tools/mira4/mira4_make_bam.py tools/mira4/mira4_validator.py tools/mira4/mira4_bait.py tools/mira4/tool_dependencies.xml test-data/tvc_mini.fastq test-data/tvc_contigs.fasta test-data/tvc_map_ref_strain.fasta test-data/tvc_map_same_strain.fasta test-data/tvc_bait.fasta test-data/tvc_mini_bait_pos.fastq test-data/tvc_mini_bait_strict.fastq test-data/tvc_mini_bait_neg.fastq test-data/empty_file.dat
 
 Check this worked::
 
@@ -120,6 +120,7 @@
     test-data/tvc_mini_bait_pos.fastq
     test-data/tvc_mini_bait_strict.fastq
     test-data/tvc_mini_bait_neg.fastq
+    test-data/empty_file.dat
 
 
 
--- a/tools/mira4/mira4.py	Mon Feb 03 06:25:35 2014 -0500
+++ b/tools/mira4/mira4.py	Sun Feb 16 16:30:48 2014 -0500
@@ -40,10 +40,12 @@
     stop_err("Environment variable $MIRA4 not set")
 mira_binary = os.path.join(mira_path, "mira")
 if not os.path.isfile(mira_binary):
-    stop_err("Missing mira under $MIRA4, %r" % mira_binary)
+    stop_err("Missing mira under $MIRA4, %r\nFolder contained: %s"
+             % (mira_binary, ", ".join(os.listdir(mira_path))))
 mira_convert = os.path.join(mira_path, "miraconvert")
 if not os.path.isfile(mira_convert):
-    stop_err("Missing miraconvert under $MIRA4, %r" % mira_convert)
+    stop_err("Missing miraconvert under $MIRA4, %r\nFolder contained: %s"
+             % (mira_convert, ", ".join(os.listdir(mira_path))))
 
 mira_ver = get_version(mira_binary)
 if not mira_ver.strip().startswith("4.0"):
--- a/tools/mira4/mira4_bait.py	Mon Feb 03 06:25:35 2014 -0500
+++ b/tools/mira4/mira4_bait.py	Sun Feb 16 16:30:48 2014 -0500
@@ -43,7 +43,8 @@
     stop_err("Environment variable $MIRA4 not set")
 mira_binary = os.path.join(mira_path, "mirabait")
 if not os.path.isfile(mira_binary):
-    stop_err("Missing mirabait under $MIRA4, %r" % mira_binary)
+    stop_err("Missing mirabait under $MIRA4, %r\nFolder contained: %s"
+             % (mira_binary, ", ".join(os.listdir(mira_path))))
 mira_ver = get_version(mira_binary)
 if not mira_ver.strip().startswith("4.0"):
     stop_err("This wrapper is for MIRA V4.0, not:\n%s" % mira_ver)
--- a/tools/mira4/mira4_de_novo.xml	Mon Feb 03 06:25:35 2014 -0500
+++ b/tools/mira4/mira4_de_novo.xml	Sun Feb 16 16:30:48 2014 -0500
@@ -79,9 +79,11 @@
         <configfile name="manifest">
 project = MIRA
 job = denovo,${job_type},${job_quality}
-parameters = -GE:not=1 -NW:cmrnl -DI:trt=/tmp
+parameters = -NW:cmrnl=no -DI:trt=/tmp
 ## -GE:not is short for -GENERAL:number_of_threads and using one (1)
 ## can be useful for repeatability of assemblies and bug hunting.
+## This is overridden by the command line -t switch which is easier
+## to set from within Galaxy.
 ##
 ## -NW:cmrnl is short for -NAG_AND_WARN:check_maxreadnamelength
 ## and without this MIRA aborts with read names over 40 characters
@@ -99,16 +101,19 @@
 technology = ${rg.technology}
 ##Record the segment placement (if any)
 #if str($rg.segments.type) == "paired"
-segmentplacement = ${rg.segments.placement}
-segmentnaming = ${rg.segments.naming}
+segment_placement = ${rg.segments.placement}
+segment_naming = ${rg.segments.naming}
 #if str($rg.segments.min_size) != "" or str($rg.segments.max_size) != ""
 ##If our min/max validation failed I trust MIRA to give an error message...
-templatesize = $rg.segments.min_size $rg.segments.max_size
+template_size = $rg.segments.min_size $rg.segments.max_size
 #end if
 #end if
-#if str($rg.segments.type) == "none"
-segmentplacement = ?
-#end if
+##if str($rg.segments.type) == "none"
+##MIRA4 manual says use segment_placement = unknown or ? for unpaired data
+##but this stopped working in MIRA 4.0 RC5 and 4.0 (final). See:
+##http://www.freelists.org/post/mira_talk/Unpaired-reads-and-segment-placement--or-unknown
+##segment_placement = ?
+##end if
 ##MIRA will accept multiple filenames on one data line, or multiple data lines
 #for $f in $rg.filenames
 ##Must now map Galaxy datatypes to MIRA file types...
@@ -130,6 +135,9 @@
         <!-- Tiger mitochondria, selected paired end Illumina reads from SRR639755
              Note we're using just one repeat group, and only the filenames parameter
              within it, so this should work with current test framework limitations:
+             TODO: Revise example and/or -NW:cac=warn and -NW:acv=80 settings
+             MIRA 4.0 complains as coverage is about x93 which is over 80 limit.
+             Also MIRA 4.0 gives three contigs as output.
         <test>
             <param name="job_type" value="genome" />
             <param name="job_quality" value="accurate" />
@@ -153,14 +161,17 @@
 	<!-- Simple assembly based on MIRA's minidemo/solexa1 example
              Note we're using just one repeat group,
              but two parameters within the repeat (filename, no pairing)
+	     -->
         <test>
             <param name="job_type" value="genome" />
             <param name="job_quality" value="accurate" />
             <param name="type" value="none" />
             <param name="filenames" value="ecoli.fastq" ftype="fastqsanger" />
             <output name="out_fasta" file="ecoli.mira4_de_novo.fasta" ftype="fasta" />
+            <output name="out_bam" file="empty_file.dat" compare="contains" />
+            <output name="out_maf" file="empty_file.dat" compare="contains" />
+            <output name="out_log" file="empty_file.dat" compare="contains" />
         </test>
-        -->
     </tests>
     <help>
 
--- a/tools/mira4/mira4_mapping.xml	Mon Feb 03 06:25:35 2014 -0500
+++ b/tools/mira4/mira4_mapping.xml	Sun Feb 16 16:30:48 2014 -0500
@@ -80,9 +80,11 @@
         <configfile name="manifest">
 project = MIRA
 job = mapping,${job_type},${job_quality}
-parameters = -GE:not=1 -NW:cmrnl -DI:trt=/tmp
+parameters = -NW:cmrnl=no -DI:trt=/tmp
 ## -GE:not is short for -GENERAL:number_of_threads and using one (1)
 ## can be useful for repeatability of assemblies and bug hunting.
+## This is overridden by the command line -t switch which is easier
+## to set from within Galaxy.
 ##
 ## -NW:cmrnl is short for -NAG_AND_WARN:check_maxreadnamelength
 ## and without this MIRA aborts with read names over 40 characters
@@ -128,12 +130,15 @@
 #end if
 ##Record the segment placement (if any)
 #if str($rg.segments.type) == "paired"
-segmentplacement = ${rg.segments.placement}
-segmentnaming = ${rg.segments.naming}
+segment_placement = ${rg.segments.placement}
+segment_naming = ${rg.segments.naming}
 #end if
-#if str($rg.segments.type) == "none"
-segmentplacement = ?
-#end if
+##if str($rg.segments.type) == "none"
+##MIRA4 manual says use segment_placement = unknown or ? for unpaired data
+##but this stopped working in MIRA 4.0 RC5 and 4.0 (final). See:
+##http://www.freelists.org/post/mira_talk/Unpaired-reads-and-segment-placement--or-unknown
+##segment_placement = ?
+##end if
 ##MIRA will accept multiple filenames on one data line, or multiple data lines
 #for $f in $rg.filenames
 ##Must now map Galaxy datatypes to MIRA file types...