Mercurial > repos > peterjc > mira4_assembler
changeset 13:7fcabeeca5df draft
Uploaded v0.0.2 preview 5, fixes for MIRA 4.0 (final), more verbose error if $MIRA4 path wrong
author | peterjc |
---|---|
date | Sun, 16 Feb 2014 16:30:48 -0500 |
parents | a6a56440567c |
children | 133b863a8a40 |
files | test-data/empty_file.dat tools/mira4/README.rst tools/mira4/mira4.py tools/mira4/mira4_bait.py tools/mira4/mira4_de_novo.xml tools/mira4/mira4_mapping.xml |
diffstat | 5 files changed, 39 insertions(+), 19 deletions(-) [+] |
line wrap: on
line diff
--- a/tools/mira4/README.rst Mon Feb 03 06:25:35 2014 -0500 +++ b/tools/mira4/README.rst Sun Feb 16 16:30:48 2014 -0500 @@ -1,7 +1,7 @@ Galaxy wrapper for the MIRA assembly program (v4.0) =================================================== -This tool is copyright 2011-2013 by Peter Cock, The James Hutton Institute +This tool is copyright 2011-2014 by Peter Cock, The James Hutton Institute (formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved. See the licence text below (MIT licence). @@ -98,7 +98,7 @@ For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball use the following command from the Galaxy root folder:: - $ tar -czf mira4_wrapper.tar.gz tools/mira4/README.rst tools/mira4/mira4_de_novo.xml tools/mira4/mira4_mapping.xml tools/mira4/mira4_bait.xml tools/mira4/mira4.py tools/mira4/mira4_make_bam.py tools/mira4/mira4_validator.py tools/mira4/mira4_bait.py tools/mira4/tool_dependencies.xml test-data/tvc_mini.fastq test-data/tvc_contigs.fasta test-data/tvc_map_ref_strain.fasta test-data/tvc_map_same_strain.fasta test-data/tvc_bait.fasta test-data/tvc_mini_bait_pos.fastq test-data/tvc_mini_bait_strict.fastq test-data/tvc_mini_bait_neg.fastq + $ tar -czf mira4_wrapper.tar.gz tools/mira4/README.rst tools/mira4/mira4_de_novo.xml tools/mira4/mira4_mapping.xml tools/mira4/mira4_bait.xml tools/mira4/mira4.py tools/mira4/mira4_make_bam.py tools/mira4/mira4_validator.py tools/mira4/mira4_bait.py tools/mira4/tool_dependencies.xml test-data/tvc_mini.fastq test-data/tvc_contigs.fasta test-data/tvc_map_ref_strain.fasta test-data/tvc_map_same_strain.fasta test-data/tvc_bait.fasta test-data/tvc_mini_bait_pos.fastq test-data/tvc_mini_bait_strict.fastq test-data/tvc_mini_bait_neg.fastq test-data/empty_file.dat Check this worked:: @@ -120,6 +120,7 @@ test-data/tvc_mini_bait_pos.fastq test-data/tvc_mini_bait_strict.fastq test-data/tvc_mini_bait_neg.fastq + test-data/empty_file.dat
--- a/tools/mira4/mira4.py Mon Feb 03 06:25:35 2014 -0500 +++ b/tools/mira4/mira4.py Sun Feb 16 16:30:48 2014 -0500 @@ -40,10 +40,12 @@ stop_err("Environment variable $MIRA4 not set") mira_binary = os.path.join(mira_path, "mira") if not os.path.isfile(mira_binary): - stop_err("Missing mira under $MIRA4, %r" % mira_binary) + stop_err("Missing mira under $MIRA4, %r\nFolder contained: %s" + % (mira_binary, ", ".join(os.listdir(mira_path)))) mira_convert = os.path.join(mira_path, "miraconvert") if not os.path.isfile(mira_convert): - stop_err("Missing miraconvert under $MIRA4, %r" % mira_convert) + stop_err("Missing miraconvert under $MIRA4, %r\nFolder contained: %s" + % (mira_convert, ", ".join(os.listdir(mira_path)))) mira_ver = get_version(mira_binary) if not mira_ver.strip().startswith("4.0"):
--- a/tools/mira4/mira4_bait.py Mon Feb 03 06:25:35 2014 -0500 +++ b/tools/mira4/mira4_bait.py Sun Feb 16 16:30:48 2014 -0500 @@ -43,7 +43,8 @@ stop_err("Environment variable $MIRA4 not set") mira_binary = os.path.join(mira_path, "mirabait") if not os.path.isfile(mira_binary): - stop_err("Missing mirabait under $MIRA4, %r" % mira_binary) + stop_err("Missing mirabait under $MIRA4, %r\nFolder contained: %s" + % (mira_binary, ", ".join(os.listdir(mira_path)))) mira_ver = get_version(mira_binary) if not mira_ver.strip().startswith("4.0"): stop_err("This wrapper is for MIRA V4.0, not:\n%s" % mira_ver)
--- a/tools/mira4/mira4_de_novo.xml Mon Feb 03 06:25:35 2014 -0500 +++ b/tools/mira4/mira4_de_novo.xml Sun Feb 16 16:30:48 2014 -0500 @@ -79,9 +79,11 @@ <configfile name="manifest"> project = MIRA job = denovo,${job_type},${job_quality} -parameters = -GE:not=1 -NW:cmrnl -DI:trt=/tmp +parameters = -NW:cmrnl=no -DI:trt=/tmp ## -GE:not is short for -GENERAL:number_of_threads and using one (1) ## can be useful for repeatability of assemblies and bug hunting. +## This is overridden by the command line -t switch which is easier +## to set from within Galaxy. ## ## -NW:cmrnl is short for -NAG_AND_WARN:check_maxreadnamelength ## and without this MIRA aborts with read names over 40 characters @@ -99,16 +101,19 @@ technology = ${rg.technology} ##Record the segment placement (if any) #if str($rg.segments.type) == "paired" -segmentplacement = ${rg.segments.placement} -segmentnaming = ${rg.segments.naming} +segment_placement = ${rg.segments.placement} +segment_naming = ${rg.segments.naming} #if str($rg.segments.min_size) != "" or str($rg.segments.max_size) != "" ##If our min/max validation failed I trust MIRA to give an error message... -templatesize = $rg.segments.min_size $rg.segments.max_size +template_size = $rg.segments.min_size $rg.segments.max_size #end if #end if -#if str($rg.segments.type) == "none" -segmentplacement = ? -#end if +##if str($rg.segments.type) == "none" +##MIRA4 manual says use segment_placement = unknown or ? for unpaired data +##but this stopped working in MIRA 4.0 RC5 and 4.0 (final). See: +##http://www.freelists.org/post/mira_talk/Unpaired-reads-and-segment-placement--or-unknown +##segment_placement = ? +##end if ##MIRA will accept multiple filenames on one data line, or multiple data lines #for $f in $rg.filenames ##Must now map Galaxy datatypes to MIRA file types... 
@@ -130,6 +135,9 @@ <!-- Tiger mitochondria, selected paired end Illumina reads from SRR639755 Note we're using just one repeat group, and only the filenames parameter within it, so this should work with current test framework limitations: + TODO: Revise example and/or -NW:cac=warn and -NW:acv=80 settings + MIRA 4.0 complains as coverage is about x93 which is over 80 limit. + Also MIRA 4.0 gives three contigs as output. <test> <param name="job_type" value="genome" /> <param name="job_quality" value="accurate" /> @@ -153,14 +161,17 @@ <!-- Simple assembly based on MIRA's minidemo/solexa1 example Note we're using just one repeat group, but two parameters within the repeat (filename, no pairing) + --> <test> <param name="job_type" value="genome" /> <param name="job_quality" value="accurate" /> <param name="type" value="none" /> <param name="filenames" value="ecoli.fastq" ftype="fastqsanger" /> <output name="out_fasta" file="ecoli.mira4_de_novo.fasta" ftype="fasta" /> + <output name="out_bam" file="empty_file.dat" compare="contains" /> + <output name="out_maf" file="empty_file.dat" compare="contains" /> + <output name="out_log" file="empty_file.dat" compare="contains" /> </test> - --> </tests> <help>
--- a/tools/mira4/mira4_mapping.xml Mon Feb 03 06:25:35 2014 -0500 +++ b/tools/mira4/mira4_mapping.xml Sun Feb 16 16:30:48 2014 -0500 @@ -80,9 +80,11 @@ <configfile name="manifest"> project = MIRA job = mapping,${job_type},${job_quality} -parameters = -GE:not=1 -NW:cmrnl -DI:trt=/tmp +parameters = -NW:cmrnl=no -DI:trt=/tmp ## -GE:not is short for -GENERAL:number_of_threads and using one (1) ## can be useful for repeatability of assemblies and bug hunting. +## This is overridden by the command line -t switch which is easier +## to set from within Galaxy. ## ## -NW:cmrnl is short for -NAG_AND_WARN:check_maxreadnamelength ## and without this MIRA aborts with read names over 40 characters @@ -128,12 +130,15 @@ #end if ##Record the segment placement (if any) #if str($rg.segments.type) == "paired" -segmentplacement = ${rg.segments.placement} -segmentnaming = ${rg.segments.naming} +segment_placement = ${rg.segments.placement} +segment_naming = ${rg.segments.naming} #end if -#if str($rg.segments.type) == "none" -segmentplacement = ? -#end if +##if str($rg.segments.type) == "none" +##MIRA4 manual says use segment_placement = unknown or ? for unpaired data +##but this stopped working in MIRA 4.0 RC5 and 4.0 (final). See: +##http://www.freelists.org/post/mira_talk/Unpaired-reads-and-segment-placement--or-unknown +##segment_placement = ? +##end if ##MIRA will accept multiple filenames on one data line, or multiple data lines #for $f in $rg.filenames ##Must now map Galaxy datatypes to MIRA file types...