changeset 0:32f693f6e741 draft

Uploaded v0.0.1 preview0, very much a work in progress, primarily checking mira_datatypes dependency
author peterjc
date Thu, 26 Sep 2013 12:23:42 -0400
parents
children 99fde64b9563
files test-data/tvc_mini.fastq tools/mira4/README.rst tools/mira4/mira4.py tools/mira4/mira4_de_novo.xml tools/mira4/mira4_mapping.xml tools/mira4/tool_dependencies.xml
diffstat 6 files changed, 569 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tvc_mini.fastq	Thu Sep 26 12:23:42 2013 -0400
@@ -0,0 +1,24 @@
+@gnlti136477918
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXTTTCGAGCGGCCGCCCGGGCAGGTACCCTCCACCATGAAACCAGGCTTGGGTCCCTCAGGCTGCCTCTTGGTGCTGATAATCTTTCCCTGTGCCTTTGCCTCAGCCTTCAACTTATCATTCTTCTTGATCCTCTCCATTATCTCCTCATGGCAACGAGATGGCTGGACATGTTCCACACGAACATGAATCCTCTTCCTTATGATCCTGTTACCAACCTGTTTGTTGACCTCAACACCAACAGCGCGCTTGGTAACATTCCAGACCCGACCCGTGCGCCCATGGTAGAACTTGTGGGGCATACCTTTGTGGATCGACCCGTTAACCTTGACATCAACATAGTCGCCGACTTTGAAGATACGAAGGTAAGTTGTGAGATGGGTAGGACCCTTCTTCCTGAATGCCCGAGCAAATAGATCCCTGGTGCGTGAACTCAAACCGTGACCCGCCGGCATTTTGAGGTGTTTTTCAGCTGCCTTGTTCACXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
++
+38<>><><<96-++42:AABBCCCCCCCCDFFFIYYIIIIIINTTTTNNNNNNTTTTTTNNIIIIHHHHHHYIFFFIDIINIITTTOQDDDHHHNTYTFFFIIINNIITDDDDDDFLLTTTLLLYYYYYYFFIIIILKOOYYYNNNNNNOOKKKKILLLFFIOOTTNLLLLLNYYYYKYFDDDLLLNNNTTNNLMKKSYNNJIIGGGGLLIILOOYYYYYYYYYTNNNNNTYYYYYYYTOLLLLLLNTTYYTTTLLTYYKKKONNNNLLLLGGINIIIIIINNNNNNNNIHHHHHHHHHHINIITTTTNNNNNTYTNNNNIIIFFDHHHFFINNNNIHHHDDEIDDDNNDDKQQQQMMMQQYYNNIDCBBBBAHIGGGKYYYOOD?<AACCCCCHCCC@>>>>HBBAAAA>@999AOOOYIIICC<<,,,99HHHFKK??C>>B>>H?6/+))42856301:7<>HHEI4/))-10449--0..((*4))*35A<9+++44>BB754---@<;42*))45:7024.(')))')++049>>41-'(,'(.2393222/3171((((-.4011/0+).)''),..4133><B=451119411+))<44:686:/066888888=::884))*'''**,''*-.''*,/2(*144+')64>;1/,'')''1*30+0..****(*0-.4-)*),'(''+,-((*+))**+,''''''***''***-*)121,''''(+*,,+-((****.0..,0*))*(),))''))*+,*)()))''''+'')'')**)()'','')'(**((*((*(((*441.-*****())+*''')-++*****-*((((**))))))*)))++***)(**11.()****0*-,((*--.***,((,,,**'')'''')'-((--,''**441***)+'(''*,*(
+@gnlti136478624
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXTTAGCGTGGTCGCGGCCGAGGTACCCTCTACCATGAAACCAGGCTTGGGTCCCTCTGGCTGTCTCTTGGTGCTGATAATCTTACCTTGTGCCTTGGCCTCAGCCTTCAACTTATCGTTCTTCTTGATCCTCTCCATGATCTCCTCATGGCACCTTGATGGCTGGACATGTTCCACACGAACATGAATCCTCTTCCTTATGATTCTGTTTCCAACCTGCTTGTTGACCTCAACACCAACAGCACGCTTGGTGACGTTCCATACCCGACCCGTGCGGCCATGATAGAACTTGTGGGGCATACCTTTGTGGATCGACCCGTTAACCTTGACATCAACATAGTCGGCGACTTTGAAGATACGAAGGTAAGTTGTGAGATGGGTAGGACCCTTCTTCCTGAATGGCCGAGCAAATAGATCTCTGGGTGCGGGAACTCAAACCGGGAACCGGCGGCATTTTGCGGTGTTTTTAGACCCTGCCGGGGGGGCGGTCGAAAGGCCGATTCTTGAGATTTTCCXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXGGGGGTAGGAGGTTGTAATTGGAAAAACCTGGGGTAGCAAGTTAATGGCTTGAGCAATTCCGTTCGGCGGGTGGGTATAGAGAAGGGGCGGGCGATCGGGATCCGAAGATGGGGAGCGGATGGGGAGGAGAGGCAGGTGGGGATATAGGGGGGGGGTGGGGTAGGGGAGGGGCGGTGCTGTAGGGGGAGGGGCGGCGTTGGTTTTCTGTGTTACGAGTTGGGTGACCCGAAGTAATTGGGG
++
++1449>>>;=::AADDCCCCFIICCBB>???BBDDDDYYHBCCBBFF@@777BBG@>7584;;@DHDDDDDDMNIIIIIYYYTOYKKKMIDDDDDHOKKKQSTTTNNIIYYFFFFFIDDDDDIYOOIIDAA>DADDFDLLDDDIKKKKOKKKKKKYYYOOJJJOYYYYTOOKKPMMMMMSSSSMMMSSYYYYLJIIIID=====FKKKKKKYYYOOKKIIIIISSFFDIHIIKSSTOOKKKLYYSSKMIIIOOIIIDDDDHDDDIOOIIFFFIIIIKKKMIIIIIIMKKKKIIIFDDDDADDIDDDDDDDHDDDDFFF99///<<HFFFFFFFFGOOYTDDDHHH99,,,95>>>>47//-</3-822.446777BBBFFIOC>6.++-53:?:>7744213...772007:9:-++33>>DH>>??933;;FQ<93/+10++/.//-10234:1//223;:/,,***++'')'+,/)))-.2.++((.0***,))*,0(())''))))+'')***''))***))),669+,*****..''')*,**,*))))*'',)))'(++,++((*+*)*.*))''')***''.*))'')''''''***+)))++**(''''')****)''')'(***''**+/.)))*)')((''***(('')'')-))''''.'')))**'+''''**))''))***+((***)%(((***(((((,.,,(((((*(((+.(()'''')*(())(***((**-+,,)')''*/,''''**'''))((''*+((''''))*))'')'')),.)())'''''('*)**+***-*(')''))((+++0***(('')'')**()++*+**(')).5+*'''')*,---'''')'''
+@gnlti136478626
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXTTTCGAGCGGCCGCCCGGGCAGGTCTGAAAAACACCTCAAAATGCCGGCGGGTCACGGTTTGAGGTCACGCACCAGAGATCTATTTGCTCGGGCATTCAGGAAGAAGGGTCCTACCCATCTCACAACTTACCTTCGTATCTTCAAAGTCGGCGACTATGTTGATGTCAAGGTTAACGGGTCGATCCACAAAGGTATGCCCCACAAGTTCTACCATGGCCGCACGGGTCGGGTATGGAACGTCACCAAGCGCGCTGTTGGTGTTGAGGTCAAGCATGAGGTGGGAAACAGAATCATAAGGAAAAGGATTCATGTGCGTGTGGAAGATGGTCAGGGATCGAGGTGCCATGAGGAGATCATGGTGAGGATCGAGAAGAACGATTAAGGGGAAGGCTGAGGGCGAGGGACAAGGTAAGATTATCAGCGACAAGAGACAAGCGGAGGGAGCCACGGCTGGGTGTGTGGTAGAGGGTGCCTCGGGCCGGGGCAAGGCTAAGCCGAATGCTGGGGATATTCATTAGACTGGGGGGCGGTCGAGGGTGGGTCGTAATGGGCCATTTGCGCGTATGGTGGGGTTGTTTGACATTGCGCTGGCCTCGGTTTACAGGGTTGTGATTGGAAAGCCGTGCGGTTGCCAACGTTAGTGTTTGGGAGACGTTCGCGTTCGGGGGCTGGGGTATTAAGGGGGGTCTGGGGTAAGGGCGTGCGAGGATGGTGGAGGGGTTTGGGGTTGGGCGTCTGTTTCGGGGTTTGTGGCGGGGGGTTGGTGGTTGCGTGACGGTGGGGGGGTTGGGCAGGCTATGGCGGGGCGTGGTGTTGGTCTGGTTGTGAGGATGTGAGTGTGCGTTGTTGTGTATTGGGACAGGT
++
+))..28:>C>CDDDDDDCCCCDDD>>A990028>HFFFIIFDDDDDHOTYYNGFAAAA;>??BQQIDDDIDIIIIGMMDDDDDDNIIIGGFFFFIMYKKIKKDD>D>>>C>D>><<<>::..'')46>IIIQIYYYMFDDAADKKKKYYYYYYNNDDDDIIGGKK777MMFFFKDDFAADDDDFKKKKFFFKIDDDDDDKIIIIEMFF=@@@B@BB??>O???OOTTTTLLKKK???DDDD>AAAA>B994B122:=B44/--447<155>>IIFFIKKKGGGGIIN944499C>>>>>>9</--7/00?;33/5/''''))**,.,,,2/0/20004449,,,-,6,--2:G>D>D74-++.15;911**+/-''))****-,''))1.2-.*****-<>71+**()+19:46.--+-*1611+*((****'''''(-/411-1***.((+***('**-8211,-**'''')+,,4,))''))))'')),,(')))).5++))'')).1-+,,.-+(''(++,,,('''))*''''))')+).''))*)-('')+)*((++.+++-*))('))''))+-0./,,))''))'')''*'')))''****.+*''*))'')'')'')))**+++))'''))'''*''*((****'''')'(,(''''''')*''))''))++*(((*((-))'')-)**()******042))***((*))''))*,-.((*)'')%%%)+++****((***-+*)''''))))''''))''''''')))),))***+('))))+.,)()+**''+.-)))(')''))'(***,(((,,***((((**++'')'(*))'''(**'''******((****//--))0+)''))*****))'')%%'')('*)))-(*01**))'((
+@gnlti136479063
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXTTAGCGTGGTCGCGGCCGAGGTACCCTCTACCATGAAACCAGGCTTGGGTCCCTCTGGCTGTCTCTTGGTGCTGATAATCTTACCTTGTGCCTTGGCCTCAGCCTTCAACTTATCGTTCTTCTTGATCCTCTCCATGATCTCCTCATGGCACCTTGATGGCTGGACATGTTCCACACGAACATGAATCCTCTTCCTTATGATTCTGTTTCCAACCTGCTTGTTGACCTCAACACCAACAGCGCGCTTGGTGACGTTCCATACCCGACCCGTGCGCCCATGGTAGAACTTGTGGGGCATACCTTTGTGGATCGACCCGTTAACCTTGACATCAACATAGTCGGCGACTTTGAAGATACGAAGGTAAGTTGTGAGATGGGTAGGACCCTTCTTCCTGAATGGCCGAGCAAATAGATCCCTGGTGCGTGAGCTCAAAGCGTGGACCGGCGGCATTTTAGGGTGTTTTTCAGCTGGCTCGGTGGTTTGAATGTGACTTGGGCGGGGGGGGGGTCGAAAGGCGAATTTGGAGATTTTCATAAAATTGGGGCGGTTGAAGATTGATTTTAAGGGGCAATTTGGGCTATAGGGGTGGTTTTAAATTAATGGGGGTGGTTTAAAAGTGTGATGGGGAAACGTGGGTTACCAATTTATGGGTGTGTGGAGTTCCCTTTGTGAGGTGGTATAGGAAAGGGGGGGCGTGACCTGCCACGTGGGGGGGGAAGTGTATGGGGGCGGGTTGGGGGGTTGAGGGGGGTGTGGGTGTGGGGGTGGGTTTTGTTAGGCGAGGTGGTTTTTTTTTCTTTTTTTTTTTAGTGGAGGTGT
++
+04--46:<<B<<>@>HHEB<822<<IEHIHCCCCCCIIIITTIIIIIINNTTTTTYTIHHHHHHNNNIIDDDDFFNTKM>>?OQFFFMKOOTFDDDDHHIIIIOHFFFFFINTDDAAAADHHDDDOYNNHHFFDDDDDDFDC=AA=DIIIIFFNHHFFFFNNNNNNNNNNDD448DNTOOKKKOBB?DFGGGNOTOO555>>A>>>AAF:::>>@DB=====5AACOIIBCCBB<5005<41''+18EAAAHHHB>96-+,+14:AAIB??>>CD>>;87>5:30-14477<>@CDDD>>?==MQYI>H---88:77:<B>>=33000008<9::>BBBFHHCCC>IFDDDOOOQIQQII:2((+6<552228>DDDEH>>33399>31)''-.FFIMIIIO>>333;@II>71:37<AAEIAA778<B69,,,01BBIKFF>>>944,,,6:6/(((*44<<43,,,66AEH98,,,6/+**--..((*,1><::65/0*'))'(,-,)++*31+((*((**.,+*'')'()'''*++))''('*+26410''''+)(())''))*'(***++*))*((****(''''')++**)+'')*))*.-***))*)*-/****,-30.)''''''''***''''')-.*))'')''**++*))/,,((,+-***+)'''''')+'''.*)')'(0-+((+++)))'''(*+'''')'(**,***''''))*))'')))),''))''))*((***))/()(*''''++**((((((****(')))))*))'),))'')''.)))))))'')+,++')-))'(+))***))''))****++))))+1-**))**'(140''))**))'')+**(
+@gnlti136479357
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXTTGTGGTCGCGGCCGAGGTACCCTGCACCATGAAACCAGGCTTGGGTCCCTCTGGCTGTCTCTTGGTGCTGATAATCTTACCTTGTGCCTTGGCCTCAGCCTTCAACTTATCGTTCTTCTTGATCCTCTCCATGATCTCCTCATGGCACCTTGATGGCTGGACATGTTCCACACGAACATGAATCCTCTTCCTTATGATTCTGTTTCCAACCTGCTTGTTGACCTCAACACCAACAGCGCGCTTGGTGACGTTCCATACCCGACCCGTGCGGCCATGGTAGAACTTGTGGGGCATACCTTTGTGGATCGACCCGTTAACCTTGACATCAACATAGTCGCCGACTTTGAAGATACGAAGGTAAGTTGTGAGATGGGTAGGACCCTTCTTCCTGAATGCCCGAGCAAATAGATCCCTGGTGCGTGGACTCCAAACGTGAACTTGCCGGGCGGGGGGGAGAGGGGGAGCGGGGGGGGGGGAGAATAAGGGGGGAGGGGAGGGGGAGAGAAAAGGGAGGAGGGGGGGGGTAGGGAGGGAGAGGGAGGGGGGAGGGGGGGGGAGGGGGGGGGGGGAGGGAAGGGGGGGGAGGGGGGGAGGGGGAGGGGAGAGGGGGAGGGGAGGGGGGGGGGAGGGGAGGGGAAGAGGGGGGGGGGGGGGGGGGGAGGGGGGGAGGGGGGGGGGGAGGGGGGGGGGGGGGAGGGAGAAGGAGAAA
++
+.4<BB;>>>>>>>>FDCCCCCCIINIIIDCCCCCCDDDDYQKKFNNNCAAAAAINNIIINTIIHHDDDDDDDDDDDKITTTTLYYYYYLFFIIIILOKKKIIIIKKOKYFDDDDDFIIIIIKKKLLLLTIDDDDDFFDDDDDNNIIIIIKKDDDDHHJOYYSSMMFFADDDDLYSSB>666>BDDDDKOOKJJOOJJED==99AOIJJOOYYYLJJJLLTTTTLYYYYYYYYYYLLIIBBADDNOIIIIIINDAADDDDKOOIIIIIFDDA>7==@@DII??887BBOOFDDDDDIYYNNNHDDKOO?BBHHINODDAF>A>AADFFIIOGFFFFIITOOIDDDDDDDDDDDHHD89,,,<>FFFDD>99<<<B<845;<BAAA;>99=EBIIIIIOOD@@><>AB<8::AA:>AABHIHHHCCC99--+46CCCIIIIAA551-4440++)))4499+))019<>>>1/()0/-('''129.,//+((**+++8@@,*)11))*+***+++))%(,.*)))..,.2+**+8..)))),*))'')))''.''+)*++)+)))''))'(++++*))'''''))****))''))/.03:=741.''**),''''**))))))4**)')'').11.('*))'%)*-.2))*.0('''))(')''))****))('+'')''))****,((((**))1..''))***)1-1-.''),,''))%(.**)(')))*)-().-.***))1)''+''))****))''
+@gnlti136479522
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXTTTCGAGCGGCCGCCCGGGCAGGTCTGAAAAACACCGCAAAATGCCGGCGGGTCACGGTTTGGGGTCGCGCACCAGAGATCTATTTGCTCGGGCATTCAGGAAGAAGGGTCCTACCCATCTCACAACTTACCTTCGTATCTTCAAAGTCGGCGACTATGTTGATGTCAAGGTTAACGGGTCGATCCACAAAGGTATGCCCCACAAGTTCTATCATGGCCGCACGGGTCGGGTATGGAACGTCACCAAGCGTGCTGTTGGGTGTNGAGGTCAAAAGCAAGTTGGAAACAGAATCATAAGGAAGAGGATTCATGTCCGTGTGGAACATGTCCAGCCATCAAGGTGCCATGAGGAGATCATGGAGAGGATCAAGAAGAACGATTAAGTGAAGGCTGAGGGCAAGGCACAAGGTATGATTATCAGGACCAAGAGACAGGCAGAGGGGACCAAGGCCTGGTTTCATGGGTXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
++
+(/..2>>H@CACCHICCCCCCIIYTTTFA>>>ADIIIIOTNNNNNIKINIIHDDDIIIHOMMFNNHDDDDDFINIIKOKKKFFIIIITYTLYIIIIIINTTTIFFFFFKLLYYYYYNNNNHHDDDGGNYYYYYYFDGDDDTHIIIIIIIKFFFIIITYINIIIITTTTYYYYTTNNNNNNNIIIIIIIIIFFIOOOOOIFFFFIIINOFKKNND84**+::FFFDHDDDDDIDD>44***49IIIFCIA?94233AIIIMQOOBFF:4-***66CCCCD>>444>?B44*((***45C>@BHIAAAA94%!%44=1-''''))''+(+/,((*245411.40)((+4::79..***-+/.()14BEEIIBCFIIHD88,,,NBID>>A>BB?AAAA>H:::;::4-+,,4/;46,**4841))/1.''*)))+444+++520'')11)(*.+,0**0((*159501224594406652//-/-2,/*1*')+.()./1.01::>>>>5511.4***1:5*((,-/-((******+*-'')((/20-)-,-*++.1/.(())''),351''))'(..280.'')+()**,398..''))**((+1.(())''))**-.--,,**''*)((-)***(()),1,/.1,))))+,+*+*++,-,'''')**((+*++,))))''))**1.,,***+****,+**+++4***.*))'')'')))''))*.5811.--+,+*+*))+,,-..+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/mira4/README.rst	Thu Sep 26 12:23:42 2013 -0400
@@ -0,0 +1,114 @@
+Galaxy tool to wrap the MIRA sequence assembly program (v4.0)
+=============================================================
+
+This tool is copyright 2011-2013 by Peter Cock, The James Hutton Institute
+(formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved.
+See the licence text below (MIT licence).
+
+This tool is a short Python script (to collect the MIRA output and move it
+to where Galaxy expects the files) and associated Galaxy wrapper XML files.
+
+It is available from the Galaxy Tool Shed at:
+http://toolshed.g2.bx.psu.edu/view/peterjc/mira4_assembler 
+
+
+Automated Installation
+======================
+
+This should be straightforward. Via the Tool Shed, Galaxy should automatically
+install the 'mira' datatype, download and install the precompiled binary for
+MIRA v4.0 for the Galaxy wrapper, and run any tests.
+
+For MIRA 4, the Galaxy wrapper has been split in two, allowing separate
+cluster settings for de novo usage (high RAM) and mapping (lower RAM).
+Consult the Galaxy administration documentation for your cluster setup.
+
+WARNING: This tool was developed to construct viral genome assembly and
+mapping pipelines, for which the run time and memory requirements are
+negligible. For larger tasks, be aware that MIRA can require vast amounts
+of RAM and run-times of over a week are possible. This tool wrapper makes
+no attempt to spot and reject such large jobs.
+
+
+Manual Installation
+===================
+
+First install the 'mira' datatype for Galaxy, available here:
+
+* http://toolshed.g2.bx.psu.edu/view/peterjc/mira_datatypes 
+
+There are just three Galaxy files to install:
+
+* mira4.py (the Python script)
+* mira4_de_novo.xml (the Galaxy tool definition for de novo usage)
+* mira4_mapping.xml (the Galaxy tool definition for mapping usage)
+
+The suggested location is a new tools/mira4 folder. You will also need to
+modify the tool_conf.xml file to tell Galaxy to offer the tool, and also do
+this to tool_conf.xml.sample in order to run any tests::
+
+  <tool file="mira4/mira4_de_novo.xml" />
+  <tool file="mira4/mira4_mapping.xml" />
+
+You will also need to install MIRA, we used version 4.0 RC2. See:
+
+* http://chevreux.org/projects_mira.html
+* http://sourceforge.net/projects/mira-assembler/
+
+You may wish to use different cluster setups for the de novo and mapping
+tools, see above.
+
+
+History
+=======
+
+======= ======================================================================
+Version Changes
+------- ----------------------------------------------------------------------
+v0.0.1  - Initial version (prototype using MIRA 4.0 RC2, and wrapper for v3.4)
+======= ======================================================================
+
+
+Developers
+==========
+
+Development is on a dedicated GitHub repository:
+https://github.com/peterjc/pico_galaxy/tree/master/tools/mira_4_0
+
+For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball use
+the following command from the Galaxy root folder::
+
+    $ tar -czf mira4_wrapper.tar.gz tools/mira4/README.rst tools/mira4/mira4_de_novo.xml tools/mira4/mira4_mapping.xml tools/mira4/mira4.py tools/mira4/tool_dependencies.xml test-data/tvc_mini.fastq test-data/tvc_contigs_mira4.fasta
+
+Check this worked::
+
+    $ tar -tzf mira4_wrapper.tar.gz
+    tools/mira4/README.rst
+    tools/mira4/mira4_de_novo.xml
+    tools/mira4/mira4_mapping.xml
+    tools/mira4/mira4.py
+    tools/mira4/tool_dependencies.xml
+    test-data/tvc_mini.fastq
+    test-data/tvc_contigs_mira4.fasta
+
+
+Licence (MIT)
+=============
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/mira4/mira4.py	Thu Sep 26 12:23:42 2013 -0400
@@ -0,0 +1,180 @@
+#!/usr/bin/env python
+"""A simple wrapper script to call MIRA and collect its output.
+"""
+import os
+import sys
+import subprocess
+import shutil
+import time
+
+WRAPPER_VER = "0.0.1" #Keep in sync with the XML file
+
+def stop_err(msg, err=1):  # Report msg on stderr, then exit with status err.
+    sys.stderr.write(msg+"\n")
+    sys.exit(err)
+
+
+def get_version(mira_binary):
+    """Run MIRA to find its version number"""
+    # At the command line I would use: mira -v | head -n 1
+    # however there is some pipe error when doing that here.
+    cmd = [mira_binary, "-v"]
+    try:
+        child = subprocess.Popen(cmd,
+                                 stdout=subprocess.PIPE,
+                                 stderr=subprocess.STDOUT)
+    except Exception, err:
+        sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err))
+        sys.exit(1)
+    ver, tmp = child.communicate()  # tmp is None as stderr is merged into stdout
+    del child
+    return ver.split("\n", 1)[0]  # Only the first line of the output
+
+
+os.environ["PATH"] = "/mnt/galaxy/downloads/mira_4.0rc2_linux-gnu_x86_64_static/bin/:%s" % os.environ["PATH"]  # NOTE(review): hard-coded, machine-specific install path
+mira_binary = "mira"
+mira_ver = get_version(mira_binary)
+if not mira_ver.strip().startswith("4.0"):  # Refuse to run against any other MIRA release
+    stop_err("This wrapper is for MIRA V4.0, not:\n%s" % mira_ver)
+if "-v" in sys.argv:  # Version query only: report both versions and stop
+    print "MIRA wrapper version %s," % WRAPPER_VER
+    print mira_ver
+    sys.exit(0)
+
+
+def log_manifest(manifest):
+    """Echo the manifest file to stderr between banner lines."""
+    bar = "=" * 60
+    sys.stderr.write("\n%s\nManifest file\n%s\n" % (bar, bar))
+    with open(manifest) as handle:
+        sys.stderr.writelines(handle)
+    sys.stderr.write("\n%s\nEnd of manifest\n%s\n" % (bar, bar))
+
+
+def massage_symlinks(manifest):
+    """Create FASTQ aliases and edit the manifest to use them.
+
+    Short term measure for MIRA 4.0RC2 which depends on data file
+    extensions to decide the file format, and doesn't like *.dat
+    as used in Galaxy. Every file on a "data =" line is symlinked
+    as inputN.fastq beside the manifest, which is rewritten to use
+    the aliases. Calls stop_err if a symlink cannot be created.
+    """
+    base = os.path.split(manifest)[0]
+    with open(manifest) as h:
+        lines = h.readlines()
+    f = 0
+    for i, line in enumerate(lines):
+        if not line.startswith("data ="):
+            continue
+        #Filenames are split on whitespace, so must not contain spaces
+        new_line = "data ="
+        for filename in line[6:].split():
+            assert os.path.isfile(filename), filename
+            f += 1
+            alias = os.path.join(base, "input%i.fastq" % f)
+            new_line += " " + alias
+            try:
+                #Absolute target and no shell, unlike "ln -s %s %s"
+                os.symlink(os.path.abspath(filename), alias)
+            except OSError, err:
+                stop_err("Problem creating FASTQ alias:\n%s -> %s\n%s" % (alias, filename, err))
+        lines[i] = new_line + "\n"
+    with open(manifest, "w") as h:
+        h.writelines(lines)
+    return True
+
+
+def collect_output(temp, name):
+    """Move MIRA's MAF and unpadded FASTA to module-level out_maf/out_fasta."""
+    f = "%s/%s_assembly/%s_d_results" % (temp, name, name)
+    if not os.path.isdir(f):
+        log_manifest(manifest)
+        stop_err("Missing output folder")
+    if not os.listdir(f):
+        log_manifest(manifest)
+        stop_err("Empty output folder")
+    missing = []
+    for old, new in [("%s/%s_out.maf" % (f, name), out_maf),
+                     ("%s/%s_out.unpadded.fasta" % (f, name), out_fasta)]:
+        if not os.path.isfile(old):
+            missing.append(os.path.splitext(old)[-1])  # Report just the extension
+        else:
+            shutil.move(old, new)
+    if missing:
+        log_manifest(manifest)
+        sys.stderr.write("Contents of %r: %r\n" % (f, os.listdir(f)))
+        stop_err("Missing output files: %s" % ", ".join(missing))
+
+def clean_up(temp, name):
+    """Delete the <name>_assembly working folder inside temp, if present."""
+    if os.path.isdir("%s/%s_assembly" % (temp, name)):
+        shutil.rmtree("%s/%s_assembly" % (temp, name))
+
+#TODO - Run MIRA in /tmp or a configurable directory?
+#Currently Galaxy puts us somewhere safe like:
+#/opt/galaxy-dist/database/job_working_directory/846/
+temp = "."
+#name, out_fasta, out_qual, out_ace, out_caf, out_wig, out_log = sys.argv[1:8]
+name = "MIRA" #Matches "project = MIRA" in the XML manifest templates
+manifest, out_maf, out_fasta, out_log = sys.argv[1:5]
+
+#Hack until MIRA v4 lets us specify file format explicitly.
+massage_symlinks(manifest)
+
+start_time = time.time()
+#cmd_list =sys.argv[8:]
+cmd_list = [mira_binary, manifest]
+cmd = " ".join(cmd_list) #Only used in the log and error messages
+
+assert os.path.isdir(temp)
+d = "%s_assembly" % name
+assert not os.path.isdir(d), "Path %s already exists" % d
+try:
+    #Check path access by creating the assembly folder up front
+    os.mkdir(d)
+except Exception, err:
+    log_manifest(manifest)
+    sys.stderr.write("Error making directory %s\n%s" % (d, err))
+    sys.exit(1)
+
+#print os.path.abspath(".")
+#print cmd
+
+handle = open(out_log, "w")
+try:
+    #Run MIRA, sending both its stdout and stderr to the log file
+    child = subprocess.Popen(cmd_list,
+                             stdout=handle,
+                             stderr=subprocess.STDOUT)
+except Exception, err:
+    log_manifest(manifest)
+    sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err))
+    #TODO - call clean up?
+    handle.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err))
+    handle.close()
+    sys.exit(1)
+#Use .communicate() as .wait() can deadlock.
+stdout, stderr = child.communicate()
+assert not stdout and not stderr #Should be empty as sent to handle
+run_time = time.time() - start_time
+return_code = child.returncode
+handle.write("\n\nMIRA took %0.2f minutes\n" % (run_time / 60.0))
+print "MIRA took %0.2f minutes" % (run_time / 60.0)
+if return_code:
+    handle.write("Return error code %i from command:\n" % return_code)
+    handle.write(cmd + "\n")
+    handle.close()
+    clean_up(temp, name)
+    log_manifest(manifest)
+    stop_err("Return error code %i from command:\n%s" % (return_code, cmd),
+             return_code)
+handle.close()
+
+#print "Collecting output..."
+collect_output(temp, name)
+
+#print "Cleaning up..."
+clean_up(temp, name)
+
+print "Done"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/mira4/mira4_de_novo.xml	Thu Sep 26 12:23:42 2013 -0400
@@ -0,0 +1,121 @@
+<tool id="mira_4_0_de_novo" name="MIRA v4.0 de novo assembler" version="0.0.1">
+    <description>Takes Sanger, Roche, Illumina, Ion Torrent and PacBio data</description>
+    <requirements>
+        <requirement type="python-module">Bio</requirement>
+        <requirement type="binary">mira</requirement>
+        <requirement type="package" version="4.0">MIRA</requirement>
+    </requirements>
+    <version_command interpreter="python">mira4.py -v</version_command>
+    <command interpreter="python">
+mira4.py $manifest $out_maf $out_fasta $out_log
+    </command>
+    <inputs>
+        <param name="job_type" type="select" label="Assembly type">
+            <option value="genome">Genome</option>
+            <option value="est">EST (transcriptome)</option>
+        </param>
+        <param name="job_quality" type="select" label="Assembly quality grade">
+            <option value="accurate">Accurate</option>
+            <option value="draft">Draft</option>
+        </param>
+        <repeat name="read_group" title="Read Group" min="1">
+            <param name="technology" type="select" label="Read technology" help="MIRA has different error models for different technologies">
+                <option value="solexa">Solexa/Illumina</option>
+                <option value="sanger">Sanger capillary sequencing</option>
+                <option value="454">Roche 454</option>
+                <option value="iontor">Ion Torrent</option>
+                <option value="pcbiolq">PacBio low quality (raw)</option>
+                <option value="pcbiohq">PacBio high quality (corrected)</option>
+                <option value="text">Synthetic reads (database entries, consensus sequences, artificial reads, etc)</option>
+		<!-- TODO reference/backbone as an entry here? -->
+            </param>
+	    <repeat name="reads" title="Reads" min="1" help="Paired reads can be combined into one file, or given as two files. MIRA will look at the read names to identify pairs.">
+                <param name="filename" type="data" format="fastq" label="Reads in FASTQ format" />
+            </repeat>
+        </repeat>
+    </inputs>
+    <outputs>
+        <data name="out_fasta" format="fasta" label="MIRA contigs (FASTA)" />
+        <data name="out_maf" format="mira" label="MIRA Assembly" />
+        <data name="out_log" format="txt" label="MIRA log" />
+    </outputs>
+    <configfiles>
+        <configfile name="manifest">
+project = MIRA
+job = denovo,${job_type},${job_quality}
+parameters = -GE:not=1 -NW:cmrnl -DI:trt=/tmp
+## -GE:not is short for -GENERAL:number_of_threads and using one (1)
+## can be useful for repeatability of assemblies and bug hunting.
+##
+## -NW:cmrnl is short for -NAG_AND_WARN:check_maxreadnamelength
+## and without this MIRA aborts with read names over 40 characters
+## due to limitations of some downstream tools.
+##
+## -DI:trt is short for -DIRECTORY:tmp_redirected_to and should
+## point to a local hard drive (not something like NFS on network).
+
+#for $rg in $read_group
+#=======================================================
+readgroup
+technology = ${rg.technology}
+##MIRA will accept multiple filenames on one data line, or multiple data lines
+#for f in $rg.reads
+data = ${f.filename}
+#end for
+### Cheetah doesn't want dollar sign on list comprehension intermediate variables
+###set $files = ' '.join([str(f['filename']) for f in rg['reads']])
+##data = $files
+#end for
+        </configfile>
+    </configfiles>
+    <tests>
+            <!-- Based on the MIRA v3.4.1.1 bundled minidemo/estdemo2 which uses
+                 strain data and miraSearchESTSNPs. Here we just assemble it. --> 
+<!--
+Commenting out test until Galaxy framework is fixed,
+https://trello.com/c/zSTrfDOB/820-disambiguated-conditional-parameters-not-supported-in-unit-tests
+        <test>
+            <param name="job_method" value="denovo" />
+            <param name="job_type" value="est" />
+            <param name="job_qual" value="accurate" />
+            <param name="condBackbone.use" value="false" />
+            <param name="condSanger.use" value="true" />
+            <param name="condSanger.filename" value="tvc_mini.fastq" ftype="fastq" />
+            <param name="condRoche.use" value="false" />
+            <param name="condIllumina.use" value="false" /> 
+            <param name="condIonTorrent.use" value="false" />
+            <output name="out_fasta" file="tvc_contigs.fasta" ftype="fasta" />
+	</test>
+-->
+    </tests>
+    <help>
+
+**What it does**
+
+Runs MIRA v4.0 in de novo mode, collects the output, and throws away all the temporary files.
+
+MIRA is an open source assembly tool capable of handling sequence data from
+a range of platforms (Sanger capillary, Solexa/Illumina, Roche 454, Ion Torrent
+and also PacBio).
+
+It is particularly suited to small genomes such as bacteria.
+
+**Citation**
+
+If you use this Galaxy tool in work leading to a scientific publication please
+cite the following papers:
+
+Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
+Galaxy tools and workflows for sequence analysis with applications
+in molecular plant pathology. PeerJ 1:e167
+http://dx.doi.org/10.7717/peerj.167
+
+Bastien Chevreux, Thomas Wetter and Sándor Suhai (1999).
+Genome Sequence Assembly Using Trace Signals and Additional Sequence Information.
+Computer Science and Biology: Proceedings of the German Conference on Bioinformatics (GCB) 99, pp. 45-56.
+http://www.bioinfo.de/isb/gcb99/talks/chevreux/main.html
+
+This wrapper is available to install into other Galaxy Instances via the Galaxy
+Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/mira4_assembler
+    </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/mira4/mira4_mapping.xml	Thu Sep 26 12:23:42 2013 -0400
@@ -0,0 +1,103 @@
+<tool id="mira_4_0_mapping" name="MIRA v4.0 mapping" version="0.0.1">
+    <description>Takes Sanger, Roche, Illumina, Ion Torrent and PacBio data</description>
+    <requirements>
+        <requirement type="python-module">Bio</requirement>
+        <requirement type="binary">mira</requirement>
+        <requirement type="package" version="4.0">MIRA</requirement>
+    </requirements>
+    <version_command interpreter="python">mira4.py -v</version_command>
+    <command interpreter="python">
+mira4.py $manifest $out_maf $out_fasta $out_log
+    </command>
+    <inputs>
+        <param name="job_type" type="select" label="Assembly type">
+            <option value="genome">Genome</option>
+            <option value="est">EST (transcriptome)</option>
+        </param>
+        <param name="job_quality" type="select" label="Assembly quality grade">
+            <option value="accurate">Accurate</option>
+            <option value="draft">Draft</option>
+        </param>
+        <repeat name="read_group" title="Read Group" min="1">
+            <param name="technology" type="select" label="Read technology" help="MIRA has different error models for different technologies">
+                <option value="solexa">Solexa/Illumina</option>
+                <option value="sanger">Sanger capillary sequencing</option>
+                <option value="454">Roche 454</option>
+                <option value="iontor">Ion Torrent</option>
+                <option value="pcbiolq">PacBio low quality (raw)</option>
+                <option value="pcbiohq">PacBio high quality (corrected)</option>
+                <option value="text">Synthetic reads (database entries, consensus sequences, artificial reads, etc)</option>
+		<!-- TODO reference/backbone as an entry here? -->
+            </param>
+	    <repeat name="reads" title="Reads" min="1" help="Paired reads can be combined into one file, or given as two files. MIRA will look at the read names to identify pairs.">
+                <param name="filename" type="data" format="fastq" label="Reads in FASTQ format" />
+            </repeat>
+        </repeat>
+    </inputs>
+    <outputs>
+        <data name="out_fasta" format="fasta" label="MIRA contigs (FASTA)" />
+        <data name="out_maf" format="mira" label="MIRA Assembly" />
+        <data name="out_log" format="txt" label="MIRA log" />
+    </outputs>
+    <configfiles>
+        <configfile name="manifest">
+project = MIRA
+job = mapping,${job_type},${job_quality}
+parameters = -GE:not=1 -NW:cmrnl -DI:trt=/tmp
+## -GE:not is short for -GENERAL:number_of_threads and using one (1)
+## can be useful for repeatability of assemblies and bug hunting.
+##
+## -NW:cmrnl is short for -NAG_AND_WARN:check_maxreadnamelength
+## and without this MIRA aborts with read names over 40 characters
+## due to limitations of some downstream tools.
+##
+## -DI:trt is short for -DIRECTORY:tmp_redirected_to and should
+## point to a local hard drive (not something like NFS on network).
+
+#for $rg in $read_group
+#=======================================================
+readgroup
+technology = ${rg.technology}
+##MIRA will accept multiple filenames on one data line, or multiple data lines
+#for f in $rg.reads
+data = ${f.filename}
+#end for
+### Cheetah doesn't want dollar sign on list comprehension intermediate variables
+###set $files = ' '.join([str(f['filename']) for f in rg['reads']])
+##data = $files
+#end for
+        </configfile>
+    </configfiles>
+    <tests>
+    </tests>
+    <help>
+
+**What it does**
+
+Runs MIRA v4.0 in mapping mode, collects the output, and throws away all the temporary files.
+
+MIRA is an open source assembly tool capable of handling sequence data from
+a range of platforms (Sanger capillary, Solexa/Illumina, Roche 454, Ion Torrent
+and also PacBio).
+
+It is particularly suited to small genomes such as bacteria.
+
+**Citation**
+
+If you use this Galaxy tool in work leading to a scientific publication please
+cite the following papers:
+
+Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
+Galaxy tools and workflows for sequence analysis with applications
+in molecular plant pathology. PeerJ 1:e167
+http://dx.doi.org/10.7717/peerj.167
+
+Bastien Chevreux, Thomas Wetter and Sándor Suhai (1999).
+Genome Sequence Assembly Using Trace Signals and Additional Sequence Information.
+Computer Science and Biology: Proceedings of the German Conference on Bioinformatics (GCB) 99, pp. 45-56.
+http://www.bioinfo.de/isb/gcb99/talks/chevreux/main.html
+
+This wrapper is available to install into other Galaxy Instances via the Galaxy
+Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/mira4_assembler
+    </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/mira4/tool_dependencies.xml	Thu Sep 26 12:23:42 2013 -0400
@@ -0,0 +1,27 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="MIRA" version="4.0">
+        <install version="1.0">
+            <actions>
+                <!-- Sourceforge doesn't offer nice clean download URLs which is a shame -->
+                <action type="download_by_url">https://downloads.sourceforge.net/project/mira-assembler/MIRA/stable/mira_4.0rc2_linux-gnu_x86_64_static.tar.bz2?r=&amp;ts=1380039004&amp;use_mirror=kent</action>
+                <action type="move_directory_files">
+                    <source_directory>mira_4.0rc2_linux-gnu_x86_64_static/bin</source_directory>
+                    <destination_directory>$INSTALL_DIR</destination_directory>
+                </action>
+                <action type="set_environment">
+                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR</environment_variable>
+                </action>
+            </actions>
+        </install>
+        <readme>
+Downloads MIRA v4.0 RC2 from Sourceforge, requesting Bastien's precompiled binaries
+for 64bit Linux (x86_64). He also has binaries for Mac OS X, which we could
+use once the Galaxy installation framework allows that kind of flexibility.
+
+http://chevreux.org/projects_mira.html
+http://sourceforge.net/projects/mira-assembler/
+        </readme>
+    </package>
+</tool_dependency>
+