Mercurial > repos > peterjc > mira4_assembler
changeset 0:32f693f6e741 draft
Uploaded v0.0.1 preview0, very much a work in progress, primarily checking mira_datatypes dependency
author | peterjc |
---|---|
date | Thu, 26 Sep 2013 12:23:42 -0400 |
parents | |
children | 99fde64b9563 |
files | test-data/tvc_mini.fastq tools/mira4/README.rst tools/mira4/mira4.py tools/mira4/mira4_de_novo.xml tools/mira4/mira4_mapping.xml tools/mira4/tool_dependencies.xml |
diffstat | 6 files changed, 569 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tvc_mini.fastq Thu Sep 26 12:23:42 2013 -0400 @@ -0,0 +1,24 @@ +@gnlti136477918 +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXTTTCGAGCGGCCGCCCGGGCAGGTACCCTCCACCATGAAACCAGGCTTGGGTCCCTCAGGCTGCCTCTTGGTGCTGATAATCTTTCCCTGTGCCTTTGCCTCAGCCTTCAACTTATCATTCTTCTTGATCCTCTCCATTATCTCCTCATGGCAACGAGATGGCTGGACATGTTCCACACGAACATGAATCCTCTTCCTTATGATCCTGTTACCAACCTGTTTGTTGACCTCAACACCAACAGCGCGCTTGGTAACATTCCAGACCCGACCCGTGCGCCCATGGTAGAACTTGTGGGGCATACCTTTGTGGATCGACCCGTTAACCTTGACATCAACATAGTCGCCGACTTTGAAGATACGAAGGTAAGTTGTGAGATGGGTAGGACCCTTCTTCCTGAATGCCCGAGCAAATAGATCCCTGGTGCGTGAACTCAAACCGTGACCCGCCGGCATTTTGAGGTGTTTTTCAGCTGCCTTGTTCACXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX ++ 
+38<>><><<96-++42:AABBCCCCCCCCDFFFIYYIIIIIINTTTTNNNNNNTTTTTTNNIIIIHHHHHHYIFFFIDIINIITTTOQDDDHHHNTYTFFFIIINNIITDDDDDDFLLTTTLLLYYYYYYFFIIIILKOOYYYNNNNNNOOKKKKILLLFFIOOTTNLLLLLNYYYYKYFDDDLLLNNNTTNNLMKKSYNNJIIGGGGLLIILOOYYYYYYYYYTNNNNNTYYYYYYYTOLLLLLLNTTYYTTTLLTYYKKKONNNNLLLLGGINIIIIIINNNNNNNNIHHHHHHHHHHINIITTTTNNNNNTYTNNNNIIIFFDHHHFFINNNNIHHHDDEIDDDNNDDKQQQQMMMQQYYNNIDCBBBBAHIGGGKYYYOOD?<AACCCCCHCCC@>>>>HBBAAAA>@999AOOOYIIICC<<,,,99HHHFKK??C>>B>>H?6/+))42856301:7<>HHEI4/))-10449--0..((*4))*35A<9+++44>BB754---@<;42*))45:7024.(')))')++049>>41-'(,'(.2393222/3171((((-.4011/0+).)''),..4133><B=451119411+))<44:686:/066888888=::884))*'''**,''*-.''*,/2(*144+')64>;1/,'')''1*30+0..****(*0-.4-)*),'(''+,-((*+))**+,''''''***''***-*)121,''''(+*,,+-((****.0..,0*))*(),))''))*+,*)()))''''+'')'')**)()'','')'(**((*((*(((*441.-*****())+*''')-++*****-*((((**))))))*)))++***)(**11.()****0*-,((*--.***,((,,,**'')'''')'-((--,''**441***)+'(''*,*( +@gnlti136478624 +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXTTAGCGTGGTCGCGGCCGAGGTACCCTCTACCATGAAACCAGGCTTGGGTCCCTCTGGCTGTCTCTTGGTGCTGATAATCTTACCTTGTGCCTTGGCCTCAGCCTTCAACTTATCGTTCTTCTTGATCCTCTCCATGATCTCCTCATGGCACCTTGATGGCTGGACATGTTCCACACGAACATGAATCCTCTTCCTTATGATTCTGTTTCCAACCTGCTTGTTGACCTCAACACCAACAGCACGCTTGGTGACGTTCCATACCCGACCCGTGCGGCCATGATAGAACTTGTGGGGCATACCTTTGTGGATCGACCCGTTAACCTTGACATCAACATAGTCGGCGACTTTGAAGATACGAAGGTAAGTTGTGAGATGGGTAGGACCCTTCTTCCTGAATGGCCGAGCAAATAGATCTCTGGGTGCGGGAACTCAAACCGGGAACCGGCGGCATTTTGCGGTGTTTTTAGACCCTGCCGGGGGGGCGGTCGAAAGGCCGATTCTTGAGATTTTCCXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXGGGGGTAGGAGGTTGTAATTGGAAAAACCTGGGGTAGCAAGTTAATGGCTTGAGCAATTCCGTTCGGCGGGTGGGTATAGAGAAGGGGCGGGCGATCGGGATCCGAAGATGGGGAGCGGATGGGGAGGAGAGGCAGGTGGGGATATAGGGGGGGGGTGGGGTAGGGGAGGGGCGGTGCTGTAGGGGGAGGGGCGGCGTTGGTTTTCTGTGTTACGAGTTGGGTGACCCGAAGTAATTGGGG ++ 
++1449>>>;=::AADDCCCCFIICCBB>???BBDDDDYYHBCCBBFF@@777BBG@>7584;;@DHDDDDDDMNIIIIIYYYTOYKKKMIDDDDDHOKKKQSTTTNNIIYYFFFFFIDDDDDIYOOIIDAA>DADDFDLLDDDIKKKKOKKKKKKYYYOOJJJOYYYYTOOKKPMMMMMSSSSMMMSSYYYYLJIIIID=====FKKKKKKYYYOOKKIIIIISSFFDIHIIKSSTOOKKKLYYSSKMIIIOOIIIDDDDHDDDIOOIIFFFIIIIKKKMIIIIIIMKKKKIIIFDDDDADDIDDDDDDDHDDDDFFF99///<<HFFFFFFFFGOOYTDDDHHH99,,,95>>>>47//-</3-822.446777BBBFFIOC>6.++-53:?:>7744213...772007:9:-++33>>DH>>??933;;FQ<93/+10++/.//-10234:1//223;:/,,***++'')'+,/)))-.2.++((.0***,))*,0(())''))))+'')***''))***))),669+,*****..''')*,**,*))))*'',)))'(++,++((*+*)*.*))''')***''.*))'')''''''***+)))++**(''''')****)''')'(***''**+/.)))*)')((''***(('')'')-))''''.'')))**'+''''**))''))***+((***)%(((***(((((,.,,(((((*(((+.(()'''')*(())(***((**-+,,)')''*/,''''**'''))((''*+((''''))*))'')'')),.)())'''''('*)**+***-*(')''))((+++0***(('')'')**()++*+**(')).5+*'''')*,---'''')''' +@gnlti136478626 +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXTTTCGAGCGGCCGCCCGGGCAGGTCTGAAAAACACCTCAAAATGCCGGCGGGTCACGGTTTGAGGTCACGCACCAGAGATCTATTTGCTCGGGCATTCAGGAAGAAGGGTCCTACCCATCTCACAACTTACCTTCGTATCTTCAAAGTCGGCGACTATGTTGATGTCAAGGTTAACGGGTCGATCCACAAAGGTATGCCCCACAAGTTCTACCATGGCCGCACGGGTCGGGTATGGAACGTCACCAAGCGCGCTGTTGGTGTTGAGGTCAAGCATGAGGTGGGAAACAGAATCATAAGGAAAAGGATTCATGTGCGTGTGGAAGATGGTCAGGGATCGAGGTGCCATGAGGAGATCATGGTGAGGATCGAGAAGAACGATTAAGGGGAAGGCTGAGGGCGAGGGACAAGGTAAGATTATCAGCGACAAGAGACAAGCGGAGGGAGCCACGGCTGGGTGTGTGGTAGAGGGTGCCTCGGGCCGGGGCAAGGCTAAGCCGAATGCTGGGGATATTCATTAGACTGGGGGGCGGTCGAGGGTGGGTCGTAATGGGCCATTTGCGCGTATGGTGGGGTTGTTTGACATTGCGCTGGCCTCGGTTTACAGGGTTGTGATTGGAAAGCCGTGCGGTTGCCAACGTTAGTGTTTGGGAGACGTTCGCGTTCGGGGGCTGGGGTATTAAGGGGGGTCTGGGGTAAGGGCGTGCGAGGATGGTGGAGGGGTTTGGGGTTGGGCGTCTGTTTCGGGGTTTGTGGCGGGGGGTTGGTGGTTGCGTGACGGTGGGGGGGTTGGGCAGGCTATGGCGGGGCGTGGTGTTGGTCTGGTTGTGAGGATGTGAGTGTGCGTTGTTGTGTATTGGGACAGGT ++ 
+))..28:>C>CDDDDDDCCCCDDD>>A990028>HFFFIIFDDDDDHOTYYNGFAAAA;>??BQQIDDDIDIIIIGMMDDDDDDNIIIGGFFFFIMYKKIKKDD>D>>>C>D>><<<>::..'')46>IIIQIYYYMFDDAADKKKKYYYYYYNNDDDDIIGGKK777MMFFFKDDFAADDDDFKKKKFFFKIDDDDDDKIIIIEMFF=@@@B@BB??>O???OOTTTTLLKKK???DDDD>AAAA>B994B122:=B44/--447<155>>IIFFIKKKGGGGIIN944499C>>>>>>9</--7/00?;33/5/''''))**,.,,,2/0/20004449,,,-,6,--2:G>D>D74-++.15;911**+/-''))****-,''))1.2-.*****-<>71+**()+19:46.--+-*1611+*((****'''''(-/411-1***.((+***('**-8211,-**'''')+,,4,))''))))'')),,(')))).5++))'')).1-+,,.-+(''(++,,,('''))*''''))')+).''))*)-('')+)*((++.+++-*))('))''))+-0./,,))''))'')''*'')))''****.+*''*))'')'')'')))**+++))'''))'''*''*((****'''')'(,(''''''')*''))''))++*(((*((-))'')-)**()******042))***((*))''))*,-.((*)'')%%%)+++****((***-+*)''''))))''''))''''''')))),))***+('))))+.,)()+**''+.-)))(')''))'(***,(((,,***((((**++'')'(*))'''(**'''******((****//--))0+)''))*****))'')%%'')('*)))-(*01**))'(( +@gnlti136479063 +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXTTAGCGTGGTCGCGGCCGAGGTACCCTCTACCATGAAACCAGGCTTGGGTCCCTCTGGCTGTCTCTTGGTGCTGATAATCTTACCTTGTGCCTTGGCCTCAGCCTTCAACTTATCGTTCTTCTTGATCCTCTCCATGATCTCCTCATGGCACCTTGATGGCTGGACATGTTCCACACGAACATGAATCCTCTTCCTTATGATTCTGTTTCCAACCTGCTTGTTGACCTCAACACCAACAGCGCGCTTGGTGACGTTCCATACCCGACCCGTGCGCCCATGGTAGAACTTGTGGGGCATACCTTTGTGGATCGACCCGTTAACCTTGACATCAACATAGTCGGCGACTTTGAAGATACGAAGGTAAGTTGTGAGATGGGTAGGACCCTTCTTCCTGAATGGCCGAGCAAATAGATCCCTGGTGCGTGAGCTCAAAGCGTGGACCGGCGGCATTTTAGGGTGTTTTTCAGCTGGCTCGGTGGTTTGAATGTGACTTGGGCGGGGGGGGGGTCGAAAGGCGAATTTGGAGATTTTCATAAAATTGGGGCGGTTGAAGATTGATTTTAAGGGGCAATTTGGGCTATAGGGGTGGTTTTAAATTAATGGGGGTGGTTTAAAAGTGTGATGGGGAAACGTGGGTTACCAATTTATGGGTGTGTGGAGTTCCCTTTGTGAGGTGGTATAGGAAAGGGGGGGCGTGACCTGCCACGTGGGGGGGGAAGTGTATGGGGGCGGGTTGGGGGGTTGAGGGGGGTGTGGGTGTGGGGGTGGGTTTTGTTAGGCGAGGTGGTTTTTTTTTCTTTTTTTTTTTAGTGGAGGTGT ++ 
+04--46:<<B<<>@>HHEB<822<<IEHIHCCCCCCIIIITTIIIIIINNTTTTTYTIHHHHHHNNNIIDDDDFFNTKM>>?OQFFFMKOOTFDDDDHHIIIIOHFFFFFINTDDAAAADHHDDDOYNNHHFFDDDDDDFDC=AA=DIIIIFFNHHFFFFNNNNNNNNNNDD448DNTOOKKKOBB?DFGGGNOTOO555>>A>>>AAF:::>>@DB=====5AACOIIBCCBB<5005<41''+18EAAAHHHB>96-+,+14:AAIB??>>CD>>;87>5:30-14477<>@CDDD>>?==MQYI>H---88:77:<B>>=33000008<9::>BBBFHHCCC>IFDDDOOOQIQQII:2((+6<552228>DDDEH>>33399>31)''-.FFIMIIIO>>333;@II>71:37<AAEIAA778<B69,,,01BBIKFF>>>944,,,6:6/(((*44<<43,,,66AEH98,,,6/+**--..((*,1><::65/0*'))'(,-,)++*31+((*((**.,+*'')'()'''*++))''('*+26410''''+)(())''))*'(***++*))*((****(''''')++**)+'')*))*.-***))*)*-/****,-30.)''''''''***''''')-.*))'')''**++*))/,,((,+-***+)'''''')+'''.*)')'(0-+((+++)))'''(*+'''')'(**,***''''))*))'')))),''))''))*((***))/()(*''''++**((((((****(')))))*))'),))'')''.)))))))'')+,++')-))'(+))***))''))****++))))+1-**))**'(140''))**))'')+**( +@gnlti136479357 +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXTTGTGGTCGCGGCCGAGGTACCCTGCACCATGAAACCAGGCTTGGGTCCCTCTGGCTGTCTCTTGGTGCTGATAATCTTACCTTGTGCCTTGGCCTCAGCCTTCAACTTATCGTTCTTCTTGATCCTCTCCATGATCTCCTCATGGCACCTTGATGGCTGGACATGTTCCACACGAACATGAATCCTCTTCCTTATGATTCTGTTTCCAACCTGCTTGTTGACCTCAACACCAACAGCGCGCTTGGTGACGTTCCATACCCGACCCGTGCGGCCATGGTAGAACTTGTGGGGCATACCTTTGTGGATCGACCCGTTAACCTTGACATCAACATAGTCGCCGACTTTGAAGATACGAAGGTAAGTTGTGAGATGGGTAGGACCCTTCTTCCTGAATGCCCGAGCAAATAGATCCCTGGTGCGTGGACTCCAAACGTGAACTTGCCGGGCGGGGGGGAGAGGGGGAGCGGGGGGGGGGGAGAATAAGGGGGGAGGGGAGGGGGAGAGAAAAGGGAGGAGGGGGGGGGTAGGGAGGGAGAGGGAGGGGGGAGGGGGGGGGAGGGGGGGGGGGGAGGGAAGGGGGGGGAGGGGGGGAGGGGGAGGGGAGAGGGGGAGGGGAGGGGGGGGGGAGGGGAGGGGAAGAGGGGGGGGGGGGGGGGGGGAGGGGGGGAGGGGGGGGGGGAGGGGGGGGGGGGGGAGGGAGAAGGAGAAA ++ 
+.4<BB;>>>>>>>>FDCCCCCCIINIIIDCCCCCCDDDDYQKKFNNNCAAAAAINNIIINTIIHHDDDDDDDDDDDKITTTTLYYYYYLFFIIIILOKKKIIIIKKOKYFDDDDDFIIIIIKKKLLLLTIDDDDDFFDDDDDNNIIIIIKKDDDDHHJOYYSSMMFFADDDDLYSSB>666>BDDDDKOOKJJOOJJED==99AOIJJOOYYYLJJJLLTTTTLYYYYYYYYYYLLIIBBADDNOIIIIIINDAADDDDKOOIIIIIFDDA>7==@@DII??887BBOOFDDDDDIYYNNNHDDKOO?BBHHINODDAF>A>AADFFIIOGFFFFIITOOIDDDDDDDDDDDHHD89,,,<>FFFDD>99<<<B<845;<BAAA;>99=EBIIIIIOOD@@><>AB<8::AA:>AABHIHHHCCC99--+46CCCIIIIAA551-4440++)))4499+))019<>>>1/()0/-('''129.,//+((**+++8@@,*)11))*+***+++))%(,.*)))..,.2+**+8..)))),*))'')))''.''+)*++)+)))''))'(++++*))'''''))****))''))/.03:=741.''**),''''**))))))4**)')'').11.('*))'%)*-.2))*.0('''))(')''))****))('+'')''))****,((((**))1..''))***)1-1-.''),,''))%(.**)(')))*)-().-.***))1)''+''))****))'' +@gnlti136479522 +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXTTTCGAGCGGCCGCCCGGGCAGGTCTGAAAAACACCGCAAAATGCCGGCGGGTCACGGTTTGGGGTCGCGCACCAGAGATCTATTTGCTCGGGCATTCAGGAAGAAGGGTCCTACCCATCTCACAACTTACCTTCGTATCTTCAAAGTCGGCGACTATGTTGATGTCAAGGTTAACGGGTCGATCCACAAAGGTATGCCCCACAAGTTCTATCATGGCCGCACGGGTCGGGTATGGAACGTCACCAAGCGTGCTGTTGGGTGTNGAGGTCAAAAGCAAGTTGGAAACAGAATCATAAGGAAGAGGATTCATGTCCGTGTGGAACATGTCCAGCCATCAAGGTGCCATGAGGAGATCATGGAGAGGATCAAGAAGAACGATTAAGTGAAGGCTGAGGGCAAGGCACAAGGTATGATTATCAGGACCAAGAGACAGGCAGAGGGGACCAAGGCCTGGTTTCATGGGTXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX ++ 
+(/..2>>H@CACCHICCCCCCIIYTTTFA>>>ADIIIIOTNNNNNIKINIIHDDDIIIHOMMFNNHDDDDDFINIIKOKKKFFIIIITYTLYIIIIIINTTTIFFFFFKLLYYYYYNNNNHHDDDGGNYYYYYYFDGDDDTHIIIIIIIKFFFIIITYINIIIITTTTYYYYTTNNNNNNNIIIIIIIIIFFIOOOOOIFFFFIIINOFKKNND84**+::FFFDHDDDDDIDD>44***49IIIFCIA?94233AIIIMQOOBFF:4-***66CCCCD>>444>?B44*((***45C>@BHIAAAA94%!%44=1-''''))''+(+/,((*245411.40)((+4::79..***-+/.()14BEEIIBCFIIHD88,,,NBID>>A>BB?AAAA>H:::;::4-+,,4/;46,**4841))/1.''*)))+444+++520'')11)(*.+,0**0((*159501224594406652//-/-2,/*1*')+.()./1.01::>>>>5511.4***1:5*((,-/-((******+*-'')((/20-)-,-*++.1/.(())''),351''))'(..280.'')+()**,398..''))**((+1.(())''))**-.--,,**''*)((-)***(()),1,/.1,))))+,+*+*++,-,'''')**((+*++,))))''))**1.,,***+****,+**+++4***.*))'')'')))''))*.5811.--+,+*+*))+,,-..+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/mira4/README.rst Thu Sep 26 12:23:42 2013 -0400 @@ -0,0 +1,114 @@ +Galaxy tool to wrap the MIRA sequence assembly program (v4.0) +============================================================= + +This tool is copyright 2011-2013 by Peter Cock, The James Hutton Institute +(formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved. +See the licence text below (MIT licence). + +This tool is a short Python script (to collect the MIRA output and move it +to where Galaxy expects the files) and associated Galaxy wrapper XML files. + +It is available from the Galaxy Tool Shed at: +http://toolshed.g2.bx.psu.edu/view/peterjc/mira4_assembler + + +Automated Installation +====================== + +This should be straightforward. Via the Tool Shed, Galaxy should automatically +install the 'mira' datatype, download and install the precompiled binary for +MIRA v4.0 for the Galaxy wrapper, and run any tests. + +For MIRA 4, the Galaxy wrapper has been split in two, allowing separate +cluster settings for de novo usage (high RAM) and mapping (lower RAM). +Consult the Galaxy administration documentation for your cluster setup. + +WARNING: This tool was developed to construct viral genome assembly and +mapping pipelines, for which the run time and memory requirements are +negligible. For larger tasks, be aware that MIRA can require vast amounts +of RAM and run-times of over a week are possible. This tool wrapper makes +no attempt to spot and reject such large jobs. + + +Manual Installation +=================== + +First install the 'mira' datatype for Galaxy, available here: + +* http://toolshed.g2.bx.psu.edu/view/peterjc/mira_datatypes + +There are three Galaxy files to install: + +* mira4.py (the Python script) +* mira4_de_novo.xml (the Galaxy tool definition for de novo usage) +* mira4_mapping.xml (the Galaxy tool definition for mapping usage) + +The suggested location is a new tools/mira4 folder. 
You will also need to +modify the tool_conf.xml file to tell Galaxy to offer the tool, and also do +this to tool_conf.xml.sample in order to run any tests:: + + <tool file="mira4/mira4_de_novo.xml" /> + <tool file="mira4/mira4_mapping.xml" /> + +You will also need to install MIRA; we used version 4.0 RC2. See: + +* http://chevreux.org/projects_mira.html +* http://sourceforge.net/projects/mira-assembler/ + +You may wish to use different cluster setups for the de novo and mapping +tools, see above. + + +History +======= + +======= ====================================================================== +Version Changes +------- ---------------------------------------------------------------------- +v0.0.1 - Initial version (prototype using MIRA 4.0 RC2, and wrapper for v3.4) +======= ====================================================================== + + +Developers +========== + +Development is on a dedicated GitHub repository: +https://github.com/peterjc/pico_galaxy/tree/master/tools/mira_4_0 + +For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball use +the following command from the Galaxy root folder:: + + $ tar -czf mira4_wrapper.tar.gz tools/mira4/README.rst tools/mira4/mira4_de_novo.xml tools/mira4/mira4_mapping.xml tools/mira4/mira4.py tools/mira4/tool_dependencies.xml test-data/tvc_mini.fastq test-data/tvc_contigs_mira4.fasta + +Check this worked:: + + $ tar -tzf mira4_wrapper.tar.gz + tools/mira4/README.rst + tools/mira4/mira4_de_novo.xml + tools/mira4/mira4_mapping.xml + tools/mira4/mira4.py + tools/mira4/tool_dependencies.xml + test-data/tvc_mini.fastq + test-data/tvc_contigs_mira4.fasta + + +Licence (MIT) +============= + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 
+copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE.
#!/usr/bin/env python
# tools/mira4/mira4.py -- file added in changeset 0:32f693f6e741
# (Thu Sep 26 12:23:42 2013 -0400).
"""A simple wrapper script to call MIRA and collect its output.

Usage (as invoked by the Galaxy tool XML files)::

    mira4.py MANIFEST OUT_MAF OUT_FASTA OUT_LOG
    mira4.py -v

The script rewrites the manifest so every ``data =`` entry points at a
``*.fastq`` symlink, runs ``mira MANIFEST`` in the current directory with
its output captured in OUT_LOG, moves the MAF and unpadded FASTA results
to OUT_MAF and OUT_FASTA, and finally removes MIRA's working folder.
"""
import os
import sys
import subprocess
import shutil
import time

WRAPPER_VER = "0.0.1" #Keep in sync with the XML file

def stop_err(msg, err=1):
    """Write ``msg`` to stderr and exit with status ``err``."""
    sys.stderr.write(msg+"\n")
    sys.exit(err)


def get_version(mira_binary):
    """Run MIRA to find its version number.

    Returns the first line of the combined stdout/stderr of
    ``<mira_binary> -v``.  Exits with status 1 if the binary cannot
    be launched.
    """
    # At the command line I would use: mira -v | head -n 1
    # however there is some pipe error when doing that here.
    cmd = [mira_binary, "-v"]
    try:
        # universal_newlines=True gives text (not bytes) output, so the
        # split below behaves the same on Python 2 and Python 3.
        child = subprocess.Popen(cmd,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT,
                                 universal_newlines=True)
    except Exception as err:
        sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err))
        sys.exit(1)
    ver, tmp = child.communicate()
    del child
    return ver.split("\n", 1)[0]


# NOTE(review): hard-coded, site-specific install location.  The
# tool_dependencies.xml file already prepends $INSTALL_DIR to PATH, so this
# looks like a development-time hack - confirm before removing.
os.environ["PATH"] = "/mnt/galaxy/downloads/mira_4.0rc2_linux-gnu_x86_64_static/bin/:%s" % os.environ["PATH"]
mira_binary = "mira"
mira_ver = get_version(mira_binary)
if not mira_ver.strip().startswith("4.0"):
    stop_err("This wrapper is for MIRA V4.0, not:\n%s" % mira_ver)
if "-v" in sys.argv:
    # Single-argument print(...) produces identical output on Python 2 and 3.
    print("MIRA wrapper version %s," % WRAPPER_VER)
    print(mira_ver)
    sys.exit(0)


def log_manifest(manifest):
    """Write the manifest file to stderr."""
    sys.stderr.write("\n%s\nManifest file\n%s\n" % ("="*60, "="*60))
    with open(manifest) as h:
        for line in h:
            sys.stderr.write(line)
    sys.stderr.write("\n%s\nEnd of manifest\n%s\n" % ("="*60, "="*60))


def massage_symlinks(manifest):
    """Create FASTQ aliases and edit the manifest to use them.

    Short term measure for MIRA 4.0RC2 which depends on data file
    extensions to decide the file format, and doesn't like *.dat
    as used in Galaxy.

    Every file named on a ``data =`` line is symlinked as
    ``input<N>.fastq`` next to the manifest, and the manifest is rewritten
    in place to reference the aliases.  Exits via stop_err() if an input
    file is missing or a symlink cannot be created.  Returns True.
    """
    base = os.path.split(manifest)[0]
    with open(manifest) as h:
        lines = h.readlines()
    f = 0
    for i, line in enumerate(lines):
        if not line.startswith("data ="):
            continue
        #Assumes no spaces in filename, would they have to be escaped?
        new_line = "data ="
        # str.split() with no argument never yields empty strings.
        for filename in line[6:].strip().split():
            # Explicit check rather than assert, which vanishes under -O.
            if not os.path.isfile(filename):
                stop_err("Input file listed in manifest not found:\n%s" % filename)
            f += 1
            alias = os.path.join(base, "input%i.fastq" % f)
            new_line += " " + alias
            try:
                # os.symlink avoids building a shell command from file
                # names; abspath keeps the link valid even when the
                # manifest lives in another directory.
                os.symlink(os.path.abspath(filename), alias)
            except OSError as err:
                stop_err("Problem creating FASTQ alias:\n%s -> %s\n%s"
                         % (alias, filename, err))
        lines[i] = new_line + "\n"
    with open(manifest, "w") as h:
        h.writelines(lines)
    return True


def collect_output(temp, name):
    """Move MIRA's MAF and unpadded FASTA results to the Galaxy outputs.

    Looks in ``<temp>/<name>_assembly/<name>_d_results``.  Relies on the
    module-level ``manifest``, ``out_maf`` and ``out_fasta`` set from the
    command line below.  Exits via stop_err() (after logging the manifest)
    if the folder is missing or empty, or an expected file is absent.
    """
    f = "%s/%s_assembly/%s_d_results" % (temp, name, name)
    if not os.path.isdir(f):
        log_manifest(manifest)
        stop_err("Missing output folder")
    if not os.listdir(f):
        log_manifest(manifest)
        stop_err("Empty output folder")
    missing = []
    for old, new in [("%s/%s_out.maf" % (f, name), out_maf),
                     ("%s/%s_out.unpadded.fasta" % (f, name), out_fasta)]:
        if not os.path.isfile(old):
            missing.append(os.path.splitext(old)[-1])
        else:
            shutil.move(old, new)
    if missing:
        log_manifest(manifest)
        sys.stderr.write("Contents of %r: %r\n" % (f, os.listdir(f)))
        stop_err("Missing output files: %s" % ", ".join(missing))

def clean_up(temp, name):
    """Remove the ``<temp>/<name>_assembly`` folder if it exists."""
    folder = "%s/%s_assembly" % (temp, name)
    if os.path.isdir(folder):
        shutil.rmtree(folder)

#TODO - Run MIRA in /tmp or a configurable directory?
#Currently Galaxy puts us somewhere safe like:
#/opt/galaxy-dist/database/job_working_directory/846/
temp = "."
#name, out_fasta, out_qual, out_ace, out_caf, out_wig, out_log = sys.argv[1:8]
name = "MIRA"
if len(sys.argv) < 5:
    # Give a usable message instead of a bare tuple-unpacking ValueError.
    stop_err("Expected four arguments: manifest, MAF output, "
             "FASTA output and log filenames")
manifest, out_maf, out_fasta, out_log = sys.argv[1:5]

#Hack until MIRA v4 lets us specify file format explicitly,
massage_symlinks(manifest)

start_time = time.time()
#cmd_list =sys.argv[8:]
cmd_list = [mira_binary, manifest]
cmd = " ".join(cmd_list)

assert os.path.isdir(temp)
d = "%s_assembly" % name
assert not os.path.isdir(d), "Path %s already exists" % d
try:
    #Check path access
    os.mkdir(d)
except Exception as err:
    log_manifest(manifest)
    sys.stderr.write("Error making directory %s\n%s" % (d, err))
    sys.exit(1)

#print os.path.abspath(".")
#print cmd

handle = open(out_log, "w")
try:
    #Run MIRA
    child = subprocess.Popen(cmd_list,
                             stdout=handle,
                             stderr=subprocess.STDOUT)
except Exception as err:
    log_manifest(manifest)
    sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err))
    handle.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err))
    handle.close()
    # Remove the working folder created above so nothing is left behind.
    clean_up(temp, name)
    sys.exit(1)
#Use .communicate as can get deadlocks with .wait(),
stdout, stderr = child.communicate()
assert not stdout and not stderr #Should be empty as sent to handle
run_time = time.time() - start_time
return_code = child.returncode
handle.write("\n\nMIRA took %0.2f minutes\n" % (run_time / 60.0))
print("MIRA took %0.2f minutes" % (run_time / 60.0))
if return_code:
    handle.write("Return error code %i from command:\n" % return_code)
    handle.write(cmd + "\n")
    handle.close()
    clean_up(temp, name)
    log_manifest(manifest)
    stop_err("Return error code %i from command:\n%s" % (return_code, cmd),
             return_code)
handle.close()

#print "Collecting output..."
collect_output(temp, name)

#print "Cleaning up..."
clean_up(temp, name)

print("Done")
<!-- tools/mira4/mira4_de_novo.xml: file added in changeset 0:32f693f6e741 (Thu Sep 26 12:23:42 2013 -0400) -->
<!-- Galaxy tool definition: runs MIRA v4.0 in de novo mode through mira4.py,
     using a manifest generated from the configfile template below. -->
<tool id="mira_4_0_de_novo" name="MIRA v4.0 de novo assembler" version="0.0.1">
    <description>Takes Sanger, Roche, Illumina, Ion Torrent and PacBio data</description>
    <requirements>
        <requirement type="python-module">Bio</requirement>
        <requirement type="binary">mira</requirement>
        <requirement type="package" version="4.0">MIRA</requirement>
    </requirements>
    <version_command interpreter="python">mira4.py -v</version_command>
    <command interpreter="python">
mira4.py $manifest $out_maf $out_fasta $out_log
    </command>
    <inputs>
        <param name="job_type" type="select" label="Assembly type">
            <option value="genome">Genome</option>
            <option value="est">EST (transcriptome)</option>
        </param>
        <param name="job_quality" type="select" label="Assembly quality grade">
            <option value="accurate">Accurate</option>
            <option value="draft">Draft</option>
        </param>
        <repeat name="read_group" title="Read Group" min="1">
            <param name="technology" type="select" label="Read technology" help="MIRA has different error models for different technologies">
                <option value="solexa">Solexa/Illumina</option>
                <option value="sanger">Sanger capillary sequencing</option>
                <option value="454">Roche 454</option>
                <option value="iontor">Ion Torrent</option>
                <option value="pcbiolq">PacBio low quality (raw)</option>
                <option value="pcbiohq">PacBio high quality (corrected)</option>
                <option value="text">Synthetic reads (database entries, consensus sequences, artificial reads, etc)</option>
                <!-- TODO reference/backbone as an entry here? -->
            </param>
            <repeat name="reads" title="Reads" min="1" help="Paired reads can be combined into one file, or given as two files. MIRA will look at the read names to identify pairs.">
                <param name="filename" type="data" format="fastq" label="Reads in FASTQ format" />
            </repeat>
        </repeat>
    </inputs>
    <outputs>
        <data name="out_fasta" format="fasta" label="MIRA contigs (FASTA)" />
        <data name="out_maf" format="mira" label="MIRA Assembly" />
        <data name="out_log" format="txt" label="MIRA log" />
    </outputs>
    <configfiles>
        <!-- Cheetah template for the MIRA manifest; the text below starts at
             column 0 on purpose because mira4.py looks for lines that begin
             with "data =". -->
        <configfile name="manifest">
project = MIRA
job = denovo,${job_type},${job_quality}
parameters = -GE:not=1 -NW:cmrnl -DI:trt=/tmp
## -GE:not is short for -GENERAL:number_of_threads and using one (1)
## can be useful for repeatability of assemblies and bug hunting.
##
## -NW:cmrnl is short for -NAG_AND_WARN:check_maxreadnamelength
## and without this MIRA aborts with read names over 40 characters
## due to limitations of some downstream tools.
##
## -DI:trt is short for -DIRECTORY:tmp_redirected_to and should
## point to a local hard drive (not something like NFS on network).

#for $rg in $read_group
#=======================================================
readgroup
technology = ${rg.technology}
##MIRA will accept multiple filenames on one data line, or multiple data lines
#for f in $rg.reads
data = ${f.filename}
#end for
### Cheetah doesn't want dollar sign on list comprehension intermediate variables
###set $files = ' '.join([str(f['filename']) for f in rg['reads']])
##data = $files
#end for
        </configfile>
    </configfiles>
    <tests>
        <!-- Based on the MIRA v3.4.1.1 bundled minidemo/estdemo2 which uses
             strain data and miraSearchESTSNPs. Here we just assemble it.
        -->
<!--
Commenting out test until Galaxy framework is fixed,
https://trello.com/c/zSTrfDOB/820-disambiguated-conditional-parameters-not-supported-in-unit-tests
        <test>
            <param name="job_method" value="denovo" />
            <param name="job_type" value="est" />
            <param name="job_qual" value="accurate" />
            <param name="condBackbone.use" value="false" />
            <param name="condSanger.use" value="true" />
            <param name="condSanger.filename" value="tvc_mini.fastq" ftype="fastq" />
            <param name="condRoche.use" value="false" />
            <param name="condIllumina.use" value="false" />
            <param name="condIonTorrent.use" value="false" />
            <output name="out_fasta" file="tvc_contigs.fasta" ftype="fasta" />
        </test>
-->
    </tests>
    <help>

**What it does**

Runs MIRA v4.0 in de novo mode, collects the output, and throws away all the temporary files.

MIRA is an open source assembly tool capable of handling sequence data from
a range of platforms (Sanger capillary, Solexa/Illumina, Roche 454, Ion Torrent
and also PacBio).

It is particularly suited to small genomes such as bacteria.

**Citation**

If you use this Galaxy tool in work leading to a scientific publication please
cite the following papers:

Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
Galaxy tools and workflows for sequence analysis with applications
in molecular plant pathology. PeerJ 1:e167
http://dx.doi.org/10.7717/peerj.167

Bastien Chevreux, Thomas Wetter and Sándor Suhai (1999).
Genome Sequence Assembly Using Trace Signals and Additional Sequence Information.
Computer Science and Biology: Proceedings of the German Conference on Bioinformatics (GCB) 99, pp. 45-56.
http://www.bioinfo.de/isb/gcb99/talks/chevreux/main.html

This wrapper is available to install into other Galaxy Instances via the Galaxy
Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/mira4_assembler
    </help>
</tool>
<!-- tools/mira4/mira4_mapping.xml: file added in changeset 0:32f693f6e741 (Thu Sep 26 12:23:42 2013 -0400) -->
<!-- Galaxy tool definition: runs MIRA v4.0 in mapping mode through mira4.py,
     using a manifest generated from the configfile template below.
     NOTE(review): no reference/backbone input is defined yet (see the TODO
     inside the technology select), confirm how mapping is meant to get one. -->
<tool id="mira_4_0_mapping" name="MIRA v4.0 mapping" version="0.0.1">
    <description>Takes Sanger, Roche, Illumina, Ion Torrent and PacBio data</description>
    <requirements>
        <requirement type="python-module">Bio</requirement>
        <requirement type="binary">mira</requirement>
        <requirement type="package" version="4.0">MIRA</requirement>
    </requirements>
    <version_command interpreter="python">mira4.py -v</version_command>
    <command interpreter="python">
mira4.py $manifest $out_maf $out_fasta $out_log
    </command>
    <inputs>
        <param name="job_type" type="select" label="Assembly type">
            <option value="genome">Genome</option>
            <option value="est">EST (transcriptome)</option>
        </param>
        <param name="job_quality" type="select" label="Assembly quality grade">
            <option value="accurate">Accurate</option>
            <option value="draft">Draft</option>
        </param>
        <repeat name="read_group" title="Read Group" min="1">
            <param name="technology" type="select" label="Read technology" help="MIRA has different error models for different technologies">
                <option value="solexa">Solexa/Illumina</option>
                <option value="sanger">Sanger capillary sequencing</option>
                <option value="454">Roche 454</option>
                <option value="iontor">Ion Torrent</option>
                <option value="pcbiolq">PacBio low quality (raw)</option>
                <option value="pcbiohq">PacBio high quality (corrected)</option>
                <option value="text">Synthetic reads (database entries, consensus sequences, artificial reads, etc)</option>
                <!-- TODO reference/backbone as an entry here? -->
            </param>
            <repeat name="reads" title="Reads" min="1" help="Paired reads can be combined into one file, or given as two files. MIRA will look at the read names to identify pairs.">
                <param name="filename" type="data" format="fastq" label="Reads in FASTQ format" />
            </repeat>
        </repeat>
    </inputs>
    <outputs>
        <data name="out_fasta" format="fasta" label="MIRA contigs (FASTA)" />
        <data name="out_maf" format="mira" label="MIRA Assembly" />
        <data name="out_log" format="txt" label="MIRA log" />
    </outputs>
    <configfiles>
        <!-- Cheetah template for the MIRA manifest; the text below starts at
             column 0 on purpose because mira4.py looks for lines that begin
             with "data =". -->
        <configfile name="manifest">
project = MIRA
job = mapping,${job_type},${job_quality}
parameters = -GE:not=1 -NW:cmrnl -DI:trt=/tmp
## -GE:not is short for -GENERAL:number_of_threads and using one (1)
## can be useful for repeatability of assemblies and bug hunting.
##
## -NW:cmrnl is short for -NAG_AND_WARN:check_maxreadnamelength
## and without this MIRA aborts with read names over 40 characters
## due to limitations of some downstream tools.
##
## -DI:trt is short for -DIRECTORY:tmp_redirected_to and should
## point to a local hard drive (not something like NFS on network).

#for $rg in $read_group
#=======================================================
readgroup
technology = ${rg.technology}
##MIRA will accept multiple filenames on one data line, or multiple data lines
#for f in $rg.reads
data = ${f.filename}
#end for
### Cheetah doesn't want dollar sign on list comprehension intermediate variables
###set $files = ' '.join([str(f['filename']) for f in rg['reads']])
##data = $files
#end for
        </configfile>
    </configfiles>
    <tests>
    </tests>
    <help>

**What it does**

Runs MIRA v4.0 in mapping mode, collects the output, and throws away all the temporary files.

MIRA is an open source assembly tool capable of handling sequence data from
a range of platforms (Sanger capillary, Solexa/Illumina, Roche 454, Ion Torrent
and also PacBio).

It is particularly suited to small genomes such as bacteria.

**Citation**

If you use this Galaxy tool in work leading to a scientific publication please
cite the following papers:

Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
Galaxy tools and workflows for sequence analysis with applications
in molecular plant pathology. PeerJ 1:e167
http://dx.doi.org/10.7717/peerj.167

Bastien Chevreux, Thomas Wetter and Sándor Suhai (1999).
Genome Sequence Assembly Using Trace Signals and Additional Sequence Information.
Computer Science and Biology: Proceedings of the German Conference on Bioinformatics (GCB) 99, pp. 45-56.
http://www.bioinfo.de/isb/gcb99/talks/chevreux/main.html

This wrapper is available to install into other Galaxy Instances via the Galaxy
Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/mira4_assembler
    </help>
</tool>
<?xml version="1.0"?>
<!-- tools/mira4/tool_dependencies.xml: file added in changeset 0:32f693f6e741 (Thu Sep 26 12:23:42 2013 -0400) -->
<tool_dependency>
    <!-- The version must match the <requirement type="package" version="4.0">MIRA</requirement>
         entries in mira4_de_novo.xml and mira4_mapping.xml. -->
    <package name="MIRA" version="4.0">
        <install version="1.0">
            <actions>
                <!-- Sourceforge doesn't offer nice clean download URLs which is a shame -->
                <!-- Ampersands in the query string are written as &amp; to keep this file well-formed. -->
                <action type="download_by_url">https://downloads.sourceforge.net/project/mira-assembler/MIRA/stable/mira_4.0rc2_linux-gnu_x86_64_static.tar.bz2?r=&amp;ts=1380039004&amp;use_mirror=kent</action>
                <action type="move_directory_files">
                    <source_directory>mira_4.0rc2_linux-gnu_x86_64_static/bin</source_directory>
                    <destination_directory>$INSTALL_DIR</destination_directory>
                </action>
                <action type="set_environment">
                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR</environment_variable>
                </action>
            </actions>
        </install>
        <readme>
Downloads MIRA v4.0 RC2 from Sourceforge, requesting Bastien's precompiled binaries
for 64bit Linux (x86_64). He also has binaries for Mac OS X, which we could
use once the Galaxy installation framework allows that kind of flexibility.

http://chevreux.org/projects_mira.html
http://sourceforge.net/projects/mira-assembler/
        </readme>
    </package>
</tool_dependency>