comparison tools/mira4/mira4_de_novo.xml @ 0:32f693f6e741 draft

Uploaded v0.0.1 preview0, very much a work in progress, primarily checking mira_datatypes dependency
author peterjc
date Thu, 26 Sep 2013 12:23:42 -0400
parents
children df86ed992a1b
comparison
equal deleted inserted replaced
-1:000000000000 0:32f693f6e741
1 <tool id="mira_4_0_de_novo" name="MIRA v4.0 de novo assembler" version="0.0.1">
2 <description>Takes Sanger, Roche, Illumina, Ion Torrent and PacBio data</description>
3 <requirements> <!-- External dependencies declared for this tool: a Python module, the mira binary, and the MIRA 4.0 package -->
4 <requirement type="python-module">Bio</requirement> <!-- presumably Biopython, imported by the mira4.py wrapper script; confirm -->
5 <requirement type="binary">mira</requirement>
6 <requirement type="package" version="4.0">MIRA</requirement>
7 </requirements>
8 <version_command interpreter="python">mira4.py -v</version_command> <!-- runs the wrapper script with -v; presumably prints the MIRA version, confirm against mira4.py -->
9 <command interpreter="python">
10 mira4.py $manifest $out_maf $out_fasta $out_log
11 </command> <!-- Arguments, in order: the generated "manifest" config file, then the out_maf, out_fasta and out_log output datasets -->
12 <inputs> <!-- Tool form: two job-level selects, then one or more read groups, each with one or more FASTQ files -->
13 <param name="job_type" type="select" label="Assembly type"> <!-- substituted as ${job_type} on the manifest "job =" line -->
14 <option value="genome">Genome</option>
15 <option value="est">EST (transcriptome)</option>
16 </param>
17 <param name="job_quality" type="select" label="Assembly quality grade"> <!-- substituted as ${job_quality} on the manifest "job =" line -->
18 <option value="accurate">Accurate</option>
19 <option value="draft">Draft</option>
20 </param>
21 <repeat name="read_group" title="Read Group" min="1"> <!-- each entry is emitted as one "readgroup" section of the manifest -->
22 <param name="technology" type="select" label="Read technology" help="MIRA has different error models for different technologies">
23 <option value="solexa">Solexa/Illumina</option>
24 <option value="sanger">Sanger capillary sequencing</option>
25 <option value="454">Roche 454</option>
26 <option value="iontor">Ion Torrent</option>
27 <option value="pcbiolq">PacBio low quality (raw)</option>
28 <option value="pcbiohq">PacBio high quality (corrected)</option>
29 <option value="text">Synthetic reads (database entries, consensus sequences, artificial reads, etc)</option>
30 <!-- TODO reference/backbone as an entry here? -->
31 </param>
32 <repeat name="reads" title="Reads" min="1" help="Paired reads can be combined into one file, or given as two files. MIRA will look at the read names to identify pairs."> <!-- each file is emitted as its own "data =" line of the manifest -->
33 <param name="filename" type="data" format="fastq" label="Reads in FASTQ format" />
34 </repeat>
35 </repeat>
36 </inputs>
37 <outputs> <!-- Output datasets; each name is passed to mira4.py as a command-line argument -->
38 <data name="out_fasta" format="fasta" label="MIRA contigs (FASTA)" />
39 <data name="out_maf" format="mira" label="MIRA Assembly" /> <!-- NOTE(review): the "mira" format is presumably provided by the mira_datatypes dependency; confirm it is installed -->
40 <data name="out_log" format="txt" label="MIRA log" />
41 </outputs>
42 <configfiles> <!-- Cheetah template rendered to a file and passed to mira4.py as $manifest: a fixed header, then one "readgroup" section per read_group entry with one "data =" line per reads file. NOTE(review): -DI:trt=/tmp is hard-coded; confirm /tmp is a local disk on the machines that run this tool. -->
43 <configfile name="manifest">
44 project = MIRA
45 job = denovo,${job_type},${job_quality}
46 parameters = -GE:not=1 -NW:cmrnl -DI:trt=/tmp
47 ## -GE:not is short for -GENERAL:number_of_threads and using one (1)
48 ## can be useful for repeatability of assemblies and bug hunting.
49 ##
50 ## -NW:cmrnl is short for -NAG_AND_WARN:check_maxreadnamelength
51 ## and without this MIRA aborts with read names over 40 characters
52 ## due to limitations of some downstream tools.
53 ##
54 ## -DI:trt is short for -DIRECTORY:tmp_redirected_to and should
55 ## point to a local hard drive (not something like NFS on network).
56
57 #for $rg in $read_group
58 #=======================================================
59 readgroup
60 technology = ${rg.technology}
61 ##MIRA will accept multiple filenames on one data line, or multiple data lines
62 #for f in $rg.reads
63 data = ${f.filename}
64 #end for
65 ### Cheetah doesn't want dollar sign on list comprehension intermediate variables
66 ###set $files = ' '.join([str(f['filename']) for f in rg['reads']])
67 ##data = $files
68 #end for
69 </configfile>
70 </configfiles>
71 <tests>
72 <!-- Based on the MIRA v3.4.1.1 bundled minidemo/estdemo2 which uses
73 strain data and miraSearchESTSNPs. Here we just assemble it. -->
74 <!--
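75 Test commented out, originally until the Galaxy framework is fixed (see link below). NOTE: the parameter names used in this test (job_method, job_qual, condBackbone.use, condSanger.use, ...) do not match this tool's inputs (job_type, job_quality, read_group), so it must be rewritten before it can be re-enabled.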
76 https://trello.com/c/zSTrfDOB/820-disambiguated-conditional-parameters-not-supported-in-unit-tests
77 <test>
78 <param name="job_method" value="denovo" />
79 <param name="job_type" value="est" />
80 <param name="job_qual" value="accurate" />
81 <param name="condBackbone.use" value="false" />
82 <param name="condSanger.use" value="true" />
83 <param name="condSanger.filename" value="tvc_mini.fastq" ftype="fastq" />
84 <param name="condRoche.use" value="false" />
85 <param name="condIllumina.use" value="false" />
86 <param name="condIonTorrent.use" value="false" />
87 <output name="out_fasta" file="tvc_contigs.fasta" ftype="fasta" />
88 </test>
89 -->
90 </tests>
91 <help>
92
93 **What it does**
94
95 Runs MIRA v4.0 in de novo mode, collects the output, and throws away all the temporary files.
96
97 MIRA is an open source assembly tool capable of handling sequence data from
98 a range of platforms (Sanger capillary, Solexa/Illumina, Roche 454, Ion Torrent
99 and also PacBio).
100
101 It is particularly suited to small genomes such as bacteria.
102
103 **Citation**
104
105 If you use this Galaxy tool in work leading to a scientific publication please
106 cite the following papers:
107
108 Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
109 Galaxy tools and workflows for sequence analysis with applications
110 in molecular plant pathology. PeerJ 1:e167
111 http://dx.doi.org/10.7717/peerj.167
112
113 Bastien Chevreux, Thomas Wetter and Sándor Suhai (1999).
114 Genome Sequence Assembly Using Trace Signals and Additional Sequence Information.
115 Computer Science and Biology: Proceedings of the German Conference on Bioinformatics (GCB) 99, pp. 45-56.
116 http://www.bioinfo.de/isb/gcb99/talks/chevreux/main.html
117
118 This wrapper is available to install into other Galaxy Instances via the Galaxy
119 Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/mira4_assembler
120 </help>
121 </tool>