comparison tools/smalt_wrapper.xml @ 2:ead9eab0bc4c draft default tip

Uploaded
author takadonet
date Thu, 16 Jan 2014 14:32:05 -0500
parents b857363cc1db
children
comparison
equal deleted inserted replaced
1:b857363cc1db 2:ead9eab0bc4c
1 <tool id="smalt_wrapper" name="SMALT" version="0.0.1">
2 <requirements> <!-- external packages this tool depends on -->
3 <requirement version="0.7.1" type="package">smalt</requirement>
4 </requirements>
5 <description>maps query reads onto the reference sequences</description>
6 <command interpreter="python">
7 smalt_wrapper.py
8 --threads="\${GALAXY_SLOTS:-4}"
9 ## threads: use the slot count Galaxy allocated to the job (GALAXY_SLOTS), defaulting to 4 when it is unset
10 ## reference source: a FASTA dataset from the history or a prebuilt index from the smalt_indexes data table
11 --fileSource=$genomeSource.refGenomeSource
12 #if $genomeSource.refGenomeSource == "history":
13 ## build index on the fly
14 --ref="${genomeSource.ownFile}"
15 --dbkey=$dbkey
16 #else:
17 ## use precomputed indexes
18 --ref="${genomeSource.indices.fields.path}"
19 --do_not_build_index
20 #end if
21
22 ## input file(s); input2 is only defined for paired-end libraries
23 --input1=$paired.input1
24 #if $paired.sPaired == "paired":
25 --input2=$paired.input2
26 #end if
27
28 ## output file
29 --output=$output
30
31 ## run parameters; the individual options are only passed when the full parameter list is selected
32 --genAlignType=$paired.sPaired
33 --params=$params.source_select
34 #if $params.source_select != "pre_set":
35 --scorDiff=$params.scorDiff
36 #if $paired.sPaired == "paired":
37 --insertMax=$params.insertMax
38 --insertMin=$params.insertMin
39 --pairTyp=$params.pairTyp
40 #end if
41 --minScor=$params.minScor
42 --partialAlignments=$params.partialAlignments
43 --minBasq=$params.minBasq
44 --seed=$params.seed
45 --complexityWeighted=$params.complexityWeighted
46 --exhaustiveSearch=$params.cExhaustiveSearch.exhaustiveSearch
47 #if str($params.cExhaustiveSearch.exhaustiveSearch) == "true":
48 --minCover=$params.cExhaustiveSearch.minCover
49 #end if
50 --minId=$params.minId
51 #end if
52
53 ## suppress output SAM header
54 --suppressHeader=$suppressHeader
55 </command>
56 <inputs>
57 <conditional name="genomeSource"> <!-- reference: built-in index or a FASTA dataset from the history -->
58 <param name="refGenomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?">
59 <option value="indexed">Use a built-in index</option>
60 <option value="history">Use one from the history</option>
61 </param>
62 <when value="indexed">
63 <param name="indices" type="select" label="Select a reference genome">
64 <options from_data_table="smalt_indexes">
65 <filter type="sort_by" column="2" />
66 <validator type="no_options" message="No indexes are available" />
67 </options>
68 </param>
69 </when>
70 <when value="history">
71 <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select a reference from history" />
72 </when>
73 </conditional>
74 <conditional name="paired"> <!-- single-end takes one FASTQ, paired-end takes forward and reverse -->
75 <param name="sPaired" type="select" label="Is this library mate-paired?">
76 <option value="single">Single-end</option>
77 <option value="paired">Paired-end</option>
78 </param>
79 <when value="single">
80 <param name="input1" type="data" format="fastqsanger" label="FASTQ file" help="FASTQ with Sanger-scaled quality values (fastqsanger)" />
81 </when>
82 <when value="paired">
83 <param name="input1" type="data" format="fastqsanger" label="Forward FASTQ file" help="FASTQ with Sanger-scaled quality values (fastqsanger)" />
84 <param name="input2" type="data" format="fastqsanger" label="Reverse FASTQ file" help="FASTQ with Sanger-scaled quality values (fastqsanger)" />
85 </when>
86 </conditional>
87 <conditional name="params"> <!-- "pre_set" exposes no options; "full" exposes every map option below -->
88 <param name="source_select" type="select" label="Smalt settings to use" help="For most mapping needs use Commonly Used settings. If you want full control use Full Parameter List">
89 <option value="pre_set">Commonly Used</option>
90 <option value="full">Full Parameter List</option>
91 </param>
92 <when value="pre_set" />
93 <when value="full">
94 <conditional name="cExhaustiveSearch"> <!-- minCover (map -c) is only offered when exhaustive search (map -x) is enabled -->
95 <param name="exhaustiveSearch" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Do exhaustive search? (map -x)" help="This flag triggers a more exhaustive search for alignments at the cost of decreased speed." />
96 <when value="true">
97 <param name="minCover" type="float" value="0" label="Minimum cover (map -c)" help="Only consider mappings where the k-mer word seeds cover the query read to a minimum extent." />
98 </when>
99 <when value="false" /> <!-- must equal the test parameter's falsevalue; no extra options in this case -->
100 </conditional>
101 <param name="scorDiff" type="integer" value="0" label="Score diff (map -d)" help="Set a threshold of the Smith-Waterman alignment score relative to the maximum score." />
102 <param name="insertMax" type="integer" value="500" label="Maximum insert size (map -i)" help="Only in paired-end mode." />
103 <param name="insertMin" type="integer" value="0" label="Minimum insert size (map -j)" help="Only in paired-end mode." />
104 <param name="pairTyp" type="text" size="2" value="pe" label="Type of read pair library (map -l)" help="Can be either 'pe', 'mp' or 'pp'." />
105 <param name="minScor" type="integer" value="0" label="Minimum score (map -m)" help="Sets an absolute threshold of the Smith-Waterman scores." />
106 <param name="partialAlignments" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Partial alignments (map -p)" help="Report partial alignments if they are complementary on the read (split reads)." />
107 <param name="minBasq" type="integer" value="0" label="Base quality threshold (map -q)" help="Sets a base quality threshold (0 &lt;= minbasq &lt;= 10, default 0)." />
108 <param name="seed" type="integer" value="0" label="Seed (map -r)" help="See below." />
109 <param name="complexityWeighted" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Complexity weighted (map -w)" help="Smith-Waterman scores are complexity weighted." />
110 <param name="minId" type="float" value="0" label="Identity threshold (map -y)" help="Sets an identity threshold for a mapping to be reported." />
111 </when>
112 </conditional>
113 <param name="suppressHeader" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Suppress the header in the output SAM file" help="Smalt produces SAM with several lines of header information" />
114 </inputs>
115 <outputs>
116 <data name="output" format="sam" label="${tool.name} on ${on_string}: mapped reads">
117 <actions> <!-- set the output's dbkey according to where the reference came from -->
118 <conditional name="genomeSource.refGenomeSource">
119 <when value="indexed"> <!-- dbkey is read from column 1 of the smalt_indexes row matching the selected index -->
120 <action name="dbkey" type="metadata">
121 <option name="smalt_indexes" type="from_data_table" column="1">
122 <filter column="0" type="param_value" compare="startswith" value="#" keep="False"/>
123 <filter column="0" type="param_value" ref="genomeSource.indices"/>
124 </option>
125 </action>
126 </when>
127 <when value="history"> <!-- dbkey is copied from the reference dataset chosen in the history -->
128 <action name="dbkey" type="metadata">
129 <option name="genomeSource.ownFile" type="from_param" param_attribute="dbkey" />
130 </action>
131 </when>
132 </conditional>
133 </actions>
134 </data>
135 </outputs>
136 <help>
137
138 **What it does**
139
140 SMALT is a pairwise sequence alignment program for the efficient mapping of DNA sequencing reads onto genomic reference sequences. It uses a combination of short-word hashing and dynamic programming. Most types of sequencing platforms are supported including paired-end sequencing reads.
141
142 ------
143
144 Please cite the website "http://www.sanger.ac.uk/resources/software/smalt/".
145
146 ------
147
148 **Know what you are doing**
149
150 .. class:: warningmark
151
152 There is no such thing (yet) as an automated gearshift in short read mapping. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.
153
154 .. __: http://www.sanger.ac.uk/resources/software/smalt/
155
156 ------
157
158 **Input formats**
159
160 SMALT accepts files in Sanger FASTQ format (galaxy type *fastqsanger*). Use the FASTQ Groomer to prepare your files.
161
162 ------
163
164 **A Note on Built-in Reference Genomes**
165
166 The default variant for all genomes is "Full", defined as all primary chromosomes (or scaffolds/contigs) including mitochondrial plus associated unmapped, plasmid, and other segments. When only one version of a genome is available in this tool, it represents the default "Full" variant. Some genomes will have more than one variant available. The "Canonical Male" or sometimes simply "Canonical" variant contains the primary chromosomes for a genome. For example a human "Canonical" variant contains chr1-chr22, chrX, chrY, and chrM. The "Canonical Female" variant contains the primary chromosomes excluding chrY.
167
168 ------
169
170 **Outputs**
171
172 The output is in SAM format.
173
174 -------
175
176 **SMALT parameter list**
177
178 This is an exhaustive list of SMALT options:
179
180 For **map**::
181
182 -a
183 Output explicit alignments along with the mappings.
184
185 -c &lt;mincover&gt;
186 Only consider mappings where the k-mer word seeds cover the query read to
187 a minimum extent. If &lt;mincover&gt; is an integer or floating point &gt; 1.0, at
188 least this many bases of the read must be covered by k-mer word seeds. If
189 &lt;mincover&gt; is a floating point &lt;= 1.0, it specifies the fraction of the
190 query read length that must be covered by k-mer word seeds. This option
191 is only valid in conjunction with the '-x' flag.
192
193 -d &lt;scordiff&gt;
194 Set a threshold of the Smith-Waterman alignment score relative to the
195 maximum score. When mapping single reads, all alignments are reported
196 that have Smith-Waterman scores within &lt;scordiff&gt; of the maximum.
197 Mappings with lower scores are skipped. If &lt;scordiff&gt; is set to a
198 value &lt; 0, all alignments are printed that have scores above the
199 threshold specified with the '-m &lt;minscor&gt;' option.
200 For paired reads, only a value of 0 is supported. With the option '-d 0'
201 all alignments (pairings) with the best score are output. By default
202 (without the option '-d 0') single reads/mates with multiple best mappings
203 are reported as 'not mapped'.
204
205 -f &lt;format&gt;
206 Specifies the output format. &lt;format&gt; can be either 'bam', 'cigar', 'gff',
207 'sam' (default), 'samsoft' or 'ssaha'. Optional extension 'sam:nohead,clip'
208 (see manual)
209
210 -F &lt;inform&gt;
211 Specifies the input format. &lt;inform&gt; can be either 'fastq' (default),
212 'sam' or 'bam' (see: samtools.sourceforge.net). SAM and BAM formats
213 require additional libraries to be installed.
214
215 -g &lt;insfil&gt;
216 Use the distribution of insert sizes stored in the file &lt;insfil&gt;. This
217 file is in ASCII format and can be generated using the 'sample' task (see
218 'smalt sample -H' for help).
219
220 -H
221 Print these instructions.
222
223 -i &lt;insertmax&gt;
224 Maximum insert size (only in paired-end mode). The default is 500.
225
226 -j &lt;insertmin&gt;
227 Minimum insert size (only in paired-end mode). The default is 0.
228
229 -l &lt;pairtyp&gt;
230 Type of read pair library. &lt;pairtyp&gt; can be either 'pe', i.e. for
231 the Illumina paired-end library for short inserts (|--&gt; &lt;--|). 'mp'
232 for the Illumina mate-pair library for long inserts (&lt;--| |--&gt;) or
233 'pp' for mates sequenced on the same strand (|--&gt; |--&gt;). 'pe' is the
234 default.
235
236 -m &lt;minscor&gt;
237 Sets an absolute threshold of the Smith-Waterman scores. Mappings with
238 scores below that threshold will not be reported. The default is
239 &lt;minscor&gt; = &lt;wordlen&gt; + &lt;stepsiz&gt; - 1
240
241 -n &lt;nthreads&gt;
242 Run smalt using multiple threads. &lt;nthreads&gt; is the number of additional
243 threads forked from the main thread. The order of the reads in the
244 input files is not preserved for the output unless '-O' is also specified.
245
246 -o &lt;oufilnam&gt;
247 Write mapping output (e.g. SAM lines) to a separate file. If this option
248 is not specified, mappings are written to standard output together with
249 other messages.
250
251 -O
252 Output mappings in the order of the reads in the input files when using
253 multiple threads (option '-n &lt;nthreads&gt;').
254
255 -p
256 Report partial alignments if they are complementary on the read (split
257 reads).
258
259 -q &lt;minbasq&gt;
260 Sets a base quality threshold (0 &lt;= minbasq &lt;= 10, default 0).
261 K-mer words of the read with nucleotides that have a base quality below
262 this threshold are not looked up in the hash index.
263
264 -r &lt;seed&gt;
265 If &lt;seed&gt; &gt;= 0 report an alignment selected at random where there are
266 multiple mappings with the same best alignment score. With &lt;seed&gt; = 0
267 (default) a seed is derived from the current calendar time. If &lt;seed&gt;
268 &lt; 0 reads with multiple best mappings are reported as 'not mapped'.
269
270 -T &lt;tmp_dir&gt;
271 Write temporary files to directory &lt;tmp_dir&gt; (used with input files in
272 SAM/BAM format).
273
274 -w
275 Smith-Waterman scores are complexity weighted.
276
277 -x
278 This flag triggers a more exhaustive search for alignments at the cost
279 of decreased speed. In paired-end mode each mate is mapped independently.
280 (By default the mate with fewer hits in the hash index is mapped first
281 and the vicinity is searched for mappings of its mate.)
282
283 -y &lt;minid&gt;
284 Sets an identity threshold for a mapping to be reported (default: 0).
285 &lt;minid&gt; specifies the number of exactly matching nucleotides either as
286 a positive integer or as a fraction of the read length (&lt;= 1.0).
287
288 </help>
289 </tool>
290
291