comparison tools/smalt_wrapper.xml @ 0:87f4f05b44bb draft

Uploaded
author cjav
date Wed, 13 Feb 2013 13:04:12 -0500
parents
children b857363cc1db
comparison
equal deleted inserted replaced
-1:000000000000 0:87f4f05b44bb
1 <tool id="smalt_wrapper" name="SMALT" version="0.0.1">
2 <requirements>
3 <requirement type="package" version="0.7.1">smalt</requirement>
4 </requirements>
5 <description>maps query reads onto the reference sequences</description>
6 <command interpreter="python">
7 smalt_wrapper.py
8 --threads="4"
9
10 ## reference source
11 --fileSource=$genomeSource.refGenomeSource
12 #if $genomeSource.refGenomeSource == "history":
13 ##build index on the fly
14 --ref="${genomeSource.ownFile}"
15 --dbkey=$dbkey
16 #else:
17 ##use precomputed indexes
18 --ref="${genomeSource.indices.fields.path}"
19 --do_not_build_index
20 #end if
21
22 ## input file(s)
23 --input1=$paired.input1
24 #if $paired.sPaired == "paired":
25 --input2=$paired.input2
26 #end if
27
28 ## output file
29 --output=$output
30
31 ## run parameters
32 --genAlignType=$paired.sPaired
33 --params=$params.source_select
34 #if $params.source_select != "pre_set":
35 --scorDiff=$params.scorDiff
36 #if $paired.sPaired == "paired":
37 --insertMax=$params.insertMax
38 --insertMin=$params.insertMin
39 --pairTyp=$params.pairTyp
40 #end if
41 --minScor=$params.minScor
42 --partialAlignments=$params.partialAlignments
43 --minBasq=$params.minBasq
44 --seed=$params.seed
45 --complexityWeighted=$params.complexityWeighted
46 --exhaustiveSearch=$params.cExhaustiveSearch.exhaustiveSearch
47 #if str($params.cExhaustiveSearch.exhaustiveSearch) == "true":
48 --minCover=$params.cExhaustiveSearch.minCover
49 #end if
50 --minId=$params.minId
51 #end if
52
53 ## suppress output SAM header
54 --suppressHeader=$suppressHeader
55 </command>
56 <inputs>
57 <conditional name="genomeSource">
58 <param name="refGenomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?">
59 <option value="indexed">Use a built-in index</option>
60 <option value="history">Use one from the history</option>
61 </param>
62 <when value="indexed">
63 <param name="indices" type="select" label="Select a reference genome">
64 <options from_data_table="smalt_indexes">
65 <filter type="sort_by" column="2" />
66 <validator type="no_options" message="No indexes are available" />
67 </options>
68 </param>
69 </when>
70 <when value="history">
71 <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select a reference from history" />
72 </when>
73 </conditional>
74 <conditional name="paired">
75 <param name="sPaired" type="select" label="Is this library mate-paired?">
76 <option value="single">Single-end</option>
77 <option value="paired">Paired-end</option>
78 </param>
79 <when value="single">
80 <param name="input1" type="data" format="fastqsanger" label="FASTQ file" help="FASTQ with Sanger-scaled quality values (fastqsanger)" />
81 </when>
82 <when value="paired">
83 <param name="input1" type="data" format="fastqsanger" label="Forward FASTQ file" help="FASTQ with Sanger-scaled quality values (fastqsanger)" />
84 <param name="input2" type="data" format="fastqsanger" label="Reverse FASTQ file" help="FASTQ with Sanger-scaled quality values (fastqsanger)" />
85 </when>
86 </conditional>
87 <conditional name="params">
88 <param name="source_select" type="select" label="Smalt settings to use" help="For most mapping needs use Commonly Used settings. If you want full control use Full Parameter List">
89 <option value="pre_set">Commonly Used</option>
90 <option value="full">Full Parameter List</option>
91 </param>
92 <when value="pre_set" />
93 <when value="full">
94 <conditional name="cExhaustiveSearch">
95 <param name="exhaustiveSearch" type="boolean" truevalue="true" falsevalue="false" checked="no" label="Do exhaustive search? (map -x)" help="This flag triggers a more exhaustive search for alignments at the cost of decreased speed." />
96 <when value="true">
97 <param name="minCover" type="float" value="0" label="Minimum cover (map -c)" help="Only consider mappings where the k-mer word seeds cover the query read to a minimum extent." />
98 </when>
99 <when value="false" /> <!-- must equal the falsevalue of exhaustiveSearch so the unchecked state matches a case -->
100 </conditional>
101 <param name="scorDiff" type="integer" value="0" label="Score diff (map -d)" help="Set a threshold of the Smith-Waterman alignment score relative to the maximum score." />
102 <param name="insertMax" type="integer" value="500" label="Maximum insert size (map -i)" help="Only in paired-end mode." />
103 <param name="insertMin" type="integer" value="0" label="Minimum insert size (map -j)" help="Only in paired-end mode." />
104 <param name="pairTyp" type="text" size="2" value="pe" label="Type of read pair library (map -l)" help="Can be either 'pe', 'mp' or 'pp'." />
105 <param name="minScor" type="integer" value="0" label="Minimum score (map -m)" help="Sets an absolute threshold of the Smith-Waterman scores." />
106 <param name="partialAlignments" type="boolean" truevalue="true" falsevalue="false" checked="no" label="Partial alignments (map -p)" help="Report partial alignments if they are complementary on the read (split reads)." />
107 <param name="minBasq" type="integer" value="0" label="Base quality threshold (map -q)" help="Sets a base quality threshold (0 &lt;= minbasq &lt;= 10, default 0)." />
108 <param name="seed" type="integer" value="0" label="Seed (map -r)" help="See below." />
109 <param name="complexityWeighted" type="boolean" truevalue="true" falsevalue="false" checked="no" label="Complexity weighted (map -w)" help="Smith-Waterman scores are complexity weighted." />
110 <param name="minId" type="float" value="0" label="Identity threshold (map -y)" help="Sets an identity threshold for a mapping to be reported." />
111 </when>
112 </conditional>
113 <param name="suppressHeader" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Suppress the header in the output SAM file" help="Smalt produces SAM with several lines of header information" />
114 </inputs>
115 <outputs>
116 <data format="sam" name="output" label="${tool.name} on ${on_string}: mapped reads">
117 <actions>
118 <conditional name="genomeSource.refGenomeSource">
119 <when value="indexed">
120 <action type="metadata" name="dbkey">
121 <option type="from_data_table" name="smalt_indexes" column="1">
122 <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
123 <filter type="param_value" ref="genomeSource.indices" column="0"/>
124 </option>
125 </action>
126 </when>
127 <when value="history">
128 <action type="metadata" name="dbkey">
129 <option type="from_param" name="genomeSource.ownFile" param_attribute="dbkey" />
130 </action>
131 </when>
132 </conditional>
133 </actions>
134 </data>
135 </outputs>
136 <help>
137
138 **What it does**
139
140 SMALT is a pairwise sequence alignment program for the efficient mapping of DNA sequencing reads onto genomic reference sequences. It uses a combination of short-word hashing and dynamic programming. Most types of sequencing platforms are supported including paired-end sequencing reads.
141
142 ------
143
144 **Know what you are doing**
145
146 .. class:: warningmark
147
148 There is no such thing (yet) as an automated gearshift in short read mapping. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.
149
150 .. __: http://www.sanger.ac.uk/resources/software/smalt/
151
152 ------
153
154 **Input formats**
155
156 SMALT accepts files in Sanger FASTQ format (galaxy type *fastqsanger*). Use the FASTQ Groomer to prepare your files.
157
158 ------
159
160 **A Note on Built-in Reference Genomes**
161
162 The default variant for all genomes is "Full", defined as all primary chromosomes (or scaffolds/contigs) including mitochondrial plus associated unmapped, plasmid, and other segments. When only one version of a genome is available in this tool, it represents the default "Full" variant. Some genomes will have more than one variant available. The "Canonical Male" or sometimes simply "Canonical" variant contains the primary chromosomes for a genome. For example a human "Canonical" variant contains chr1-chr22, chrX, chrY, and chrM. The "Canonical Female" variant contains the primary chromosomes excluding chrY.
163
164 ------
165
166 **Outputs**
167
168 The output is in SAM format.
169
170 -------
171
172 **SMALT parameter list**
173
174 This is an exhaustive list of SMALT options:
175
176 For **map**::
177
178 -a
179 Output explicit alignments along with the mappings.
180
181 -c &lt;mincover&gt;
182 Only consider mappings where the k-mer word seeds cover the query read to
183 a minimum extent. If &lt;mincover&gt; is an integer or floating point &gt; 1.0, at
184 least this many bases of the read must be covered by k-mer word seeds. If
185 &lt;mincover&gt; is a floating point &lt;= 1.0, it specifies the fraction of the
186 query read length that must be covered by k-mer word seeds. This option
187 is only valid in conjunction with the '-x' flag.
188
189 -d &lt;scordiff&gt;
190 Set a threshold of the Smith-Waterman alignment score relative to the
191 maximum score. When mapping single reads, all alignments are reported
192 that have Smith-Waterman scores within &lt;scordiff&gt; of the maximum.
193 Mappings with lower scores are skipped. If &lt;scordiff&gt; is set to a
194 value &lt; 0, all alignments are printed that have scores above the
195 threshold specified with the '-m &lt;minscor&gt;' option.
196 For paired reads, only a value of 0 is supported. With the option '-d 0'
197 all alignments (pairings) with the best score are output. By default
198 (without the option '-d 0') single reads/mates with multiple best mappings
199 are reported as 'not mapped'.
200
201 -f &lt;format&gt;
202 Specifies the output format. &lt;format&gt; can be either 'bam', 'cigar', 'gff',
203 'sam' (default), 'samsoft' or 'ssaha'. Optional extension 'sam:nohead,clip'
204 (see manual)
205
206 -F &lt;inform&gt;
207 Specifies the input format. &lt;inform&gt; can be either 'fastq' (default),
208 'sam' or 'bam' (see: samtools.sourceforge.net). SAM and BAM formats
209 require additional libraries to be installed.
210
211 -g &lt;insfil&gt;
212 Use the distribution of insert sizes stored in the file &lt;insfil&gt;. This
213 file is in ASCII format and can be generated using the 'sample' task (see
214 'smalt sample -H' for help).
215
216 -H
217 Print these instructions.
218
219 -i &lt;insertmax&gt;
220 Maximum insert size (only in paired-end mode). The default is 500.
221
222 -j &lt;insertmin&gt;
223 Minimum insert size (only in paired-end mode). The default is 0.
224
225 -l &lt;pairtyp&gt;
226 Type of read pair library. &lt;pairtyp&gt; can be either 'pe', i.e. for
227 the Illumina paired-end library for short inserts (|--&gt; &lt;--|). 'mp'
228 for the Illumina mate-pair library for long inserts (&lt;--| |--&gt;) or
229 'pp' for mates sequenced on the same strand (|--&gt; |--&gt;). 'pe' is the
230 default.
231
232 -m &lt;minscor&gt;
233 Sets an absolute threshold of the Smith-Waterman scores. Mappings with
234 scores below that threshold will not be reported. The default is
235 &lt;minscor&gt; = &lt;wordlen&gt; + &lt;stepsiz&gt; - 1
236
237 -n &lt;nthreads&gt;
238 Run smalt using multiple threads. &lt;nthreads&gt; is the number of additional
239 threads forked from the main thread. The order of the reads in the
240 input files is not preserved for the output unless '-O' is also specified.
241
242 -o &lt;oufilnam&gt;
243 Write mapping output (e.g. SAM lines) to a separate file. If this option
244 is not specified, mappings are written to standard output together with
245 other messages.
246
247 -O
248 Output mappings in the order of the reads in the input files when using
249 multiple threads (option '-n &lt;nthreads&gt;').
250
251 -p
252 Report partial alignments if they are complementary on the read (split
253 reads).
254
255 -q &lt;minbasq&gt;
256 Sets a base quality threshold (0 &lt;= minbasq &lt;= 10, default 0).
257 K-mer words of the read with nucleotides that have a base quality below
258 this threshold are not looked up in the hash index.
259
260 -r &lt;seed&gt;
261 If &lt;seed&gt; &gt;= 0 report an alignment selected at random where there are
262 multiple mappings with the same best alignment score. With &lt;seed&gt; = 0
263 (default) a seed is derived from the current calendar time. If &lt;seed&gt;
264 &lt; 0 reads with multiple best mappings are reported as 'not mapped'.
265
266 -T &lt;tmp_dir&gt;
267 Write temporary files to directory &lt;tmp_dir&gt; (used with input files in
268 SAM/BAM format).
269
270 -w
271 Smith-Waterman scores are complexity weighted.
272
273 -x
274 This flag triggers a more exhaustive search for alignments at the cost
275 of decreased speed. In paired-end mode each mate is mapped independently.
276 (By default the mate with fewer hits in the hash index is mapped first
277 and the vicinity is searched for mappings of its mate.)
278
279 -y &lt;minid&gt;
280 Sets an identity threshold for a mapping to be reported (default: 0).
281 &lt;minid&gt; specifies the number of exactly matching nucleotides either as
282 a positive integer or as a fraction of the read length (&lt;= 1.0).
283
284 </help>
285 </tool>
286
287