0
|
1 <tool id="smalt_wrapper" name="SMALT" version="0.0.1">
|
|
<!-- Declares the smalt package, version 0.7.1, as the tool's only requirement. -->
<requirements>
    <requirement type="package" version="0.7.1">smalt</requirement>
</requirements>
<!-- Short description of the tool. -->
<description>maps query reads onto the reference sequences</description>
|
|
<!--
  Cheetah template that assembles the argument list for smalt_wrapper.py.
  Dataset paths, the dbkey and the free-form pair type are double-quoted so
  that the shell passes each one to the wrapper as a single, unglobbed word.
-->
<command interpreter="python">
    smalt_wrapper.py
        ## use the slot count allocated to the job; 4 (the former fixed value) is the fallback
        --threads="\${GALAXY_SLOTS:-4}"

        ## reference source
        --fileSource=$genomeSource.refGenomeSource
        #if $genomeSource.refGenomeSource == "history":
            ## build index on the fly
            --ref="${genomeSource.ownFile}"
            ## quoted because an unspecified build is the glob character "?"
            --dbkey="${dbkey}"
        #else:
            ## use precomputed indexes
            --ref="${genomeSource.indices.fields.path}"
            --do_not_build_index
        #end if

        ## input file(s)
        --input1="${paired.input1}"
        #if $paired.sPaired == "paired":
            --input2="${paired.input2}"
        #end if

        ## output file
        --output="${output}"

        ## run parameters
        --genAlignType=$paired.sPaired
        --params=$params.source_select
        #if $params.source_select != "pre_set":
            --scorDiff=$params.scorDiff
            ## insert size and library type are only passed for paired-end input
            #if $paired.sPaired == "paired":
                --insertMax=$params.insertMax
                --insertMin=$params.insertMin
                --pairTyp="${params.pairTyp}"
            #end if
            --minScor=$params.minScor
            --partialAlignments=$params.partialAlignments
            --minBasq=$params.minBasq
            --seed=$params.seed
            --complexityWeighted=$params.complexityWeighted
            --exhaustiveSearch=$params.cExhaustiveSearch.exhaustiveSearch
            ## minCover only exists in the "true" case of the cExhaustiveSearch conditional
            #if $params.cExhaustiveSearch.exhaustiveSearch == "true":
                --minCover=$params.cExhaustiveSearch.minCover
            #end if
            --minId=$params.minId
        #end if

        ## suppress output SAM header
        --suppressHeader=$suppressHeader
</command>
|
|
<inputs>
    <!-- Reference: an entry of the smalt_indexes data table, or a FASTA dataset from the history. -->
    <conditional name="genomeSource">
        <param name="refGenomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?">
            <option value="indexed">Use a built-in index</option>
            <option value="history">Use one from the history</option>
        </param>
        <when value="indexed">
            <param name="indices" type="select" label="Select a reference genome">
                <options from_data_table="smalt_indexes">
                    <filter type="sort_by" column="2" />
                    <validator type="no_options" message="No indexes are available" />
                </options>
            </param>
        </when>
        <when value="history">
            <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select a reference from history" />
        </when>
    </conditional>

    <!-- Reads: one FASTQ for single-end, forward plus reverse FASTQ for paired-end. -->
    <conditional name="paired">
        <param name="sPaired" type="select" label="Is this library mate-paired?">
            <option value="single">Single-end</option>
            <option value="paired">Paired-end</option>
        </param>
        <when value="single">
            <param name="input1" type="data" format="fastqsanger" label="FASTQ file" help="FASTQ with Sanger-scaled quality values (fastqsanger)" />
        </when>
        <when value="paired">
            <param name="input1" type="data" format="fastqsanger" label="Forward FASTQ file" help="FASTQ with Sanger-scaled quality values (fastqsanger)" />
            <param name="input2" type="data" format="fastqsanger" label="Reverse FASTQ file" help="FASTQ with Sanger-scaled quality values (fastqsanger)" />
        </when>
    </conditional>

    <!-- Mapping options: "pre_set" exposes nothing, "full" exposes every option below. -->
    <conditional name="params">
        <param name="source_select" type="select" label="Smalt settings to use" help="For most mapping needs use Commonly Used settings. If you want full control use Full Parameter List">
            <option value="pre_set">Commonly Used</option>
            <option value="full">Full Parameter List</option>
        </param>
        <when value="pre_set" />
        <when value="full">
            <conditional name="cExhaustiveSearch">
                <param name="exhaustiveSearch" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Do exhaustive search? (map -x)" help="This flag triggers a more exhaustive search for alignments at the cost of decreased speed." />
                <when value="true">
                    <param name="minCover" type="float" value="0" label="Minimum cover (map -c)" help="Only consider mappings where the k-mer word seeds cover the query read to a minimum extent." />
                </when>
                <!-- Case values must equal the boolean's truevalue/falsevalue; this case was "no", which never matched "false". -->
                <when value="false" />
            </conditional>
            <param name="scorDiff" type="integer" value="0" label="Score diff (map -d)" help="Set a threshold of the Smith-Waterman alignment score relative to the maximum score." />
            <param name="insertMax" type="integer" value="500" label="Maximum insert size (map -i)" help="Only in paired-end mode." />
            <param name="insertMin" type="integer" value="0" label="Minimum insert size (map -j)" help="Only in paired-end mode." />
            <!-- A select instead of free text: only these three library types are valid; "pe" stays the default. -->
            <param name="pairTyp" type="select" label="Type of read pair library (map -l)" help="Can be either 'pe', 'mp' or 'pp'.">
                <option value="pe" selected="true">pe (Illumina paired-end library, short inserts)</option>
                <option value="mp">mp (Illumina mate-pair library, long inserts)</option>
                <option value="pp">pp (mates sequenced on the same strand)</option>
            </param>
            <param name="minScor" type="integer" value="0" label="Minimum score (map -m)" help="Sets an absolute threshold of the Smith-Waterman scores." />
            <param name="partialAlignments" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Partial alignments (map -p)" help="Report partial alignments if they are complementary on the read (split reads)." />
            <!-- min/max enforce the range stated in the help text; "<" is escaped because it is illegal inside an attribute value. -->
            <param name="minBasq" type="integer" value="0" min="0" max="10" label="Base quality threshold (map -q)" help="Sets a base quality threshold (0 &lt;= minbasq &lt;= 10, default 0)." />
            <param name="seed" type="integer" value="0" label="Seed (map -r)" help="See below." />
            <param name="complexityWeighted" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Complexity weighted (map -w)" help="Smith-Waterman scores are complexity weighted." />
            <param name="minId" type="float" value="0" label="Identity threshold (map -y)" help="Sets an identity threshold for a mapping to be reported." />
        </when>
    </conditional>

    <param name="suppressHeader" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Suppress the header in the output SAM file" help="Smalt produces SAM with several lines of header information" />
</inputs>
|
|
<outputs>
    <!-- One SAM dataset; the actions below set its dbkey according to the chosen reference source. -->
    <data name="output" format="sam" label="${tool.name} on ${on_string}: mapped reads">
        <actions>
            <conditional name="genomeSource.refGenomeSource">
                <!-- Built-in index: dbkey comes from column 1 of the smalt_indexes data table. -->
                <when value="indexed">
                    <action type="metadata" name="dbkey">
                        <option type="from_data_table" name="smalt_indexes" column="1">
                            <!-- First filter: rows whose column 0 starts with "#" (keep="False"). -->
                            <filter type="param_value" column="0" value="#" compare="startswith" keep="False" />
                            <!-- Second filter: compare column 0 with the selected genomeSource.indices value. -->
                            <filter type="param_value" ref="genomeSource.indices" column="0" />
                        </option>
                    </action>
                </when>
                <!-- History reference: dbkey is taken from the ownFile parameter's dbkey attribute. -->
                <when value="history">
                    <action type="metadata" name="dbkey">
                        <option type="from_param" name="genomeSource.ownFile" param_attribute="dbkey" />
                    </action>
                </when>
            </conditional>
        </actions>
    </data>
</outputs>
|
|
<!-- reStructuredText help. Wrapped in CDATA because the option list contains literal "<" and ">" characters. -->
<help><![CDATA[

**What it does**

SMALT is a pairwise sequence alignment program for the efficient mapping of DNA sequencing reads onto genomic reference sequences. It uses a combination of short-word hashing and dynamic programming. Most types of sequencing platforms are supported including paired-end sequencing reads.

------

**Know what you are doing**

.. class:: warningmark

There is no such thing (yet) as an automated gearshift in short read mapping. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy.

.. __: http://www.sanger.ac.uk/resources/software/smalt/

------

**Input formats**

SMALT accepts files in Sanger FASTQ format (galaxy type *fastqsanger*). Use the FASTQ Groomer to prepare your files.

------

**A Note on Built-in Reference Genomes**

The default variant for all genomes is "Full", defined as all primary chromosomes (or scaffolds/contigs) including mitochondrial plus associated unmapped, plasmid, and other segments. When only one version of a genome is available in this tool, it represents the default "Full" variant. Some genomes will have more than one variant available. The "Canonical Male" or sometimes simply "Canonical" variant contains the primary chromosomes for a genome. For example a human "Canonical" variant contains chr1-chr22, chrX, chrY, and chrM. The "Canonical Female" variant contains the primary chromosomes excluding chrY.

------

**Outputs**

The output is in SAM format.

-------

**SMALT parameter list**

This is an exhaustive list of SMALT options:

For **map**::

  -a
      Output explicit alignments along with the mappings.

  -c <mincover>
      Only consider mappings where the k-mer word seeds cover the query read to
      a minimum extent. If <mincover> is an integer or floating point > 1.0, at
      least this many bases of the read must be covered by k-mer word seeds. If
      <mincover> is a floating point <= 1.0, it specifies the fraction of the
      query read length that must be covered by k-mer word seeds. This option
      is only valid in conjunction with the '-x' flag.

  -d <scordiff>
      Set a threshold of the Smith-Waterman alignment score relative to the
      maximum score. When mapping single reads, all alignments are reported
      that have Smith-Waterman scores within <scordiff> of the maximum.
      Mappings with lower scores are skipped. If <scordiff> is set to a
      value < 0, all alignments are printed that have scores above the
      threshold specified with the '-m <minscor>' option.
      For paired reads, only a value of 0 is supported. With the option '-d 0'
      all alignments (pairings) with the best score are output. By default
      (without the option '-d 0') single reads/mates with multiple best mappings
      are reported as 'not mapped'.

  -f <format>
      Specifies the output format. <format> can be either 'bam', 'cigar', 'gff',
      'sam' (default), 'samsoft' or 'ssaha'. Optional extension 'sam:nohead,clip'
      (see manual)

  -F <inform>
      Specifies the input format. <inform> can be either 'fastq' (default),
      'sam' or 'bam' (see: samtools.sourceforge.net). SAM and BAM formats
      require additional libraries to be installed.

  -g <insfil>
      Use the distribution of insert sizes stored in the file <insfil>. This
      file is in ASCII format and can be generated using the 'sample' task (see
      'smalt sample -H' for help).

  -H
      Print these instructions.

  -i <insertmax>
      Maximum insert size (only in paired-end mode). The default is 500.

  -j <insertmin>
      Minimum insert size (only in paired-end mode). The default is 0.

  -l <pairtyp>
      Type of read pair library. <pairtyp> can be either 'pe', i.e. for
      the Illumina paired-end library for short inserts (|--> <--|). 'mp'
      for the Illumina mate-pair library for long inserts (<--| |-->) or
      'pp' for mates sequenced on the same strand (|--> |-->). 'pe' is the
      default.

  -m <minscor>
      Sets an absolute threshold of the Smith-Waterman scores. Mappings with
      scores below that threshold will not be reported. The default is
      <minscor> = <wordlen> + <stepsiz> - 1

  -n <nthreads>
      Run smalt using multiple threads. <nthreads> is the number of additional
      threads forked from the main thread. The order of the reads in the
      input files is not preserved for the output unless '-O' is also specified.

  -o <oufilnam>
      Write mapping output (e.g. SAM lines) to a separate file. If this option
      is not specified, mappings are written to standard output together with
      other messages.

  -O
      Output mappings in the order of the reads in the input files when using
      multiple threads (option '-n <nthreads>').

  -p
      Report partial alignments if they are complementary on the read (split
      reads).

  -q <minbasq>
      Sets a base quality threshold (0 <= minbasq <= 10, default 0).
      K-mer words of the read with nucleotides that have a base quality below
      this threshold are not looked up in the hash index.

  -r <seed>
      If <seed> >= 0 report an alignment selected at random where there are
      multiple mappings with the same best alignment score. With <seed> = 0
      (default) a seed is derived from the current calendar time. If <seed>
      < 0 reads with multiple best mappings are reported as 'not mapped'.

  -T <tmp_dir>
      Write temporary files to directory <tmp_dir> (used with input files in
      SAM/BAM format).

  -w
      Smith-Waterman scores are complexity weighted.

  -x
      This flag triggers a more exhaustive search for alignments at the cost
      of decreased speed. In paired-end mode each mate is mapped independently.
      (By default the mate with fewer hits in the hash index is mapped first
      and the vicinity is searched for mappings of its mate.)

  -y <minid>
      Sets an identity threshold for a mapping to be reported (default: 0).
      <minid> specifies the number of exactly matching nucleotides either as
      a positive integer or as a fraction of the read length (<= 1.0).

]]></help>
|
|
285 </tool>
|
|
286
|
|
287
|