<?xml version="1.0" encoding="UTF-8"?>
<!--
  Galaxy tool wrapper "snap_caller".

  The command template assembles a single `mimodd snap_batch -s` invocation
  that carries one quoted "snap ..." call per entry of the "datasets" repeat.
  Every call shares the same reference genome, output file, output format and
  aligner settings (the "set" conditional); only the read input, its format
  and its header file differ per dataset.
-->
<tool id="snap_caller" name="SNAP Read Alignment">
  <description>Map sequence reads to a reference genome using SNAP</description>

  <requirements>
    <requirement type="package" version="3.4.1">python3</requirement>
    <requirement type="package" version="0.1.3_9af04e0e9125">MiModD</requirement>
  </requirements>

  <command>
    mimodd snap_batch -s
    ## SNAP calls (considering different cases)

    ## one quoted snap call is emitted per dataset of the repeat
    #for $i in $datasets
      "snap ${i.mode_choose.mode} $ref_genome
      ## paired-end fastq/gz input is given as two files, any other input as one
      #if $str($i.mode_choose.mode) == "paired" and $str($i.mode_choose.input.iformat) in ("fastq", "gz"):
        ${i.mode_choose.input.ifile1} ${i.mode_choose.input.ifile2}
      #else:
        ${i.mode_choose.input.ifile}
      #end if
      --outputfile $outputfile --iformat ${i.mode_choose.input.iformat} --oformat $oformat
      --idx_seedsize $set.seedsize
      --idx_slack $set.slack --maxseeds $set.maxseeds --maxhits $set.maxhits --clipping=$set.clipping --maxdist $set.maxdist --confdiff $set.confdiff
      ## the header file is optional for bam/sam input, so only pass it when set
      #if $i.mode_choose.input.header:
        --header ${i.mode_choose.input.header}
      #end if
      #if $str($i.mode_choose.mode) == "paired":
        --spacing $set.sp_min $set.sp_max
      #end if
      ## the value off is the sentinel for leaving an option out of the call
      #if $str($set.selectivity) != "off":
        --selectivity $set.selectivity
      #end if
      #if $str($set.filter_output) != "off":
        --filter_output $set.filter_output
      #end if
      #if $str($set.sort) != "off":
        --sort $set.sort
      #end if
      #if $str($set.mmatch_notation) == "general":
        -M
      #end if
      --max_mate_overlap $set.max_mate_overlap
      --verbose
      "
    #end for
  </command>

  <inputs>
    <!-- mandatory arguments (and mode-conditionals) -->

    <param name="ref_genome"
           type="data"
           format="fasta"
           label="reference genome"
           help="The fasta reference genome that SNAP should align reads against; a SNAP index will be built by the tool automatically."/>

    <!-- one entry per read input; at least one entry is required -->
    <repeat name="datasets" title="datasets" default="1" min="1">
      <conditional name="mode_choose">
        <param name="mode"
               type="select"
               label="choose mode"
               help="Reads obtained from single-end sequencing runs should be aligned in 'single' mode, paired-end reads in 'paired' mode. **WARNING**: if the read input file is in SAM/BAM format, the current version of this tool will **not** verify the mode and may produce erroneous alignments with wrong settings!">
          <option value="single">single-end</option>
          <option value="paired">paired-end</option>
        </param>

        <!-- single-end: every input format provides exactly one file ("ifile") -->
        <when value="single">
          <conditional name="input">
            <param name="iformat" type="select" label="input file format">
              <option value="bam">BAM</option>
              <option value="sam">SAM</option>
              <option value="gz">gz</option>
              <option value="fastq">fastq</option>
            </param>
            <when value="bam">
              <param name="ifile" type="data" format="bam" label="input file"/>
              <param name="header" type="data" optional="true" format="sam" label="custom header file"/>
            </when>
            <when value="sam">
              <param name="ifile" type="data" format="sam" label="input file"/>
              <param name="header" type="data" optional="true" format="sam" label="custom header file"/>
            </when>
            <!-- gz and fastq input: the header file is not optional -->
            <when value="gz">
              <param name="ifile" type="data" label="input file"/>
              <param name="header" type="data" format="sam" label="header file"/>
            </when>
            <when value="fastq">
              <param name="ifile" type="data" format="fastq" label="input file"/>
              <param name="header" type="data" format="sam" label="header file"/>
            </when>
          </conditional>
        </when>

        <!-- paired-end: bam/sam provide one file ("ifile"), fastq/gz provide two ("ifile1", "ifile2") -->
        <when value="paired">
          <conditional name="input">
            <param name="iformat" type="select" label="input file format">
              <option value="bam">BAM</option>
              <option value="sam">SAM</option>
              <option value="gz">gz</option>
              <option value="fastq">fastq</option>
            </param>
            <when value="bam">
              <param name="ifile" type="data" format="bam" label="input file"/>
              <param name="header" type="data" optional="true" format="sam" label="custom header file"/>
            </when>
            <when value="sam">
              <param name="ifile" type="data" format="sam" label="input file"/>
              <param name="header" type="data" optional="true" format="sam" label="custom header file"/>
            </when>
            <when value="fastq">
              <param name="ifile1" type="data" format="fastq" label="input file 1"/>
              <param name="ifile2" type="data" format="fastq" label="input file 2"/>
              <param name="header" type="data" format="sam" label="header file"/>
            </when>
            <when value="gz">
              <param name="ifile1" type="data" label="input file 1"/>
              <param name="ifile2" type="data" label="input file 2"/>
              <param name="header" type="data" format="sam" label="header file"/>
            </when>
          </conditional>
        </when>
      </conditional>
    </repeat>

    <param name="oformat" type="select" label="output file format">
      <option value="bam">BAM</option>
      <option value="sam">SAM</option>
    </param>

    <!-- optional arguments -->

    <conditional name="set">
      <param name="settings_mode"
             type="select"
             label="further parameter settings"
             help="This section lets you specify the detailed parameter settings for the SNAP aligner. Only change them if you know what you are doing, i.e., read the SNAP manual first.">
        <option value="default">default settings</option>
        <option value="change">change settings</option>
      </param>

      <!-- default settings: hidden parameters that supply a value for every
           $set.* name referenced by the command template -->

      <when value="default">
        <param name="seedsize" type="hidden" value="20"/>
        <param name="slack" type="hidden" value="0.3"/>
        <param name="sp_min" type="hidden" value="100"/>
        <param name="sp_max" type="hidden" value="10000"/>
        <param name="maxdist" type="hidden" value="8"/>
        <param name="confdiff" type="hidden" value="2"/>

        <param name="maxseeds" type="hidden" value="25"/>
        <param name="maxhits" type="hidden" value="250"/>
        <param name="clipping" type="hidden" value="++"/>

        <param name="selectivity" type="hidden" value="off"/>
        <param name="filter_output" type="hidden" value="off"/>
        <param name="sort" type="hidden" value="0"/>
        <param name="mmatch_notation" type="hidden" value="general"/>
        <param name="max_mate_overlap" type="hidden" value="0"/>
      </when>

      <!-- change settings: the same parameter names, exposed for editing -->

      <when value="change">
        <param name="seedsize"
               type="integer"
               value="20"
               label="seed size (default: 20)"
               help="Length of the seeds used in the reference genome hash table (SNAP index option -s)."/>
        <param name="slack"
               type="float"
               value="0.3"
               label="hash table slack size (default: 0.3)"
               help="Corresponds to the -h option of SNAP index."/>

        <!-- paired-end specific options -->
        <param name="sp_min"
               type="integer"
               value="100"
               label="minimum spacing to allow between paired ends (default: 100)"
               help="Corresponds to the first value of the SNAP option -s."/>
        <param name="sp_max"
               type="integer"
               value="10000"
               label="maximum spacing to allow between paired ends (default: 10000)"
               help="Corresponds to the second value of the SNAP option -s."/>
        <param name="max_mate_overlap"
               type="float"
               value="0"
               label="Maximal overlap between the reads in a pair (as a fraction of their combined length; default: 0, no overlap allowed)"
               help="If the reads of a read pair overlap by more than this fraction of their combined length, they are filtered out"/>

        <param name="maxdist"
               type="integer"
               value="8"
               label="edit distance (default: 8)"
               help="maximum edit distance allowed per read or pair (SNAP option -d); higher values allow more divergent alignments to be found, but increase the rate of misalignments."/>
        <param name="confdiff"
               type="integer"
               value="2"
               label="confidence threshold (default: 2)"
               help="Confidence threshold (SNAP option -c); the minimum edit distance difference between two alternate alignments required to reject the poorer alignment as suboptimal; higher values increase the rate of ambiguously aligned reads."/>
        <param name="maxseeds"
               type="integer"
               value="25"
               label="maximum seeds per read (default: 25)"
               help="Number of seeds to use per read (SNAP option -n) when trying to match it to the reference genome; higher numbers will increase the rate of aligned reads and reduce the rate of misalignments, but will reduce performance."/>
        <param name="maxhits"
               type="integer"
               value="250"
               label="maximum hits per seed (default: 250)"
               help="Maximum hits to consider per seed (SNAP option -h); don't use a seed region in the alignment process if it matches more than maxhits regions in the reference genome. Higher values reduce the rate of misalignments, but reduce performance."/>
        <param name="clipping"
               type="select"
               label="read clipping (default: from back and front)"
               help="Specifies from which end of a read low-quality bases should be clipped (SNAP option -Cxx)">
          <option value="++">from back and front</option>
          <option value="-+">from back only</option>
          <option value="+-">from front only</option>
          <option value="--">no clipping</option>
        </param>
        <param name="selectivity"
               type="integer"
               value="1"
               label="selectivity (default: 1)"
               help="randomly choose 1/selectivity of the reads to score (SNAP option -S). The tool uses the default of 1 (or a 0 setting) to indicate that all reads should be worked with."/>
        <param name="filter_output"
               type="select"
               label="filter output (default: no filtering)"
               help="filter output (SNAP option -F) for certain classes of reads.">
          <option value="off">no filtering</option>
          <option value="a">aligned only</option>
          <option value="s">single-aligned only</option>
          <option value="u">unaligned only</option>
        </param>
        <param name="sort"
               type="select"
               label="output sorting (default: sort by read coordinates)"
               help="Sort the output file by alignment location (SNAP option --so).">
          <option value="0">sort by read coordinates</option>
          <option value="off">no sorting</option>
        </param>
        <param name="mmatch_notation"
               type="select"
               label="CIGAR symbols for alignment matches/mismatches (default: M notation)"
               help="Indicates whether CIGAR strings in the generated SAM/BAM file should use M (alignment match) rather than = and X (sequence (mis-)match). Warning: Downstream variant calling based on samtools currently relies on the old-style M notation!!">
          <option value="general">use M for both matches and mismatches</option>
          <option value="differentiate">use = for matches, X for mismatches</option>
        </param>
      </when>
    </conditional>
  </inputs>

  <outputs>
    <!-- declared as bam; switched to sam when the "oformat" select is set to sam -->
    <data name="outputfile" format="bam" label="Aligned reads from MiModd ${tool.name} on ${on_string}">
      <change_format>
        <when input="oformat" value="sam" format="sam"/>
      </change_format>
    </data>
  </outputs>

  <!-- The help body is reStructuredText-style markup and is deliberately kept
       flush-left, since leading indentation is significant in that markup. -->
  <help>
.. class:: infomark

**What it does**

The tool aligns the sequenced reads in an arbitrary number of input files against a common reference genome and stores the results in a single, possibly multi-sample output file.

It does so by using the ultrafast, hashtable-based aligner SNAP, but unless you want to change aligner-specific options you do not have to know anything about this implementation detail.

**Notes:**

1) The tool requires that each input file contains adequate header information (i.e. metadata about the read groups and samples it encodes). The *custom header file* is offered as an **optional choice** for input files that **may** contain such header information, but you **must** specify it if your specific file does not provide the information. You **can** also provide a header file for an input file with header information, in which case the custom header will overwrite the existing header of the input file.

2) Currently, you cannot configure aligner-specific options separately for specific input files from within this Galaxy tool. If you need this advanced level of control, you should use the command line tool ``mimodd snap_batch``.

  </help>
</tool>