comparison snap_caller.xml @ 0:7da2c9654a83 draft default tip

Uploaded
author wolma
date Tue, 12 Aug 2014 11:26:15 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:7da2c9654a83
1 <tool id="snap_caller" name="SNAP Read Alignment">
2 <description>Map sequence reads to a reference genome using SNAP</description>
3 <requirements> <!-- package dependencies declared for this tool: python3 3.4.1 and MiModD 0.1.3_9af04e0e9125 (presumably the provider of the mimodd executable called in the command section; confirm) -->
4 <requirement type="package" version="3.4.1">python3</requirement>
5 <requirement type="package" version="0.1.3_9af04e0e9125">MiModD</requirement>
6 </requirements>
7 <command>
8 mimodd snap_batch -s
9 ## SNAP calls (considering different cases): one double-quoted snap call string is emitted per entry of the datasets repeat, and all of them are appended to the single mimodd snap_batch -s command line above.
10 ## Paired-end fastq/gz input contributes two read files (ifile1, ifile2); every other mode/format combination contributes a single ifile. The header option is only emitted when a header dataset is set. The clipping value is attached with = (presumably so that values starting with a dash, such as -- or -+, are not mistaken for options; confirm).
11 #for $i in $datasets
12 "snap ${i.mode_choose.mode} $ref_genome
13 #if $str($i.mode_choose.mode) == "paired" and $str($i.mode_choose.input.iformat) in ("fastq", "gz"):
14 ${i.mode_choose.input.ifile1} ${i.mode_choose.input.ifile2}
15 #else:
16 ${i.mode_choose.input.ifile}
17 #end if
18 --outputfile $outputfile --iformat ${i.mode_choose.input.iformat} --oformat $oformat
19 --idx_seedsize $set.seedsize
20 --idx_slack $set.slack --maxseeds $set.maxseeds --maxhits $set.maxhits --clipping=$set.clipping --maxdist $set.maxdist --confdiff $set.confdiff
21 #if $i.mode_choose.input.header:
22 --header ${i.mode_choose.input.header}
23 #end if
24 #if $str($i.mode_choose.mode) == "paired":
25 --spacing $set.sp_min $set.sp_max
26 #end if
27 #if $str($set.selectivity) != "off":
28 --selectivity $set.selectivity
29 #end if
30 #if $str($set.filter_output) != "off":
31 --filter_output $set.filter_output
32 #end if
33 #if $str($set.sort) != "off":
34 --sort $set.sort
35 #end if
36 #if $str($set.mmatch_notation) == "general":
37 -M
38 #end if
39 --max_mate_overlap $set.max_mate_overlap
40 --verbose
41 "
42 #end for
43 </command>
44
45 <inputs>
46 <!-- mandatory arguments (and mode-conditionals) -->
47
48 <param name="ref_genome" type="data" format="fasta" label="reference genome" help="The fasta reference genome that SNAP should align reads against; a SNAP index will be built by the tool automatically."/> <!-- inserted as $ref_genome into every snap call built by the command template, so all datasets share this one reference -->
49
50 <repeat name="datasets" title="datasets" default="1" min="1"> <!-- one entry per input dataset; the command template loops over these entries and emits one snap call for each -->
51 <conditional name="mode_choose"> <!-- the chosen mode selects which of the two nested conditionals named input applies; the template reads both through mode_choose.input -->
52 <param name="mode" type="select" label="choose mode" help="Reads obtained from single-end sequencing runs should be aligned in 'single' mode, paired-end reads in 'paired' mode. **WARNING**: if the read input file is in SAM/BAM format, the current version of this tool will **not** verify the mode and may produce erroneous alignments with wrong settings!">
53 <option value="single">single-end</option>
54 <option value="paired">paired-end</option>
55 </param>
56
57 <when value="single"> <!-- single-end: every format takes exactly one read file (ifile) -->
58 <conditional name="input">
59 <param name="iformat" type="select" label="input file format">
60 <option value="bam">BAM</option>
61 <option value="sam">SAM</option>
62 <option value="gz">gz</option>
63 <option value="fastq">fastq</option>
64 </param>
65 <when value="bam"> <!-- BAM and SAM input: the header parameter is marked optional -->
66 <param name="ifile" type="data" format="bam" label="input file"/>
67 <param name="header" type="data" optional="true" format="sam" label="custom header file" />
68 </when>
69 <when value="sam">
70 <param name="ifile" type="data" format="sam" label="input file"/>
71 <param name="header" type="data" optional="true" format="sam" label="custom header file" />
72 </when>
73 <when value="gz"> <!-- gz and fastq input: the header parameter is declared without optional="true". NOTE(review): the gz read file carries no format restriction; confirm this is intended -->
74 <param name="ifile" type="data" label="input file"/>
75 <param name="header" type="data" format="sam" label="header file" />
76 </when>
77 <when value="fastq">
78 <param name="ifile" type="data" format="fastq" label="input file"/>
79 <param name="header" type="data" format="sam" label="header file" />
80 </when>
81 </conditional>
82 </when>
83 <when value="paired"> <!-- paired-end: fastq and gz take two read files (ifile1, ifile2); BAM and SAM take a single ifile -->
84 <conditional name="input">
85 <param name="iformat" type="select" label="input file format">
86 <option value="bam">BAM</option>
87 <option value="sam">SAM</option>
88 <option value="gz">gz</option>
89 <option value="fastq">fastq</option>
90 </param>
91 <when value="bam">
92 <param name="ifile" type="data" format="bam" label="input file"/>
93 <param name="header" type="data" optional="true" format="sam" label="custom header file" />
94 </when>
95 <when value="sam">
96 <param name="ifile" type="data" format="sam" label="input file"/>
97 <param name="header" type="data" optional="true" format="sam" label="custom header file" />
98 </when>
99 <when value="fastq">
100 <param name="ifile1" type="data" format="fastq" label="input file 1"/>
101 <param name="ifile2" type="data" format="fastq" label="input file 2"/>
102 <param name="header" type="data" format="sam" label="header file" />
103 </when>
104 <when value="gz"> <!-- NOTE(review): neither gz read file carries a format restriction; confirm this is intended -->
105 <param name="ifile1" type="data" label="input file 1"/>
106 <param name="ifile2" type="data" label="input file 2"/>
107 <param name="header" type="data" format="sam" label="header file" />
108 </when>
109 </conditional>
110 </when>
111 </conditional>
112 </repeat>
113
114 <param name="oformat" type="select" label="output file format"> <!-- inserted as $oformat into every snap call; the sam choice is also what the change_format rule of the outputfile dataset keys on -->
115 <option value="bam">BAM</option>
116 <option value="sam">SAM</option>
117 </param>
118
119 <!-- optional arguments -->
120
121 <conditional name="set"> <!-- both branches define the same fourteen parameter names, so the command template can read them through the set conditional whichever settings mode is chosen -->
122 <param name="settings_mode" type="select" label="further parameter settings" help="This section lets you specify the detailed parameter settings for the SNAP aligner. Only change them if you know what you are doing, i.e., read the SNAP manual first.">
123 <option value="default">default settings</option>
124 <option value="change">change settings</option>
125 </param>
126
127 <!-- default settings: hidden parameters carrying the same values the change branch offers as its defaults, except that selectivity is "off" here, which makes the command template omit the selectivity option -->
128
129 <when value="default">
130 <param name="seedsize" type="hidden" value="20"/>
131 <param name="slack" type="hidden" value="0.3"/>
132 <param name="sp_min" type="hidden" value="100"/>
133 <param name="sp_max" type="hidden" value="10000"/>
134 <param name="maxdist" type="hidden" value="8"/>
135 <param name="confdiff" type="hidden" value="2"/>
136
137 <param name="maxseeds" type="hidden" value="25"/>
138 <param name="maxhits" type="hidden" value="250"/>
139 <param name="clipping" type="hidden" value="++"/>
140
141 <param name="selectivity" type="hidden" value="off"/>
142 <param name="filter_output" type="hidden" value="off"/>
143 <param name="sort" type="hidden" value="0"/>
144 <param name="mmatch_notation" type="hidden" value="general"/>
145 <param name="max_mate_overlap" type="hidden" value="0" />
146 </when>
147
148 <!-- change settings: the same parameters, exposed as user-editable fields -->
149
150 <when value="change">
151 <param name="seedsize" type="integer" value="20" label="seed size (default: 20)" help="Length of the seeds used in the reference genome hash table (SNAP index option -s)."/>
152 <param name="slack" type="float" value="0.3" label="hash table slack size (default: 0.3)" help="Corresponds to the -h option of SNAP index."/>
153
154 <!-- paired-end specific options: the command template passes the spacing values only in paired mode, while max_mate_overlap is passed in every mode -->
155 <param name="sp_min" type="integer" value="100" label="minimum spacing to allow between paired ends (default: 100)" help="Corresponds to the first value of the SNAP option -s."/>
156 <param name="sp_max" type="integer" value="10000" label="maximum spacing to allow between paired ends (default: 10000)" help="Corresponds to the second value of the SNAP option -s."/>
157 <param name="max_mate_overlap" type="float" value="0" label="Maximal overlap between the reads in a pair (as a fraction of their combined length; default: 0, no overlap allowed)" help="If the reads of a read pair overlap by more than this fraction of their combined length, they are filtered out" />
158
159 <param name="maxdist" type="integer" value="8" label="edit distance (default: 8)" help="maximum edit distance allowed per read or pair (SNAP option -d); higher values allow more divergent alignments to be found, but increase the rate of misalignments."/>
160 <param name="confdiff" type="integer" value="2" label="confidence threshold (default: 2)" help="Confidence threshold (SNAP option -c); the minimum edit distance difference between two alternate alignments required to reject the poorer alignment as suboptimal; higher values increase the rate of ambiguously aligned reads."/>
161 <param name="maxseeds" type="integer" value="25" label="maximum seeds per read (default: 25)" help="Number of seeds to use per read (SNAP option -n) when trying to match it to the reference genome; higher numbers will increase the rate of aligned reads and reduce the rate of misalignments, but will reduce performance."/>
162 <param name="maxhits" type="integer" value="250" label="maximum hits per seed (default: 250)" help="Maximum hits to consider per seed (SNAP option -h); don't use a seed region in the alignment process if it matches more than maxhits regions in the reference genome. Higher values reduce the rate of misalignments, but reduce performance."/>
163 <param name="clipping" type="select" label="read clipping (default: from back and front)" help="Specifies from which end of a read low-quality bases should be clipped (SNAP option -Cxx)">
164 <option value="++">from back and front</option>
165 <option value="-+">from back only</option>
166 <option value="+-">from front only</option>
167 <option value="--">no clipping</option>
168 </param>
169 <param name="selectivity" type="integer" value="1" label="selectivity (default: 1)" help="randomly choose 1/selectivity of the reads to score (SNAP option -S). The tool uses the default of 1 (or a 0 setting) to indicate that all reads should be worked with." />
170 <param name="filter_output" type="select" label="filter output (default: no filtering)" help="filter output (SNAP option -F) for certain classes of reads.">
171 <option value="off">no filtering</option>
172 <option value="a">aligned only</option>
173 <option value="s">single-aligned only</option>
174 <option value="u">unaligned only</option>
175 </param>
176 <param name="sort" type="select" label="output sorting (default: sort by read coordinates)" help="Sort the output file by alignment location (SNAP option --so).">
177 <option value="0">sort by read coordinates</option>
178 <option value="off">no sorting</option>
179 </param>
180 <param name="mmatch_notation" type="select" label="CIGAR symbols for alignment matches/mismatches (default: M notation)" help="Indicates whether CIGAR strings in the generated SAM/BAM file should use M (alignment match) rather than = and X (sequence (mis-)match). Warning: Downstream variant calling based on samtools currently relies on the old-style M notation!!" >
181 <option value="general">use M for both matches and mismatches</option>
182 <option value="differentiate">use = for matches, X for mismatches</option>
183 </param>
184 </when>
185 </conditional>
186 </inputs>
187
188 <outputs>
189 <data name="outputfile" format="bam" label="Aligned reads from MiModD ${tool.name} on ${on_string}">
190 <change_format> <!-- the dataset is declared as bam and is switched to sam when the oformat parameter is set to sam -->
191 <when input="oformat" value="sam" format="sam"/>
192 </change_format>
193 </data>
194 </outputs>
195
196 <help>
197 .. class:: infomark
198
199 **What it does**
200
201 The tool aligns the sequenced reads in an arbitrary number of input files against a common reference genome and stores the results in a single, possibly multi-sample output file.
202
203 It does so by using the ultrafast, hashtable-based aligner SNAP, but unless you want to change aligner-specific options you do not have to know anything about this implementation detail.
204
205 **Notes:**
206
207 1) The tool requires that each input file contains adequate header information (i.e. metadata about the read groups and samples it encodes). The *custom header file* is offered as an **optional choice** for input files that **may** contain such header information, but you **must** specify it if your specific file does not provide the information. You **can** also provide a header file for an input file with header information, in which case the custom header will overwrite the existing header of the input file.
208
209 2) Currently, you cannot configure aligner-specific options separately for specific input files from within this Galaxy tool. If you need this advanced level of control, you should use the command line tool ``mimodd snap_batch``.
210
211 </help>
212 </tool>
213
214
215
216
217
218
219
220
221