annotate snap_caller.xml @ 0:2f47755c35da draft default tip

Uploaded
author wolma
date Tue, 09 Dec 2014 17:28:50 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2f47755c35da Uploaded
wolma
parents:
diff changeset
1 <tool id="read_alignment" name="SNAP Read Alignment">
2f47755c35da Uploaded
wolma
parents:
diff changeset
2 <description>Map sequence reads to a reference genome using SNAP</description>
2f47755c35da Uploaded
wolma
parents:
diff changeset
3 <requirements>
2f47755c35da Uploaded
wolma
parents:
diff changeset
4 <requirement type="package" version="0.1.5">mimodd</requirement>
2f47755c35da Uploaded
wolma
parents:
diff changeset
5 </requirements>
2f47755c35da Uploaded
wolma
parents:
diff changeset
6 <version_command>mimodd version -q</version_command>
2f47755c35da Uploaded
wolma
parents:
diff changeset
7 <command>
2f47755c35da Uploaded
wolma
parents:
diff changeset
8 mimodd snap-batch -s
2f47755c35da Uploaded
wolma
parents:
diff changeset
9 ## SNAP calls (considering different cases)
2f47755c35da Uploaded
wolma
parents:
diff changeset
10
2f47755c35da Uploaded
wolma
parents:
diff changeset
11 #for $i in $datasets
2f47755c35da Uploaded
wolma
parents:
diff changeset
12 "snap ${i.mode_choose.mode} $ref_genome
2f47755c35da Uploaded
wolma
parents:
diff changeset
13 #if $str($i.mode_choose.mode) == "paired" and $str($i.mode_choose.input.iformat) in ("fastq", "gz"):
2f47755c35da Uploaded
wolma
parents:
diff changeset
14 ${i.mode_choose.input.ifile1} ${i.mode_choose.input.ifile2}
2f47755c35da Uploaded
wolma
parents:
diff changeset
15 #else:
2f47755c35da Uploaded
wolma
parents:
diff changeset
16 ${i.mode_choose.input.ifile}
2f47755c35da Uploaded
wolma
parents:
diff changeset
17 #end if
2f47755c35da Uploaded
wolma
parents:
diff changeset
18 --ofile $outputfile --iformat ${i.mode_choose.input.iformat} --oformat $oformat
2f47755c35da Uploaded
wolma
parents:
diff changeset
19 --idx-seedsize $set.seedsize
2f47755c35da Uploaded
wolma
parents:
diff changeset
20 --idx-slack $set.slack --maxseeds $set.maxseeds --maxhits $set.maxhits --clipping=$set.clipping --maxdist $set.maxdist --confdiff $set.confdiff --confadapt $set.confadpt
2f47755c35da Uploaded
wolma
parents:
diff changeset
21 #if $i.mode_choose.input.header:
2f47755c35da Uploaded
wolma
parents:
diff changeset
22 --header ${i.mode_choose.input.header}
2f47755c35da Uploaded
wolma
parents:
diff changeset
23 #end if
2f47755c35da Uploaded
wolma
parents:
diff changeset
24 #if $str($i.mode_choose.mode) == "paired":
2f47755c35da Uploaded
wolma
parents:
diff changeset
25 --spacing $set.sp_min $set.sp_max
2f47755c35da Uploaded
wolma
parents:
diff changeset
26 #end if
2f47755c35da Uploaded
wolma
parents:
diff changeset
27 #if $str($set.selectivity) != "off":
2f47755c35da Uploaded
wolma
parents:
diff changeset
28 --selectivity $set.selectivity
2f47755c35da Uploaded
wolma
parents:
diff changeset
29 #end if
2f47755c35da Uploaded
wolma
parents:
diff changeset
30 #if $str($set.filter_output) != "off":
2f47755c35da Uploaded
wolma
parents:
diff changeset
31 --filter-output $set.filter_output
2f47755c35da Uploaded
wolma
parents:
diff changeset
32 #end if
2f47755c35da Uploaded
wolma
parents:
diff changeset
33 #if $str($set.sort) != "off":
2f47755c35da Uploaded
wolma
parents:
diff changeset
34 --sort $set.sort
2f47755c35da Uploaded
wolma
parents:
diff changeset
35 #end if
2f47755c35da Uploaded
wolma
parents:
diff changeset
36 #if $str($set.mmatch_notation) == "general":
2f47755c35da Uploaded
wolma
parents:
diff changeset
37 -M
2f47755c35da Uploaded
wolma
parents:
diff changeset
38 #end if
2f47755c35da Uploaded
wolma
parents:
diff changeset
39 --max-mate-overlap $set.max_mate_overlap
2f47755c35da Uploaded
wolma
parents:
diff changeset
40 --verbose
2f47755c35da Uploaded
wolma
parents:
diff changeset
41 "
2f47755c35da Uploaded
wolma
parents:
diff changeset
42 #end for
2f47755c35da Uploaded
wolma
parents:
diff changeset
43 </command>
2f47755c35da Uploaded
wolma
parents:
diff changeset
44
2f47755c35da Uploaded
wolma
parents:
diff changeset
45 <inputs>
2f47755c35da Uploaded
wolma
parents:
diff changeset
46 <!-- mandatory arguments (and mode-conditionals) -->
2f47755c35da Uploaded
wolma
parents:
diff changeset
47
2f47755c35da Uploaded
wolma
parents:
diff changeset
48 <param name="ref_genome" type="data" format="fasta" label="reference genome" help="The fasta reference genome that SNAP should align reads against."/>
2f47755c35da Uploaded
wolma
parents:
diff changeset
49
2f47755c35da Uploaded
wolma
parents:
diff changeset
50 <repeat name="datasets" title="datasets" default="1" min="1">
2f47755c35da Uploaded
wolma
parents:
diff changeset
51 <conditional name="mode_choose">
2f47755c35da Uploaded
wolma
parents:
diff changeset
52 <param name="mode" type="select" label="choose mode" help="Reads obtained from single-end sequencing runs should be aligned in 'single' mode, paired-end reads in 'paired' mode. **WARNING**: if the read input file is in SAM/BAM format, the current version of this tool will **not** verify the mode and may produce erroneous alignments with wrong settings!">
2f47755c35da Uploaded
wolma
parents:
diff changeset
53 <option value="single">single-end</option>
2f47755c35da Uploaded
wolma
parents:
diff changeset
54 <option value="paired">paired-end</option>
2f47755c35da Uploaded
wolma
parents:
diff changeset
55 </param>
2f47755c35da Uploaded
wolma
parents:
diff changeset
56
2f47755c35da Uploaded
wolma
parents:
diff changeset
57 <when value="single">
2f47755c35da Uploaded
wolma
parents:
diff changeset
58 <conditional name="input">
2f47755c35da Uploaded
wolma
parents:
diff changeset
59 <param name="iformat" type="select" label="input file format">
2f47755c35da Uploaded
wolma
parents:
diff changeset
60 <option value="bam">BAM</option>
2f47755c35da Uploaded
wolma
parents:
diff changeset
61 <option value="sam">SAM</option>
2f47755c35da Uploaded
wolma
parents:
diff changeset
62 <option value="gz">gz</option>
2f47755c35da Uploaded
wolma
parents:
diff changeset
63 <option value="fastq">fastq</option>
2f47755c35da Uploaded
wolma
parents:
diff changeset
64 </param>
2f47755c35da Uploaded
wolma
parents:
diff changeset
65 <when value="bam">
2f47755c35da Uploaded
wolma
parents:
diff changeset
66 <param name="ifile" type="data" format="bam" label="input file"/>
2f47755c35da Uploaded
wolma
parents:
diff changeset
67 <param name="header" type="data" optional="true" format="sam" label="custom header file" />
2f47755c35da Uploaded
wolma
parents:
diff changeset
68 </when>
2f47755c35da Uploaded
wolma
parents:
diff changeset
69 <when value="sam">
2f47755c35da Uploaded
wolma
parents:
diff changeset
70 <param name="ifile" type="data" format="sam" label="input file"/>
2f47755c35da Uploaded
wolma
parents:
diff changeset
71 <param name="header" type="data" optional="true" format="sam" label="custom header file" />
2f47755c35da Uploaded
wolma
parents:
diff changeset
72 </when>
2f47755c35da Uploaded
wolma
parents:
diff changeset
73 <when value="gz">
2f47755c35da Uploaded
wolma
parents:
diff changeset
74 <param name="ifile" type="data" label="input file"/>
2f47755c35da Uploaded
wolma
parents:
diff changeset
75 <param name="header" type="data" format="sam" label="header file" />
2f47755c35da Uploaded
wolma
parents:
diff changeset
76 </when>
2f47755c35da Uploaded
wolma
parents:
diff changeset
77 <when value="fastq">
2f47755c35da Uploaded
wolma
parents:
diff changeset
78 <param name="ifile" type="data" format="fastq" label="input file"/>
2f47755c35da Uploaded
wolma
parents:
diff changeset
79 <param name="header" type="data" format="sam" label="header file" />
2f47755c35da Uploaded
wolma
parents:
diff changeset
80 </when>
2f47755c35da Uploaded
wolma
parents:
diff changeset
81 </conditional>
2f47755c35da Uploaded
wolma
parents:
diff changeset
82 </when>
2f47755c35da Uploaded
wolma
parents:
diff changeset
83 <when value="paired">
2f47755c35da Uploaded
wolma
parents:
diff changeset
84 <conditional name="input">
2f47755c35da Uploaded
wolma
parents:
diff changeset
85 <param name="iformat" type="select" label="input file format">
2f47755c35da Uploaded
wolma
parents:
diff changeset
86 <option value="bam">BAM</option>
2f47755c35da Uploaded
wolma
parents:
diff changeset
87 <option value="sam">SAM</option>
2f47755c35da Uploaded
wolma
parents:
diff changeset
88 <option value="gz">gz</option>
2f47755c35da Uploaded
wolma
parents:
diff changeset
89 <option value="fastq">fastq</option>
2f47755c35da Uploaded
wolma
parents:
diff changeset
90 </param>
2f47755c35da Uploaded
wolma
parents:
diff changeset
91 <when value="bam">
2f47755c35da Uploaded
wolma
parents:
diff changeset
92 <param name="ifile" type="data" format="bam" label="input file"/>
2f47755c35da Uploaded
wolma
parents:
diff changeset
93 <param name="header" type="data" optional="true" format="sam" label="custom header file" />
2f47755c35da Uploaded
wolma
parents:
diff changeset
94 </when>
2f47755c35da Uploaded
wolma
parents:
diff changeset
95 <when value="sam">
2f47755c35da Uploaded
wolma
parents:
diff changeset
96 <param name="ifile" type="data" format="sam" label="input file"/>
2f47755c35da Uploaded
wolma
parents:
diff changeset
97 <param name="header" type="data" optional="true" format="sam" label="custom header file" />
2f47755c35da Uploaded
wolma
parents:
diff changeset
98 </when>
2f47755c35da Uploaded
wolma
parents:
diff changeset
99 <when value="fastq">
2f47755c35da Uploaded
wolma
parents:
diff changeset
100 <param name="ifile1" type="data" format="fastq" label="inputfile with the first set of reads of paired-end data"/>
2f47755c35da Uploaded
wolma
parents:
diff changeset
101 <param name="ifile2" type="data" format="fastq" label="inputfile with the second set of reads of paired-end data"/>
2f47755c35da Uploaded
wolma
parents:
diff changeset
102 <param name="header" type="data" format="sam" label="header file" help="required" />
2f47755c35da Uploaded
wolma
parents:
diff changeset
103 </when>
2f47755c35da Uploaded
wolma
parents:
diff changeset
104 <when value="gz">
2f47755c35da Uploaded
wolma
parents:
diff changeset
105 <param name="ifile1" type="data" label="inputfile with the first set of reads of paired-end data"/>
2f47755c35da Uploaded
wolma
parents:
diff changeset
106 <param name="ifile2" type="data" label="inputfile with the second set of reads of paired-end data"/>
2f47755c35da Uploaded
wolma
parents:
diff changeset
107 <param name="header" type="data" format="sam" label="header file" help="required" />
2f47755c35da Uploaded
wolma
parents:
diff changeset
108 </when>
2f47755c35da Uploaded
wolma
parents:
diff changeset
109 </conditional>
2f47755c35da Uploaded
wolma
parents:
diff changeset
110 </when>
2f47755c35da Uploaded
wolma
parents:
diff changeset
111 </conditional>
2f47755c35da Uploaded
wolma
parents:
diff changeset
112 </repeat>
2f47755c35da Uploaded
wolma
parents:
diff changeset
113
2f47755c35da Uploaded
wolma
parents:
diff changeset
114 <param name="oformat" type="select" label="output file format">
2f47755c35da Uploaded
wolma
parents:
diff changeset
115 <option value="bam">BAM</option>
2f47755c35da Uploaded
wolma
parents:
diff changeset
116 <option value="sam">SAM</option>
2f47755c35da Uploaded
wolma
parents:
diff changeset
117 </param>
2f47755c35da Uploaded
wolma
parents:
diff changeset
118
2f47755c35da Uploaded
wolma
parents:
diff changeset
119 <!-- optional arguments -->
2f47755c35da Uploaded
wolma
parents:
diff changeset
120
2f47755c35da Uploaded
wolma
parents:
diff changeset
121 <conditional name="set">
2f47755c35da Uploaded
wolma
parents:
diff changeset
122 <param name="settings_mode" type="select" label="further parameter settings" help="This section lets you specify the detailed parameter settings for the SNAP aligner. Only change them if you know what you are doing, i.e., read the documentation first.">
2f47755c35da Uploaded
wolma
parents:
diff changeset
123 <option value="default">default settings</option>
2f47755c35da Uploaded
wolma
parents:
diff changeset
124 <option value="change">change settings</option>
2f47755c35da Uploaded
wolma
parents:
diff changeset
125 </param>
2f47755c35da Uploaded
wolma
parents:
diff changeset
126
2f47755c35da Uploaded
wolma
parents:
diff changeset
127 <!-- default settings -->
2f47755c35da Uploaded
wolma
parents:
diff changeset
128
2f47755c35da Uploaded
wolma
parents:
diff changeset
129 <when value="default">
2f47755c35da Uploaded
wolma
parents:
diff changeset
130 <param name="seedsize" type="hidden" value="20"/>
2f47755c35da Uploaded
wolma
parents:
diff changeset
131 <param name="slack" type="hidden" value="0.3"/>
2f47755c35da Uploaded
wolma
parents:
diff changeset
132 <param name="sp_min" type="hidden" value="100"/>
2f47755c35da Uploaded
wolma
parents:
diff changeset
133 <param name="sp_max" type="hidden" value="10000"/>
2f47755c35da Uploaded
wolma
parents:
diff changeset
134 <param name="maxdist" type="hidden" value="8"/>
2f47755c35da Uploaded
wolma
parents:
diff changeset
135 <param name="confdiff" type="hidden" value="2"/>
2f47755c35da Uploaded
wolma
parents:
diff changeset
136 <param name="confadpt" type="hidden" value="7"/>
2f47755c35da Uploaded
wolma
parents:
diff changeset
137
2f47755c35da Uploaded
wolma
parents:
diff changeset
138 <param name="maxseeds" type="hidden" value="25"/>
2f47755c35da Uploaded
wolma
parents:
diff changeset
139 <param name="maxhits" type="hidden" value="250"/>
2f47755c35da Uploaded
wolma
parents:
diff changeset
140 <param name="clipping" type="hidden" value="++"/>
2f47755c35da Uploaded
wolma
parents:
diff changeset
141
2f47755c35da Uploaded
wolma
parents:
diff changeset
142 <param name="selectivity" type="hidden" value="off"/>
2f47755c35da Uploaded
wolma
parents:
diff changeset
143 <param name="filter_output" type="hidden" value="off"/>
2f47755c35da Uploaded
wolma
parents:
diff changeset
144 <param name="sort" type="hidden" value="0"/>
2f47755c35da Uploaded
wolma
parents:
diff changeset
145 <param name="mmatch_notation" type="hidden" value="general"/>
2f47755c35da Uploaded
wolma
parents:
diff changeset
146 <param name="max_mate_overlap" type="hidden" value="0" />
2f47755c35da Uploaded
wolma
parents:
diff changeset
147 </when>
2f47755c35da Uploaded
wolma
parents:
diff changeset
148
2f47755c35da Uploaded
wolma
parents:
diff changeset
149 <!-- change settings -->
2f47755c35da Uploaded
wolma
parents:
diff changeset
150
2f47755c35da Uploaded
wolma
parents:
diff changeset
151 <when value="change">
2f47755c35da Uploaded
wolma
parents:
diff changeset
152 <param name="seedsize" type="integer" value="20" label="seed size (default: 20)" help="Length of the seeds used in the reference genome hash table (SNAP index option -s)."/>
2f47755c35da Uploaded
wolma
parents:
diff changeset
153 <param name="slack" type="float" value="0.3" label="hash table slack size (default: 0.3)" help="Corresponds to the -h option of SNAP index."/>
2f47755c35da Uploaded
wolma
parents:
diff changeset
154
2f47755c35da Uploaded
wolma
parents:
diff changeset
155 <!-- paired-end specific options -->
2f47755c35da Uploaded
wolma
parents:
diff changeset
156 <param name="sp_min" type="integer" value="100" label="minimum spacing to allow between paired ends (default: 100)" help="Corresponds to the first value of the SNAP option -s."/>
2f47755c35da Uploaded
wolma
parents:
diff changeset
157 <param name="sp_max" type="integer" value="10000" label="maximum spacing to allow between paired ends (default: 10000)" help="Corresponds to the second value of the SNAP option -s."/>
2f47755c35da Uploaded
wolma
parents:
diff changeset
158 <param name="max_mate_overlap" type="float" value="0" label="Maximal overlap between the reads in a pair (as a fraction of their combined length; default: 0, no overlap allowed)" help="If the reads of a read pair overlap by more than this fraction of their combined length, they are filtered out" />
2f47755c35da Uploaded
wolma
parents:
diff changeset
159
2f47755c35da Uploaded
wolma
parents:
diff changeset
160 <param name="maxdist" type="integer" value="8" label="edit distance (default: 8)" help="maximum edit distance allowed per read or pair (SNAP option -d); higher values allow more divergent alignments to be found, but increase the rate of misalignments."/>
2f47755c35da Uploaded
wolma
parents:
diff changeset
161 <param name="maxhits" type="integer" value="250" label="maximum hits per seed (default: 250)" help="Maximum hits to consider per seed (SNAP option -h); don't use a seed region in the alignment process if it matches more than maxhits regions in the reference genome. Higher values reduce the rate of misalignments, but reduce performance."/>
2f47755c35da Uploaded
wolma
parents:
diff changeset
162 <param name="confdiff" type="integer" value="2" label="confidence threshold (default: 2)" help="Confidence threshold (SNAP option -c); the minimum edit distance difference between two alternate alignments required to reject the poorer alignment as suboptimal; higher values increase the rate of ambiguously aligned reads."/>
2f47755c35da Uploaded
wolma
parents:
diff changeset
163 <param name="confadpt" type="integer" value="7" label="adaptive confdiff behaviour (default: 7)" help="Specifies how many seeds of a read may be ignored (based on the maximum hits value above) before the confidence threshold above gets increased by one for that read; helps fine-tuning alignment accuracy in repetitive regions of the genome."/>
2f47755c35da Uploaded
wolma
parents:
diff changeset
164 <param name="maxseeds" type="integer" value="25" label="maximum seeds per read (default: 25)" help="Number of seeds to use per read (SNAP option -n) when trying to match it to the reference genome; higher numbers will increase the rate of aligned reads and reduce the rate of misalignments, but will reduce performance."/>
2f47755c35da Uploaded
wolma
parents:
diff changeset
165 <param name="clipping" type="select" label="read clipping (default: from back and front)" help="Specifies from which end of a read low-quality bases should be clipped (SNAP option -Cxx)">
2f47755c35da Uploaded
wolma
parents:
diff changeset
166 <option value="++">from back and front</option>
2f47755c35da Uploaded
wolma
parents:
diff changeset
167 <option value="-+">from back only</option>
2f47755c35da Uploaded
wolma
parents:
diff changeset
168 <option value="+-">from front only</option>
2f47755c35da Uploaded
wolma
parents:
diff changeset
169 <option value="--">no clipping</option>
2f47755c35da Uploaded
wolma
parents:
diff changeset
170 </param>
2f47755c35da Uploaded
wolma
parents:
diff changeset
171 <param name="selectivity" type="integer" value="1" label="selectivity (default: 1)" help="randomly choose 1/selectivity of the reads to score (SNAP option -S). The tool uses the default of 1 (or a 0 setting) to indicate that all reads should be worked with." />
2f47755c35da Uploaded
wolma
parents:
diff changeset
172 <param name="filter_output" type="select" label="filter output (default: no filtering)" help="filter output (SNAP option -F) for certain classes of reads.">
2f47755c35da Uploaded
wolma
parents:
diff changeset
173 <option value="off">no filtering</option>
2f47755c35da Uploaded
wolma
parents:
diff changeset
174 <option value="a">aligned only</option>
2f47755c35da Uploaded
wolma
parents:
diff changeset
175 <option value="s">single-aligned only</option>
2f47755c35da Uploaded
wolma
parents:
diff changeset
176 <option value="u">unaligned only</option>
2f47755c35da Uploaded
wolma
parents:
diff changeset
177 </param>
2f47755c35da Uploaded
wolma
parents:
diff changeset
178 <param name="sort" type="select" label="output sorting (default: sort by read coordinates)" help="Sort the output file by alignment location (SNAP option --so).">
2f47755c35da Uploaded
wolma
parents:
diff changeset
179 <option value="0">sort by read coordinates</option>
2f47755c35da Uploaded
wolma
parents:
diff changeset
180 <option value="off">no sorting</option>
2f47755c35da Uploaded
wolma
parents:
diff changeset
181 </param>
2f47755c35da Uploaded
wolma
parents:
diff changeset
182 <param name="mmatch_notation" type="select" label="CIGAR symbols for alignment matches/mismatches (default: M notation)" help="Indicates whether CIGAR strings in the generated SAM/BAM file should use M (alignment match) rather than = and X (sequence (mis-)match). Warning: Downstream variant calling based on samtools currently relies on the old-style M notation!!" >
2f47755c35da Uploaded
wolma
parents:
diff changeset
183 <option value="general">use M for both matches and mismatches</option>
2f47755c35da Uploaded
wolma
parents:
diff changeset
184 <option value="differentiate">use = for matches, X for mismatches</option>
2f47755c35da Uploaded
wolma
parents:
diff changeset
185 </param>
2f47755c35da Uploaded
wolma
parents:
diff changeset
186 </when>
2f47755c35da Uploaded
wolma
parents:
diff changeset
187 </conditional>
2f47755c35da Uploaded
wolma
parents:
diff changeset
188 </inputs>
2f47755c35da Uploaded
wolma
parents:
diff changeset
189
2f47755c35da Uploaded
wolma
parents:
diff changeset
190 <outputs>
2f47755c35da Uploaded
wolma
parents:
diff changeset
191 <data name="outputfile" format="bam" label="Aligned reads from MiModd ${tool.name} on ${on_string}">
2f47755c35da Uploaded
wolma
parents:
diff changeset
192 <change_format>
2f47755c35da Uploaded
wolma
parents:
diff changeset
193 <when input="oformat" value="sam" format="sam"/>
2f47755c35da Uploaded
wolma
parents:
diff changeset
194 </change_format>
2f47755c35da Uploaded
wolma
parents:
diff changeset
195 </data>
2f47755c35da Uploaded
wolma
parents:
diff changeset
196 </outputs>
2f47755c35da Uploaded
wolma
parents:
diff changeset
197
2f47755c35da Uploaded
wolma
parents:
diff changeset
198 <help>
2f47755c35da Uploaded
wolma
parents:
diff changeset
199 .. class:: infomark
2f47755c35da Uploaded
wolma
parents:
diff changeset
200
2f47755c35da Uploaded
wolma
parents:
diff changeset
201 **What it does**
2f47755c35da Uploaded
wolma
parents:
diff changeset
202
2f47755c35da Uploaded
wolma
parents:
diff changeset
203 The tool aligns the sequenced reads in an arbitrary number of input datasets against a common reference genome and stores the results in a single, possibly multi-sample output file. It supports a variety of different sequenced reads input formats, i.e., SAM, BAM, fastq and gzipped fastq, and both single-end and paired-end data.
2f47755c35da Uploaded
wolma
parents:
diff changeset
204
2f47755c35da Uploaded
wolma
parents:
diff changeset
205 Internally, the tool uses the ultrafast, hashtable-based aligner SNAP (http://snap.cs.berkeley.edu), hence its name.
2f47755c35da Uploaded
wolma
parents:
diff changeset
206
2f47755c35da Uploaded
wolma
parents:
diff changeset
207 **Notes:**
2f47755c35da Uploaded
wolma
parents:
diff changeset
208
2f47755c35da Uploaded
wolma
parents:
diff changeset
209 1) In its standard configuration Galaxy will decompress any .gz files during their upload, so the option to align gzipped fastq input is useful only with customized Galaxy instances or by using linked files as explained in our `recipe for using gzipped fastq files in Galaxy`_ from the `MiModD user guide`_.
2f47755c35da Uploaded
wolma
parents:
diff changeset
210
2f47755c35da Uploaded
wolma
parents:
diff changeset
211 2) To use paired-end fastq data with the tool the read mate information needs to be split over two fastq files in corresponding order.
2f47755c35da Uploaded
wolma
parents:
diff changeset
212
2f47755c35da Uploaded
wolma
parents:
diff changeset
213 **TIP:** If your paired-end data is arranged differently, you may look into the *fastq splitter* and *fastq de-interlacer* tools for Galaxy from the `Fastq Manipulation category`_ of the Galaxy Tool Shed to see if they can convert your files to the expected format.
2f47755c35da Uploaded
wolma
parents:
diff changeset
214
2f47755c35da Uploaded
wolma
parents:
diff changeset
215 3) The tool supports the alignment of reads from the same sequencing run, but distributed across several input files.
2f47755c35da Uploaded
wolma
parents:
diff changeset
216
2f47755c35da Uploaded
wolma
parents:
diff changeset
217 Generally, it expects the reads from each input dataset to belong to one read-group and will abort with an error message if any input dataset declares more than one read group or sample name in its header. Different datasets, however, are allowed to contain reads from the same read-group (as indicated by matching read-group IDs and sample names in their headers), in which case the reads will be combined into one group in the output.
2f47755c35da Uploaded
wolma
parents:
diff changeset
218
2f47755c35da Uploaded
wolma
parents:
diff changeset
219 4) Read-group information is required for every input dataset!
2f47755c35da Uploaded
wolma
parents:
diff changeset
220
2f47755c35da Uploaded
wolma
parents:
diff changeset
221 We generally recommend storing NGS datasets in SAM/BAM format with run metadata stored in the file header. You can use the *NGS Run Annotation* and *Convert* tools to convert data in fastq format to SAM/BAM with added run information.
2f47755c35da Uploaded
wolma
parents:
diff changeset
222
2f47755c35da Uploaded
wolma
parents:
diff changeset
223 While it is not our recommended approach, you can, if you prefer it, align reads from fastq files or SAM/BAM files without header read-group information. To do so, you **must** specify a SAM file that provides the missing information in its header along with the input dataset. You can generate a SAM header file with the *NGS Run Annotation* tool.
2f47755c35da Uploaded
wolma
parents:
diff changeset
224
2f47755c35da Uploaded
wolma
parents:
diff changeset
225 Optionally, a SAM header file can also be used to replace existing read-group information in a headered SAM/BAM input file. This can be used to resolve read-group ID conflicts between multiple input files at tool runtime.
2f47755c35da Uploaded
wolma
parents:
diff changeset
226
2f47755c35da Uploaded
wolma
parents:
diff changeset
227 5) Currently, you cannot configure aligner-specific options separately for specific input files from within this Galaxy tool. If you need this advanced level of control, you should use the command line tool ``mimodd snap-batch``.
2f47755c35da Uploaded
wolma
parents:
diff changeset
228
2f47755c35da Uploaded
wolma
parents:
diff changeset
229 .. _Fastq Manipulation category: https://toolshed.g2.bx.psu.edu/repository/browse_repositories_in_category?id=310ff67d4caf6531
2f47755c35da Uploaded
wolma
parents:
diff changeset
230 .. _recipe for using gzipped fastq files in Galaxy: http://mimodd.readthedocs.org/en/latest/recipes.html#use-gzipped-fastq-files-in-galaxy
2f47755c35da Uploaded
wolma
parents:
diff changeset
231 .. _MiModD user guide: http://mimodd.readthedocs.org/en/latest
2f47755c35da Uploaded
wolma
parents:
diff changeset
232
2f47755c35da Uploaded
wolma
parents:
diff changeset
233 </help>
2f47755c35da Uploaded
wolma
parents:
diff changeset
234 </tool>
2f47755c35da Uploaded
wolma
parents:
diff changeset
235