|
37
|
1 <?xml version="1.0" encoding="UTF-8"?>
|
|
|
2 <tool id="samtools_parallel_mpileup" name="Samtools parallel mpileup">
|
|
38
|
3 <description>Samtools mpileup (supporting parallelization)</description>
|
|
37
|
<!--
    Packages Galaxy must provide before the command runs.
    The command template calls two executables: "samtools-parallel-mpileup"
    and "samtools"; the two samtools packages below presumably supply them.
    NOTE(review): ncurses is presumably a build/runtime dependency of
    samtools 0.1.19 - confirm against the tool dependency definitions.
-->
<requirements>
    <requirement version="5.9" type="package">ncurses</requirement>
    <requirement version="0.1.19a" type="package">samtools_parallel_mpileup_0_1_19a</requirement>
    <requirement version="0.1.19" type="package">package_samtools_0_1_19</requirement>
</requirements>
|
|
|
<!--
    Cheetah template that renders the shell command line.

    Flow:
      1. With the "attribute" reference source, all alignments must share
         exactly one dbkey; otherwise only an error message is written to
         stderr and mpileup is not started.
      2. Choose the executable (parallel fork or classical samtools).
      3. Pass the reference fasta with -f.
      4. Append the optional region restriction and the extended options.
      5. Append every alignment file.
      6. Redirect stderr of mpileup to stderr_1.txt, optionally sort the
         pileup, write it to the output dataset and finally replay the
         captured stderr on stdout.

    The body is wrapped in CDATA so that shell redirections (>, >&2) need
    no XML escaping, and comments inside it use the Cheetah "##" syntax so
    they never depend on how the XML parser treats comments in text nodes.
-->
<command><![CDATA[
    #if $reference_genome_source.source_select == "attribute" and len({ alignment.metadata.dbkey:True for alignment in $alignments }.keys()) != 1
        echo "Invalid number of dbkeys are found: ${ len({ alignment.metadata.dbkey:True for alignment in $alignments }.keys()) }, while only one should be used. Make sure that the alignments are done on the same reference genome and that 'tool-data/all_fasta.loc' is configured properly!" >&2
    #else
        ## Executable: multi-threaded fork (with thread count) or classical samtools.
        #if $mpileup_parallelization.mpileup_parallelization_select == "true"
            samtools-parallel-mpileup mpileup
                -t $mpileup_parallelization.samtools_threads
        #else
            samtools mpileup
        #end if

        -f
        #if $reference_genome_source.source_select != "attribute"
            ## indexed_filtered, indexed_all and history all expose the fasta
            ## as the reference_genome parameter of the conditional.
            "$reference_genome_source.reference_genome"
        #else
            ## Workaround to obtain the genome.fa file that corresponds to the
            ## dbkey of the alignments. Because this file is calculated during
            ## run-time, it can be used in a workflow.
            ## The first all_fasta row whose first column equals the single
            ## dbkey is taken and its last column is used as the fasta path.
            ## list() keeps the indexing valid when filter() and keys() return
            ## iterators/views instead of lists.
            "${ list( filter( lambda x: str( x[0] ) == str( list( { alignment.metadata.dbkey:True for alignment in $alignments }.keys() )[0] ), $__app__.tool_data_tables[ 'all_fasta' ].get_fields() ) )[0][-1] }"
        #end if

        ## Nested parameters are reached through their conditional; the
        ## values are quoted because they are free text / file paths.
        #if $extended_parameters_regions.samtools_regions == "region"
            -r "$extended_parameters_regions.samtools_r"
        #else if $extended_parameters_regions.samtools_regions == "regions_file_pos" or $extended_parameters_regions.samtools_regions == "regions_file_bed"
            -l "$extended_parameters_regions.samtools_l"
        #end if

        #if $extended_parameters.parameters == "extended"
            ## Boolean parameters render as their flag (or nothing).
            $extended_parameters.samtools_6
            $extended_parameters.samtools_A
            $extended_parameters.samtools_B
            -C $extended_parameters.samtools_C
            -d $extended_parameters.samtools_d
            $extended_parameters.samtools_E
            -M $extended_parameters.samtools_M
            $extended_parameters.samtools_R
            -q $extended_parameters.samtools_q
            -Q $extended_parameters.samtools_Q

            -e $extended_parameters.samtools_e
            -F $extended_parameters.samtools_F
            -h $extended_parameters.samtools_h
            $extended_parameters.samtools_I
            -L $extended_parameters.samtools_L
            -m $extended_parameters.samtools_m
            -o $extended_parameters.samtools_o
            $extended_parameters.samtools_p
            -P "$extended_parameters.samtools_P"
        #end if

        #for $alignment in $alignments
            "${alignment}"
        #end for

        ## Keep mpileup diagnostics out of the pipe; they are replayed below.
        2> stderr_1.txt

        ## Compare the rendered value so the test does not rely on the
        ## truthiness of the parameter wrapper object.
        #if str($sort_mpileup) == "true"
            | sort -k 1,1 -k 2,2
        #end if

        > "$output" ;
        cat stderr_1.txt
    #end if
]]></command>
|
|
|
79
|
|
|
80 <inputs>
|
|
|
<!-- One or more alignment datasets (BAM or SAM); the command template appends each selected dataset to the mpileup call. -->
<param name="alignments"
       type="data"
       format="bam,sam"
       multiple="true"
       label="Alignment file"
       help="Mapped reads in BAM or SAM format."/>
|
|
|
82
|
|
|
<!--
    Selects where the reference fasta comes from:
      indexed_filtered - built-in "all_fasta" entries limited to the dbkey of the alignments
      history          - a fasta dataset from the history
      indexed_all      - every built-in "all_fasta" entry, unfiltered
      attribute        - no parameter here; the command template looks the
                         fasta up from the dbkey metadata of the alignments
-->
<conditional name="reference_genome_source">
    <param type="select" name="source_select" label="Fasta Source">
        <option value="indexed_filtered">Use a built-in index (which fits your reference)</option>
        <option value="history">Use reference from the history</option>
        <option value="indexed_all">Use a built-in index (entire list) - avoid this option if possible; only useful if you design a workflow</option>
        <option value="attribute">Use a built-in index based on the 'metadata.dbkey' attribute; ideal in workflows</option>
    </param>

    <when value="indexed_filtered">
        <param type="select" name="reference_genome" label="Reference Genome used during alignment (fasta)">
            <options from_data_table="all_fasta">
                <column index="2" name="name"/>
                <column index="1" name="dbkey"/>
                <!-- The submitted value is the path of the fasta file -->
                <column index="3" name="value"/>
                <!-- Keep only rows whose column 1 matches the dbkey metadata of the selected alignments -->
                <filter type="data_meta" ref="alignments" key="dbkey" column="1" multiple="false"/>
                <validator type="no_options" message="No indexes are available for the selected input dataset"/>
            </options>
        </param>
    </when>

    <when value="history">
        <param type="data" name="reference_genome" format="fasta" label="Reference Genome used during alignment (fasta)" help="Reference genome (genome.fa) that corresponds to the *.bam file."/>
    </when>

    <when value="indexed_all">
        <param type="select" name="reference_genome" label="Reference Genome used during alignment (fasta)">
            <options from_data_table="all_fasta">
                <column index="2" name="name"/>
                <column index="1" name="dbkey"/>
                <!-- The submitted value is the path of the fasta file -->
                <column index="3" name="value"/>
                <validator type="no_options" message="No indexes are available for the selected input dataset"/>
            </options>
        </param>
    </when>

    <!-- Nothing to ask the user: the reference is derived at run-time -->
    <when value="attribute"/>
</conditional>
|
|
|
117
|
|
|
<!--
    Optional restriction of the pileup to part of the genome.
      entire_genome    - no restriction, no extra parameter
      region           - free-text region, passed to samtools with -r
      regions_file_pos - tabular file with positions, passed with -l
      regions_file_bed - BED file with regions, passed with -l
    Both file variants share the parameter name "samtools_l" so the command
    template can treat them identically.
-->
<conditional name="extended_parameters_regions">
    <param name="samtools_regions" type="select" label="Region specific parameters" help="Let samtools target specific genomic locations.">
        <option value="entire_genome">Entire genome</option>
        <option value="region">Specific region</option>
        <option value="regions_file_pos">Specific positions (file); list of positions</option>
        <option value="regions_file_bed">Specific regions (file); list of regions in BED</option>
    </param>
    <when value="entire_genome" />
    <when value="region">
        <param type="text" name="samtools_r" label="Samtools: region in which pileup is generated" help="chr:pos or chr:start-end">
            <!-- An empty value would render "-r" without an argument, so reject it in the form -->
            <validator type="empty_field" message="Please specify a region (chr:pos or chr:start-end)." />
        </param>
    </when>
    <when value="regions_file_pos">
        <param type="data" name="samtools_l" format="tabular" label="Samtools: list of positions (chr pos)" />
    </when>
    <when value="regions_file_bed">
        <param type="data" name="samtools_l" format="bed" label="Samtools: specific regions (BED)" />
    </when>
</conditional>
|
|
|
137
|
|
|
<!--
    Switch between classical samtools ("false") and the parallel mpileup
    fork ("true"). Only the parallel variant asks for a thread count, which
    the command template passes with -t.
-->
<conditional name="mpileup_parallelization">
    <param name="mpileup_parallelization_select" type="select" label="Use parallelization for the mpileup generation (experimental)" help="Especially if larger numbers of bam/sam files are processed, or the file infrastructure is optimized for IO parallelization, this feature might improve performance.">
        <option value="false">False - uses classical samtools</option>
        <option value="true">True - uses (experimental) samtools mpileup-parallel</option>
    </param>
    <when value="false" />
    <when value="true">
        <!-- At least one thread; default is two -->
        <param type="integer" name="samtools_threads" value="2" min="1" label="Samtools: mpileup threads" />
    </when>
</conditional>
|
|
|
148
|
|
50
|
<!-- When enabled, the command template pipes the pileup through "sort -k 1,1 -k 2,2" before writing the output. -->
<param name="sort_mpileup" type="boolean" truevalue="true" falsevalue="false" label="Sort mpileup file" help="Because parallelization may disrupt the output order, sorting can be convenient for e.g. testing. Notice that this function is only useful in a limited number of situations but consumes (many) resources. Only use it if it's really necessary." />
|
|
38
|
150
|
|
37
|
<!--
    Advanced samtools mpileup options. With "default" nothing is added to
    the command line; with "extended" every parameter below is rendered.
    Boolean parameters carry the complete flag in their truevalue (and an
    empty falsevalue), so the command template can emit them verbatim; all
    other parameters are emitted as "-X value". The bracketed number in each
    label is the default pre-filled in "value".
-->
<conditional name="extended_parameters">
    <param name="parameters" type="select" label="Advanced parameters" help="For more advanced samtools settings.">
        <option value="default">Default settings</option>
        <option value="extended">Extended settings</option>
    </param>
    <when value="default" />
    <when value="extended">
        <!-- Input options -->
        <param type="boolean" name="samtools_6" falsevalue="" truevalue=" -6" label="Samtools: assume the quality is in the Illumina-1.3+ encoding" />
        <param type="boolean" name="samtools_A" falsevalue="" truevalue=" -A" label="Samtools: count anomalous read pairs" />
        <param type="boolean" name="samtools_B" falsevalue="" truevalue=" -B" label="Samtools: disable BAQ computation" />
        <param type="integer" name="samtools_C" value="0" label="Samtools: parameter for adjusting mapQ; 0 to disable [0]" />
        <param type="integer" name="samtools_d" value="250" label="Samtools: max per-BAM depth to avoid excessive memory usage [250]" />
        <param type="boolean" name="samtools_E" falsevalue="" truevalue=" -E" label="Samtools: recalculate extended BAQ on the fly thus ignoring existing BQs" />
        <param type="integer" name="samtools_M" value="60" label="Samtools: cap mapping quality at INT [60]" />
        <param type="boolean" name="samtools_R" falsevalue="" truevalue=" -R" label="Samtools: ignore RG tags" />
        <param type="integer" name="samtools_q" value="0" label="Samtools: skip alignments with mapQ smaller than INT [0]" />
        <param type="integer" name="samtools_Q" value="13" label="Samtools: skip bases with baseQ/BAQ smaller than INT [13]" />

        <!-- Indel calling options -->
        <param type="integer" name="samtools_e" value="20" label="Samtools: Phred-scaled gap extension seq error probability [20]" />
        <param type="float" name="samtools_F" value="0.002" label="Samtools: minimum fraction of gapped reads for candidates [0.002]" help="Alias: -F" />
        <param type="integer" name="samtools_h" value="100" label="Samtools: coefficient for homopolymer errors [100]" />
        <param type="boolean" name="samtools_I" falsevalue="" truevalue=" -I" label="Samtools: do not perform indel calling" />
        <param type="integer" name="samtools_L" value="250" label="Samtools: max per-sample depth for INDEL calling [250]" />
        <param type="integer" name="samtools_m" value="1" label="Samtools: minimum gapped reads for indel candidates [1]" help="Alias: -m" />
        <param type="integer" name="samtools_o" value="40" label="Samtools: Phred-scaled gap open sequencing error probability [40]" />
        <param type="boolean" name="samtools_p" falsevalue="" truevalue=" -p" label="Samtools: apply -m and -F per-sample to increase sensitivity" />
        <param type="text" name="samtools_P" value="all" label="Samtools: comma separated list of platforms for indels [all]" />
    </when>
</conditional>
|
|
|
180 </inputs>
|
|
|
181
|
|
|
<!--
    Single mpileup dataset. Its history label lists every input alignment
    as "<hid>: <name>", joined by commas.
-->
<outputs>
    <data name="output"
          format="mpileup"
          label="${tool.name} on ${', '.join([ str(a.hid)+': '+a.name for a in $alignments ])}"/>
</outputs>
|
|
|
185
|
|
38
|
<!--
    Two functional tests on the same input that must yield the same sorted
    pileup: one with classical samtools, one with the parallel fork using
    two threads. Both resolve the reference through the "attribute" source
    (dbkey hg19) and sort the result so the outputs are comparable.
-->
<tests>
    <!-- Use classical samtools -->
    <test>
        <param name="alignments" ftype="bam" dbkey="hg19" value="hg19_mutant.bam.txt"/>
        <param name="source_select" value="attribute"/>
        <param name="samtools_regions" value="entire_genome"/>

        <param name="mpileup_parallelization_select" value="false"/>
        <param name="sort_mpileup" value="true"/>

        <param name="parameters" value="default"/>

        <output name="output" file="hg19_mutant.mpileup"/>
    </test>

    <!-- Use parallelized samtools -->
    <test>
        <param name="alignments" ftype="bam" dbkey="hg19" value="hg19_mutant.bam.txt"/>
        <param name="source_select" value="attribute"/>
        <param name="samtools_regions" value="entire_genome"/>

        <param name="mpileup_parallelization_select" value="true"/>
        <param name="samtools_threads" value="2"/>
        <param name="sort_mpileup" value="true"/>

        <param name="parameters" value="default"/>

        <output name="output" file="hg19_mutant.mpileup"/>
    </test>
</tests>
|
|
|
215
|
|
37
|
216 <help>
|
|
38
|
217 **Samtools mpileup (supporting parallelization)**
|
|
37
|
218
|
|
38
|
219 SAM (Sequence Alignment/Map) format is a generic format for storing large nucleotide sequence alignments. SAM aims to be a format that:
|
|
|
220
|
|
|
221 Is flexible enough to store all the alignment information generated by various alignment programs;
|
|
|
222 Is simple enough to be easily generated by alignment programs or converted from existing alignment formats;
|
|
|
223 Is compact in file size;
|
|
|
224 Allows most operations on the alignment to work on a stream without loading the whole alignment into memory;
|
|
|
225 Allows the file to be indexed by genomic position to efficiently retrieve all reads aligning to a locus.
|
|
|
226 SAM Tools provide various utilities for manipulating alignments in the SAM format, including sorting, merging, indexing and generating alignments in a per-position format.
|
|
37
|
227
|
|
38
|
228 SAMtools is hosted by SourceForge.net. The project page is http://samtools.sourceforge.net/. The source code releases are available from the download page. You can check out the most recent source code from the github project page with:
|
|
|
229 git clone git://github.com/samtools/samtools.git
|
|
|
230 https://github.com/mydatascience/parallel-mpileup/
|
|
37
|
231
|
|
38
|
232 Because samtools does not support parallelization of the mpileup command, the project was forked to include parallelization support (see the parallel-mpileup repository linked above):
|
|
|
233
|
|
|
234
|
|
|
235 However, since that fork seems to lack support and contains fatal bugs, its development was continued at:
|
|
|
236 https://github.com/yhoogstrate/parallel-mpileup/
|
|
|
237
|
|
37
|
238
|
|
|
239 **Input formats**
|
|
|
240
|
|
38
|
241 Samtools accepts sequencing alignments in either SAM or BAM format (http://samtools.sourceforge.net/). The alignment files have to be linked to a reference genome by Galaxy. This is indicated under every history item with e.g.: *"database: hg19"* for a link to hg19, or *"database: ?"* if the link is missing.
|
|
37
|
242
|
|
|
243 **Installation**
|
|
|
244
|
|
38
|
245 The installation is fully automatic.
|
|
37
|
246
|
|
|
247 **License**
|
|
|
248
|
|
38
|
249 * parallel-mpileup: MIT License (https://github.com/yhoogstrate/parallel-mpileup/blob/master/samtools-0.1.19/COPYING)
|
|
37
|
250 * samtools: MIT License
|
|
|
251
|
|
|
252
|
|
|
253 **Contact**
|
|
|
254
|
|
|
255 The tool wrapper has been written by Youri Hoogstrate from the Erasmus Medical Center (Rotterdam, Netherlands) on behalf of the Translational Research IT (TraIT) project:
|
|
|
256 http://www.ctmm.nl/en/programmas/infrastructuren/traitprojecttranslationeleresearch
|
|
|
257
|
|
|
258 More tools by the Translational Research IT (TraIT) project can be found in the following repository:
|
|
|
259 http://toolshed.dtls.nl/
|
|
|
260 </help>
|
|
38
|
261 </tool> |