comparison samtools_mpileup.xml @ 2:3aa48bcbc599 draft

Uploaded tarball for 0.0.3 version.
author devteam
date Wed, 12 Mar 2014 12:53:30 -0400
parents b47a418ccfdc
children da0203c3461a
comparison
equal deleted inserted replaced
1:b47a418ccfdc 2:3aa48bcbc599
1 <tool id="samtools_mpileup" name="MPileup" version="0.0.2"> 1 <tool id="samtools_mpileup" name="MPileup" version="0.0.3">
2 <description>SNP and indel caller</description> 2 <description>SNP and indel caller</description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="0.1.18">samtools</requirement> 4 <requirement type="package" version="0.1.19">samtools</requirement>
5 </requirements> 5 </requirements>
6 <command interpreter="python">samtools_wrapper.py 6 <command><![CDATA[
7 -p 'samtools mpileup' 7 #if $reference_source.reference_source_selector == "history":
8 --stdout "${output_log}" 8 ln -s "${reference_source.ref_file}" && samtools faidx `basename "${reference_source.ref_file}"` && samtools mpileup
9 #else:
10 samtools mpileup
11 #end if
9 #if $reference_source.reference_source_selector != "history": 12 #if $reference_source.reference_source_selector != "history":
10 -p '-f "${reference_source.ref_file.fields.path}"' 13 -f "${reference_source.ref_file.fields.path}"
11 #else: 14 #else:
12 -d "-f" "${reference_source.ref_file}" "fa" "reference_input" 15 -f "${reference_source.ref_file}"
13 #end if 16 #end if
14 #for $i, $input_bam in enumerate( $reference_source.input_bams ): 17 #for $i, $input_bam in enumerate( $reference_source.input_bams ):
15 -d " " "${input_bam.input_bam}" "${input_bam.input_bam.ext}" "bam_input_${i}" 18 $input_bam.input_bam
16 -d "" "${input_bam.input_bam.metadata.bam_index}" "bam_index" "bam_input_${i}" ##hardcode galaxy ext type as bam_index
17 #end for 19 #end for
18 -p '
19 #if str( $advanced_options.advanced_options_selector ) == "advanced": 20 #if str( $advanced_options.advanced_options_selector ) == "advanced":
20 ${advanced_options.skip_anomalous_read_pairs} 21 #if str( $advanced_options.filter_by_flags.filter_flags ) == "filter":
22 #if $advanced_options.filter_by_flags.require_flags:
23 --rf ${sum([int(flag) for flag in str($advanced_options.filter_by_flags.require_flags).split(',')])}
24 #end if
25 #if $advanced_options.filter_by_flags.exclude_flags:
26 --ff ${sum([int(flag) for flag in str($advanced_options.filter_by_flags.exclude_flags).split(',')])}
27 #end if
28 #end if
29 #if str( $advanced_options.limit_by_region.limit_by_regions ) == "paste":
30 -l "$pasted_regions"
31 #elif str( $advanced_options.limit_by_region.limit_by_regions ) == "history"
32 -l "$bed_regions"
33 #end if
34 #if str( $advanced_options.exclude_read_group.exclude_read_groups ) == "paste":
35 -G "$excluded_read_groups"
36 #elif str( $advanced_options.exclude_read_group.exclude_read_groups ) == "history"
37 -G "$read_groups"
38 #end if
39 ${advanced_options.skip_anomalous_read_pairs}
21 ${advanced_options.disable_probabilistic_realignment} 40 ${advanced_options.disable_probabilistic_realignment}
22 -C "${advanced_options.coefficient_for_downgrading}" 41 -C "${advanced_options.coefficient_for_downgrading}"
23 -d "${advanced_options.max_reads_per_bam}" 42 -d "${advanced_options.max_reads_per_bam}"
24 ${advanced_options.extended_BAQ_computation} 43 ${advanced_options.extended_BAQ_computation}
25 #if str( $advanced_options.position_list ) != 'None':
26 -l "${advanced_options.position_list}"
27 #end if
28 -q "${advanced_options.minimum_mapping_quality}" 44 -q "${advanced_options.minimum_mapping_quality}"
29 -Q "${advanced_options.minimum_base_quality}" 45 -Q "${advanced_options.minimum_base_quality}"
30 #if str( $advanced_options.region_string ): 46 #if str( $advanced_options.region_string ):
31 -r "${advanced_options.region_string}" 47 -r "${advanced_options.region_string}"
32 #end if 48 #end if
38 -g 54 -g
39 -e "${genotype_likelihood_computation_type.gap_extension_sequencing_error_probability}" 55 -e "${genotype_likelihood_computation_type.gap_extension_sequencing_error_probability}"
40 -h "${genotype_likelihood_computation_type.coefficient_for_modeling_homopolymer_errors}" 56 -h "${genotype_likelihood_computation_type.coefficient_for_modeling_homopolymer_errors}"
41 #if str( $genotype_likelihood_computation_type.perform_indel_calling.perform_indel_calling_selector ) == 'perform_indel_calling': 57 #if str( $genotype_likelihood_computation_type.perform_indel_calling.perform_indel_calling_selector ) == 'perform_indel_calling':
42 -L "${genotype_likelihood_computation_type.perform_indel_calling.skip_indel_calling_above_sample_depth}" 58 -L "${genotype_likelihood_computation_type.perform_indel_calling.skip_indel_calling_above_sample_depth}"
59 -m "${genotype_likelihood_computation_type.perform_indel_calling.minimum_gapped_reads_for_indel_candidates}"
60 -F "${genotype_likelihood_computation_type.perform_indel_calling.minimum_gapped_read_fraction}"
61 ${genotype_likelihood_computation_type.perform_indel_calling.gapped_read_per_sample}
43 #else: 62 #else:
44 -I 63 -I
45 #end if 64 #end if
46 -o "${genotype_likelihood_computation_type.gap_open_sequencing_error_probability}" 65 -o "${genotype_likelihood_computation_type.gap_open_sequencing_error_probability}"
47 #if len( $genotype_likelihood_computation_type.platform_list_repeat ): 66 #if len( $genotype_likelihood_computation_type.platform_list_repeat ):
48 -P "${ ",".join( [ str( platform.platform_entry ) for platform in $genotype_likelihood_computation_type.platform_list_repeat ] ) }" 67 -P "${ ",".join( [ str( platform.platform_entry ) for platform in $genotype_likelihood_computation_type.platform_list_repeat ] ) }"
49 #end if 68 #end if
69 #else:
70 ${genotype_likelihood_computation_type.base_position_on_reads}
71 ${genotype_likelihood_computation_type.output_mapping_quality}
50 #end if 72 #end if
51 &gt; "${output_mpileup}" 73 > "$output_mpileup" 2> "$output_log"
52 ' 74 ]]></command>
53 </command> 75 <stdio>
54 <inputs> 76 <exit_code range="1:" level="fatal" description="Error" />
55 <conditional name="reference_source"> 77 </stdio>
56 <param name="reference_source_selector" type="select" label="Choose the source for the reference list"> 78 <inputs>
57 <option value="cached">Locally cached</option> 79 <conditional name="reference_source">
58 <option value="history">History</option> 80 <param name="reference_source_selector" type="select" label="Choose the source for the reference list">
59 </param> 81 <option value="cached">Locally cached</option>
60 <when value="cached"> 82 <option value="history">History</option>
61 <repeat name="input_bams" title="BAM file" min="1">
62 <param name="input_bam" type="data" format="bam" label="BAM file">
63 <validator type="unspecified_build" />
64 <validator type="dataset_metadata_in_data_table" table_name="fasta_indexes" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." /> <!-- fixme!!! this needs to be a select -->
65 </param>
66 </repeat>
67 <param name="ref_file" type="select" label="Using reference genome">
68 <options from_data_table="fasta_indexes">
69 <!-- <filter type="data_meta" ref="input_bam" key="dbkey" column="1" /> does not yet work in a repeat...-->
70 </options>
71 </param>
72 </when>
73 <when value="history"> <!-- FIX ME!!!! -->
74 <repeat name="input_bams" title="BAM file" min="1">
75 <param name="input_bam" type="data" format="bam" label="BAM file">
76 <validator type="metadata" check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue." />
77 </param>
78 </repeat>
79 <param name="ref_file" type="data" format="fasta" label="Using reference file" />
80 </when>
81 </conditional>
82
83
84 <conditional name="genotype_likelihood_computation_type">
85 <param name="genotype_likelihood_computation_type_selector" type="select" label="Genotype Likelihood Computation">
86 <option value="perform_genotype_likelihood_computation">Perform genotype likelihood computation</option>
87 <option value="do_not_perform_genotype_likelihood_computation" selected="True">Do not perform genotype likelihood computation</option>
88 </param>
89 <when value="perform_genotype_likelihood_computation">
90 <param name="gap_extension_sequencing_error_probability" type="integer" value="20" label="Phred-scaled gap extension sequencing error probability" />
91 <param name="coefficient_for_modeling_homopolymer_errors" type="integer" value="100" label="Coefficient for modeling homopolymer errors." />
92 <conditional name="perform_indel_calling">
93 <param name="perform_indel_calling_selector" type="select" label="Perform INDEL calling">
94 <option value="perform_indel_calling" selected="True">Perform INDEL calling</option>
95 <option value="do_not_perform_indel_calling">Do not perform INDEL calling</option>
96 </param> 83 </param>
97 <when value="perform_indel_calling"> 84 <when value="cached">
98 <param name="skip_indel_calling_above_sample_depth" type="integer" value="250" label="Skip INDEL calling if the average per-sample depth is above" /> 85 <repeat name="input_bams" title="BAM file" min="1">
86 <param name="input_bam" type="data" format="bam" label="BAM file">
87 <validator type="unspecified_build" />
88 <validator type="dataset_metadata_in_data_table" table_name="fasta_indexes" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." /> <!-- fixme!!! this needs to be a select -->
89 </param>
90 </repeat>
91 <param name="ref_file" type="select" label="Using reference genome">
92 <options from_data_table="fasta_indexes" />
93 <!-- <filter type="data_meta" ref="input_bam" key="dbkey" column="1" /> does not yet work in a repeat...-->
94 </param>
99 </when> 95 </when>
100 <when value="do_not_perform_indel_calling" /> 96 <when value="history">
101 </conditional> 97 <repeat name="input_bams" title="BAM file" min="1">
102 <param name="gap_open_sequencing_error_probability" type="integer" value="40" label="Phred-scaled gap open sequencing error probability" /> 98 <param name="input_bam" type="data" format="bam" label="BAM file">
103 <repeat name="platform_list_repeat" title="Platform for INDEL candidates"> 99 <validator type="metadata" check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue." />
104 <param name="platform_entry" type="text" value="" label="Platform to use for INDEL candidates" /> 100 </param>
105 </repeat> 101 </repeat>
106 </when> 102 <param name="ref_file" type="data" format="fasta" label="Using reference file" />
107 <when value="do_not_perform_genotype_likelihood_computation"> 103 </when>
108 <!-- Do nothing here --> 104 </conditional>
109 </when> 105 <conditional name="genotype_likelihood_computation_type">
110 </conditional> 106 <param name="genotype_likelihood_computation_type_selector" type="select" label="Genotype Likelihood Computation">
111 <conditional name="advanced_options"> 107 <option value="perform_genotype_likelihood_computation">Perform genotype likelihood computation</option>
112 <param name="advanced_options_selector" type="select" label="Set advanced options"> 108 <option value="do_not_perform_genotype_likelihood_computation" selected="True">Do not perform genotype likelihood computation</option>
113 <option value="basic" selected="True">Basic</option> 109 </param>
114 <option value="advanced">Advanced</option> 110 <when value="perform_genotype_likelihood_computation">
115 </param> 111 <param name="gap_extension_sequencing_error_probability" type="integer" value="20" label="Phred-scaled gap extension sequencing error probability" />
116 <when value="advanced"> 112 <param name="coefficient_for_modeling_homopolymer_errors" type="integer" value="100" label="Coefficient for modeling homopolymer errors." />
117 <param name="skip_anomalous_read_pairs" type="boolean" truevalue="-A" falsevalue="" checked="False" label="Do not skip anomalous read pairs in variant calling" /> 113 <conditional name="perform_indel_calling">
118 <param name="disable_probabilistic_realignment" type="boolean" truevalue="-B" falsevalue="" checked="False" label=" Disable probabilistic realignment for the computation of base alignment quality (BAQ)" /> 114 <param name="perform_indel_calling_selector" type="select" label="Perform INDEL calling">
119 <param name="coefficient_for_downgrading" type="integer" value="0" label="Coefficient for downgrading mapping quality for reads containing excessive mismatches" /> 115 <option value="perform_indel_calling" selected="True">Perform INDEL calling</option>
120 <param name="max_reads_per_bam" type="integer" value="250" label="Max reads per BAM" /> 116 <option value="do_not_perform_indel_calling">Do not perform INDEL calling</option>
121 <param name="extended_BAQ_computation" type="boolean" truevalue="-E" falsevalue="" checked="False" label="Extended BAQ computation" /> 117 </param>
122 <param name="position_list" type="data" format="bed" label="List of regions or sites on which to operate" optional="True" /> 118 <when value="perform_indel_calling">
123 <param name="minimum_mapping_quality" type="integer" value="0" label="Minimum mapping quality for an alignment to be used" /> 119 <param name="skip_indel_calling_above_sample_depth" type="integer" value="250" label="Skip INDEL calling if the average per-sample depth is above" />
124 <param name="minimum_base_quality" type="integer" value="13" label="Minimum base quality for a base to be considered" /> 120 <param name="minimum_gapped_reads_for_indel_candidates" type="integer" value="1" label="Minimum gapped reads for indel candidates" />
125 <param name="region_string" type="text" value="" label="Only generate pileup in region" /> 121 <param name="minimum_gapped_read_fraction" type="float" value="0.002" label="Minimum fraction of gapped reads for candidates" />
126 <param name="output_per_sample_read_depth" type="boolean" truevalue="-D" falsevalue="" checked="False" label="Output per-sample read depth" /> 122 <param name="gapped_read_per_sample" type="boolean" truevalue="-p" falsevalue="" checked="False" label="Apply minimum values on a per-sample basis" />
127 <param name="output_per_sample_strand_bias_p_value" type="boolean" truevalue="-S" falsevalue="" checked="False" label="Output per-sample Phred-scaled strand bias P-value" /> 123 </when>
128 </when> 124 <when value="do_not_perform_indel_calling" />
129 <when value="basic" /> 125 </conditional>
130 </conditional> 126 <param name="gap_open_sequencing_error_probability" type="integer" value="40" label="Phred-scaled gap open sequencing error probability" />
131 </inputs> 127 <repeat name="platform_list_repeat" title="Platform for INDEL candidates">
132 <outputs> 128 <param name="platform_entry" type="text" value="" label="Platform to use for INDEL candidates" />
133 <data format="pileup" name="output_mpileup" label="${tool.name} on ${on_string}"> 129 </repeat>
134 <change_format> 130 </when>
135 <when input="genotype_likelihood_computation_type.genotype_likelihood_computation_type_selector" value="perform_genotype_likelihood_computation" format="bcf" /> 131 <when value="do_not_perform_genotype_likelihood_computation">
136 </change_format> 132 <param name="base_position_on_reads" type="boolean" truevalue="-O" falsevalue="" checked="False" label="Output base positions on reads" />
137 </data> 133 <param name="output_mapping_quality" type="boolean" truevalue="-s" falsevalue="" checked="False" label="Output mapping quality" />
138 <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" /> 134 </when>
139 </outputs> 135 </conditional>
140 <tests> 136 <conditional name="advanced_options">
141 <test> 137 <param name="advanced_options_selector" type="select" label="Set advanced options">
142 <param name="reference_source_selector" value="history" /> 138 <option value="basic" selected="True">Basic</option>
143 <param name="ref_file" value="phiX.fasta" ftype="fasta" /> 139 <option value="advanced">Advanced</option>
144 <param name="input_bam" value="gatk/gatk_table_recalibration/gatk_table_recalibration_out_1.bam" ftype="bam" /> 140 </param>
145 <param name="genotype_likelihood_computation_type_selector" value="do_not_perform_genotype_likelihood_computation" /> 141 <when value="advanced">
146 <param name="advanced_options_selector" value="basic" /> 142 <conditional name="filter_by_flags">
147 <output name="output_mpileup" file="samtools/mpileup/samtools_mpileup_out_1.pileup" /> 143 <param name="filter_flags" type="select" label="Set filter by flags">
148 <output name="output_log" file="samtools/mpileup/samtools_mpileup_out_1.log" /> 144 <option value="nofilter" selected="True">Do not filter</option>
149 </test> 145 <option value="filter">Filter by flags to exclude or require</option>
150 <test> 146 </param>
151 <param name="reference_source_selector" value="history" /> 147 <when value="filter">
152 <param name="ref_file" value="phiX.fasta" ftype="fasta" /> 148 <param name="require_flags" type="select" display="checkboxes" multiple="True" label="Require">
153 <param name="input_bam" value="gatk/gatk_table_recalibration/gatk_table_recalibration_out_1.bam" ftype="bam" /> 149 <option value="1">Read is paired</option>
154 <param name="genotype_likelihood_computation_type_selector" value="perform_genotype_likelihood_computation" /> 150 <option value="2">Read is mapped in a proper pair</option>
155 <param name="gap_extension_sequencing_error_probability" value="20" /> 151 <option value="4">The read is unmapped</option>
156 <param name="coefficient_for_modeling_homopolymer_errors" value="100" /> 152 <option value="8">The mate is unmapped</option>
157 <param name="perform_indel_calling_selector" value="perform_indel_calling" /> 153 <option value="16">Read strand</option>
158 <param name="skip_indel_calling_above_sample_depth" value="250" /> 154 <option value="32">Mate strand</option>
159 <param name="gap_open_sequencing_error_probability" value="40" /> 155 <option value="64">Read is the first in a pair</option>
160 <param name="platform_list_repeat" value="0" /> 156 <option value="128">Read is the second in a pair</option>
161 <param name="advanced_options_selector" value="basic" /> 157 <option value="256">The alignment of this read is not primary</option>
162 <output name="output_mpileup" file="samtools/mpileup/samtools_mpileup_out_2.bcf" /> 158 <option value="512">The read fails platform/vendor quality checks</option>
163 <output name="output_log" file="samtools/mpileup/samtools_mpileup_out_1.log" /> 159 <option value="1024">The read is a PCR or optical duplicate</option>
164 </test> 160 </param>
165 </tests> 161 <param name="exclude_flags" type="select" display="checkboxes" multiple="True" label="Exclude">
166 <help> 162 <option value="1">Read is paired</option>
163 <option value="2">Read is mapped in a proper pair</option>
164 <option value="4">The read is unmapped</option>
165 <option value="8">The mate is unmapped</option>
166 <option value="16">Read strand</option>
167 <option value="32">Mate strand</option>
168 <option value="64">Read is the first in a pair</option>
169 <option value="128">Read is the second in a pair</option>
170 <option value="256">The alignment of this read is not primary</option>
171 <option value="512">The read fails platform/vendor quality checks</option>
172 <option value="1024">The read is a PCR or optical duplicate</option>
173 </param>
174 </when>
175 <when value="nofilter" />
176 </conditional>
177 <conditional name="limit_by_region">
178 <param name="limit_by_regions" type="select" label="Select regions to call">
179 <option value="no_limit" selected="True">Do not limit</option>
180 <option value="history">From an uploaded BED file</option>
181 <option value="paste">Paste a list of regions or BED</option>
182 </param>
183 <when value="history">
184 <param name="bed_regions" type="data" format="bed" label="BED file">
185 <validator type="dataset_ok_validator" />
186 </param>
187 </when>
188 <when value="paste">
189 <param name="region_paste" type="text" area="true" size="10x35" label="Regions" help="Paste a list of regions in BED format or as a list of chromosomes and positions"/>
190 </when>
191 <when value="no_limit" />
192 </conditional>
193 <conditional name="exclude_read_group">
194 <param name="exclude_read_groups" type="select" label="Select read groups to exclude">
195 <option value="no_limit" selected="True">Do not exclude</option>
196 <option value="history">From an uploaded text file</option>
197 <option value="paste">Paste a list of read groups</option>
198 </param>
199 <when value="history">
200 <param name="read_groups" type="data" format="txt" label="Text file">
201 <validator type="dataset_ok_validator" />
202 </param>
203 </when>
204 <when value="paste">
205 <param name="group_paste" type="text" area="true" size="10x35" label="Read groups" help="Paste a list of read groups"/>
206 </when>
207 <when value="no_limit" />
208 </conditional>
209 <param name="skip_anomalous_read_pairs" type="boolean" truevalue="-A" falsevalue="" checked="False" label="Do not skip anomalous read pairs in variant calling" />
210 <param name="disable_probabilistic_realignment" type="boolean" truevalue="-B" falsevalue="" checked="False" label=" Disable probabilistic realignment for the computation of base alignment quality (BAQ)" />
211 <param name="coefficient_for_downgrading" type="integer" value="0" label="Coefficient for downgrading mapping quality for reads containing excessive mismatches" />
212 <param name="max_reads_per_bam" type="integer" value="250" label="Max reads per BAM" />
213 <param name="extended_BAQ_computation" type="boolean" truevalue="-E" falsevalue="" checked="False" label="Extended BAQ computation" />
214 <param name="minimum_mapping_quality" type="integer" value="0" label="Minimum mapping quality for an alignment to be used" />
215 <param name="minimum_base_quality" type="integer" value="13" label="Minimum base quality for a base to be considered" />
216 <param name="region_string" type="text" value="" label="Only generate pileup in region" />
217 <param name="output_per_sample_read_depth" type="boolean" truevalue="-D" falsevalue="" checked="False" label="Output per-sample read depth" />
218 <param name="output_per_sample_strand_bias_p_value" type="boolean" truevalue="-S" falsevalue="" checked="False" label="Output per-sample Phred-scaled strand bias P-value" />
219 </when>
220 <when value="basic" />
221 </conditional>
222 </inputs>
223 <configfiles>
224 <configfile name="excluded_read_groups">
225 <![CDATA[
226 <%
227 import re
228 %>
229 #set pasted_data = ''
230 #if str( $advanced_options.advanced_options_selector ) == "advanced":
231 #if str( $advanced_options.exclude_read_group.exclude_read_groups ) == "paste":
232 #set regex=re.compile("\\s+")
233 #set pasted_data = '\t'.join( regex.split( str( $advanced_options.exclude_read_group['read_groups'] ) ) )
234 #end if
235 #end if
236 ${pasted_data}
237 ]]>
238 </configfile>
239 <configfile name="pasted_regions">
240 <![CDATA[
241 <%
242 import re
243 %>
244 #set pasted_data = ''
245 #if str( $advanced_options.advanced_options_selector ) == "advanced":
246 #if str( $advanced_options.limit_by_region.limit_by_regions ) == "paste":
247 #set regex=re.compile("\\s+")
248 #set pasted_data = '\t'.join( regex.split( str( $advanced_options.limit_by_region['region_paste'] ) ) )
249 #end if
250 #end if
251 ${pasted_data}
252 ]]>
253 </configfile>
254 </configfiles>
255 <outputs>
256 <data format="pileup" name="output_mpileup" label="${tool.name} on ${on_string}">
257 <change_format>
258 <when input="genotype_likelihood_computation_type.genotype_likelihood_computation_type_selector" value="perform_genotype_likelihood_computation" format="bcf" />
259 </change_format>
260 </data>
261 <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" />
262 </outputs>
263 <tests>
264 <test>
265 <param name="reference_source_selector" value="history" />
266 <param name="ref_file" value="phiX.fasta" ftype="fasta" />
267 <param name="input_bam" value="samtools_mpileup_in_1.bam" ftype="bam" />
268 <param name="genotype_likelihood_computation_type_selector" value="do_not_perform_genotype_likelihood_computation" />
269 <param name="advanced_options_selector" value="basic" />
270 <param name="base_position_on_reads" value="true" />
271 <param name="output_mapping_quality" value="true" />
272 <output name="output_mpileup" file="samtools_mpileup_out_1.pileup" />
273 <output name="output_log" file="samtools_mpileup_out_1.log" />
274 </test>
275 <test>
276 <param name="reference_source_selector" value="history" />
277 <param name="ref_file" value="phiX.fasta" ftype="fasta" />
278 <param name="input_bam" value="samtools_mpileup_in_1.bam" ftype="bam" />
279 <param name="genotype_likelihood_computation_type_selector" value="perform_genotype_likelihood_computation" />
280 <param name="gap_extension_sequencing_error_probability" value="20" />
281 <param name="coefficient_for_modeling_homopolymer_errors" value="100" />
282 <param name="perform_indel_calling_selector" value="perform_indel_calling" />
283 <param name="skip_indel_calling_above_sample_depth" value="250" />
284 <param name="gap_open_sequencing_error_probability" value="40" />
285 <param name="platform_list_repeat" value="0" />
286 <param name="advanced_options_selector" value="basic" />
287 <output name="output_mpileup" ftype="bcf" file="samtools_mpileup_out_2.bcf" lines_diff="1" />
288 <output name="output_log" file="samtools_mpileup_out_2.log" />
289 </test>
290 <test>
291 <param name="reference_source_selector" value="cached" />
292 <param name="input_bam" value="samtools_mpileup_in_3.bam" ftype="bam" dbkey="phiX" />
293 <param name="genotype_likelihood_computation_type_selector" value="perform_genotype_likelihood_computation" />
294 <param name="gap_extension_sequencing_error_probability" value="20" />
295 <param name="coefficient_for_modeling_homopolymer_errors" value="100" />
296 <param name="perform_indel_calling_selector" value="perform_indel_calling" />
297 <param name="skip_indel_calling_above_sample_depth" value="250" />
298 <param name="gap_open_sequencing_error_probability" value="40" />
299 <param name="platform_list_repeat" value="0" />
300 <param name="advanced_options_selector" value="basic" />
301 <output name="output_mpileup" ftype="bcf" file="samtools_mpileup_out_2.bcf" lines_diff="1" />
302 <output name="output_log" file="samtools_mpileup_out_3.log" />
303 </test>
304 <test>
305 <param name="reference_source_selector" value="cached" />
306 <param name="input_bam" value="samtools_mpileup_in_1.bam" ftype="bam" dbkey="phiX" />
307 <param name="genotype_likelihood_computation_type_selector" value="perform_genotype_likelihood_computation" />
308 <param name="gap_extension_sequencing_error_probability" value="20" />
309 <param name="coefficient_for_modeling_homopolymer_errors" value="100" />
310 <param name="perform_indel_calling_selector" value="perform_indel_calling" />
311 <param name="skip_indel_calling_above_sample_depth" value="250" />
312 <param name="gap_open_sequencing_error_probability" value="40" />
313 <param name="platform_list_repeat" value="0" />
314 <param name="advanced_options_selector" value="advanced" />
315 <param name="advanced_options|filter_by_flags|filter_flags" value="nofilter" />
316 <param name="advanced_options|limit_by_region|limit_by_regions" value="no_limit" />
317 <param name="advanced_options|coefficient_for_downgrading" value="true" />
318 <param name="advanced_options|max_reads_per_bam" value="200" />
319 <param name="advanced_options|extended_BAQ_computation" value="true" />
320 <param name="advanced_options|minimum_mapping_quality" value="0" />
321 <param name="advanced_options|minimum_base_quality" value="43" />
322 <output name="output_mpileup" ftype="bcf" file="samtools_mpileup_out_4.bcf" lines_diff="1" />
323 <output name="output_log" file="samtools_mpileup_out_4.log" />
324 </test>
325 </tests>
326 <help>
167 **What it does** 327 **What it does**
168 328
169 Generate BCF or pileup for one or multiple BAM files. Alignment records are grouped by sample identifiers in @RG header lines. If sample identifiers are absent, each input file is regarded as one sample. 329 Generate BCF or pileup for one or multiple BAM files. Alignment records are grouped by sample identifiers in @RG header lines. If sample identifiers are absent, each input file is regarded as one sample.
170 330
171 ------ 331 ------
172 332
173 **Settings**:: 333 .. list-table:: **Input options**
174 334 :widths: 5 5 40 10
175 Input Options: 335 :header-rows: 1
176 -6 Assume the quality is in the Illumina 1.3+ encoding. 336
177 -A Do not skip anomalous read pairs in variant calling. 337 * - Flag
178 -B Disable probabilistic realignment for the computation of base alignment quality (BAQ). BAQ is the Phred-scaled probability of a read base being misaligned. Applying this option greatly helps to reduce false SNPs caused by misalignments. 338 - Type
179 -b FILE List of input BAM files, one file per line [null] 339 - Description
180 -C INT Coefficient for downgrading mapping quality for reads containing excessive mismatches. Given a read with a phred-scaled probability q of being generated from the mapped position, the new mapping quality is about sqrt((INT-q)/INT)*INT. A zero value disables this functionality; if enabled, the recommended value for BWA is 50. [0] 340 - Default
181 -d INT At a position, read maximally INT reads per input BAM. [250] 341 * - -6
182 -E Extended BAQ computation. This option helps sensitivity especially for MNPs, but may hurt specificity a little bit. 342 - *BOOLEAN*
183 -f FILE The faidx-indexed reference file in the FASTA format. The file can be optionally compressed by razip. [null] 343 - assume the quality is in the Illumina-1.3+ encoding
184 -l FILE BED or position list file containing a list of regions or sites where pileup or BCF should be generated [null] 344 - off
185 -q INT Minimum mapping quality for an alignment to be used [0] 345 * - -A
186 -Q INT Minimum base quality for a base to be considered [13] 346 - *BOOLEAN*
187 -r STR Only generate pileup in region STR [all sites] 347 - count anomalous read pairs
188 Output Options: 348 - off
189 349 * - -B
190 -D Output per-sample read depth 350 - *BOOLEAN*
191 -g Compute genotype likelihoods and output them in the binary call format (BCF). 351 - disable BAQ computation
192 -S Output per-sample Phred-scaled strand bias P-value 352 - off
193 -u Similar to -g except that the output is uncompressed BCF, which is preferred for piping. 353 * - -b
194 354 - *FILE*
195 Options for Genotype Likelihood Computation (for -g or -u): 355 - list of input BAM filenames, one per line
196 356 - *null*
197 -e INT Phred-scaled gap extension sequencing error probability. Reducing INT leads to longer indels. [20] 357 * - -C
198 -h INT Coefficient for modeling homopolymer errors. Given an l-long homopolymer run, the sequencing error of an indel of size s is modeled as INT*s/l. [100] 358 - *INT*
199 -I Do not perform INDEL calling 359 - parameter for adjusting mapQ; 0 to disable
200 -L INT Skip INDEL calling if the average per-sample depth is above INT. [250] 360 - 0
201 -o INT Phred-scaled gap open sequencing error probability. Reducing INT leads to more indel calls. [40] 361 * - -d
202 -P STR Comma delimited list of platforms (determined by @RG-PL) from which indel candidates are obtained. It is recommended to collect indel candidates from sequencing technologies that have low indel error rate such as ILLUMINA. [all] 362 - *INT*
363 - max per-BAM depth to avoid excessive memory usage
364 - 250
365 * - -E
366 - *BOOLEAN*
367 - recalculate extended BAQ on the fly thus ignoring existing BQs
368 - off
369 * - -f
370 - *FILE*
371 - faidx indexed reference sequence file
372 - *null*
373 * - -G
374 - *FILE*
375 - exclude read groups listed in FILE
376 - *null*
377 * - -l
378 - *FILE*
379 - list of positions (chr pos) or regions (BED)
380 - *null*
381 * - -M
382 - *INT*
383 - cap mapping quality at INT
384 - 60
385 * - -r
386 - *STR*
387 - region in which pileup is generated
388 - *null*
389 * - -R
390 - *BOOLEAN*
391 - ignore RG tags
392 - off
393 * - -q
394 - *INT*
395 - skip alignments with mapQ smaller than INT
396 - 0
397 * - -Q
398 - *INT*
399 - skip bases with baseQ/BAQ smaller than INT
400 - 13
401 * - --rf
402 - *INT*
403 - required flags: skip reads with mask bits unset
404 - 0
405 * - --ff
406 - *INT*
407 - filter flags: skip reads with mask bits set
408 - 0
203 409
204 ------ 410 ------
205 411
412 .. list-table:: **Output options**
413 :widths: 5 5 40 10
414 :header-rows: 1
415
416 * - Flag
417 - Type
418 - Description
419 - Default
420 * - -D
421 - *BOOLEAN*
422 - output per-sample DP in BCF (require -g/-u)
423 - off
424 * - -g
425 - *BOOLEAN*
426 - generate BCF output (genotype likelihoods)
427 - off
428 * - -O
429 - *BOOLEAN*
430 - output base positions on reads (disabled by -g/-u)
431 - off
432 * - -s
433 - *BOOLEAN*
434 - output mapping quality (disabled by -g/-u)
435 - off
436 * - -S
437 - *BOOLEAN*
438 - output per-sample strand bias P-value in BCF (require -g/-u)
439 - off
440 * - -u
441 - *BOOLEAN*
442 - generate uncompressed BCF output
443 - off
444
445 ------
446
447 .. list-table:: **SNP/INDEL genotype likelihoods options (effective with '-g' or '-u')**
448 :widths: 5 5 40 10
449 :header-rows: 1
450
451 * - Flag
452 - Type
453 - Description
454 - Default
455 * - -e
456 - *INT*
457 - Phred-scaled gap extension seq error probability
458 - 20
459 * - -F
460 - *FLOAT*
461 - minimum fraction of gapped reads for candidates
462 - 0.002
463 * - -h
464 - *INT*
465 - coefficient for homopolymer errors
466 - 100
467 * - -I
468 - *BOOLEAN*
469 - do not perform indel calling
470 - off
471 * - -L
472 - *INT*
473 - max per-sample depth for INDEL calling
474 - 250
475 * - -m
476 - *INT*
477 - minimum gapped reads for indel candidates
478 - 1
479 * - -o
480 - *INT*
481 - Phred-scaled gap open sequencing error probability
482 - 40
483 * - -p
484 - *BOOLEAN*
485 - apply -m and -F per-sample to increase sensitivity
486 - off
487 * - -P
488 - *STR*
489 - comma separated list of platforms for indels
490 - all
491
492 ------
493
206 **Citation** 494 **Citation**
207 495
208 For the underlying tool, please cite `Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. &lt;http://www.ncbi.nlm.nih.gov/pubmed/19505943&gt;`_ 496 For the underlying tool, please cite `Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. &lt;http://www.ncbi.nlm.nih.gov/pubmed/19505943&gt;`_
209 497
498
210 If you use this tool in Galaxy, please cite Blankenberg D, et al. *In preparation.* 499 If you use this tool in Galaxy, please cite Blankenberg D, et al. *In preparation.*
211 500 </help>
212 </help>
213 </tool> 501 </tool>