Mercurial > repos > devteam > samtools_mpileup
comparison samtools_mpileup.xml @ 2:3aa48bcbc599 draft
Uploaded tarball for 0.0.3 version.
author | devteam |
---|---|
date | Wed, 12 Mar 2014 12:53:30 -0400 |
parents | b47a418ccfdc |
children | da0203c3461a |
comparison
equal
deleted
inserted
replaced
1:b47a418ccfdc | 2:3aa48bcbc599 |
---|---|
1 <tool id="samtools_mpileup" name="MPileup" version="0.0.2"> | 1 <tool id="samtools_mpileup" name="MPileup" version="0.0.3"> |
2 <description>SNP and indel caller</description> | 2 <description>SNP and indel caller</description> |
3 <requirements> | 3 <requirements> |
4 <requirement type="package" version="0.1.18">samtools</requirement> | 4 <requirement type="package" version="0.1.19">samtools</requirement> |
5 </requirements> | 5 </requirements> |
6 <command interpreter="python">samtools_wrapper.py | 6 <command><![CDATA[ |
7 -p 'samtools mpileup' | 7 #if $reference_source.reference_source_selector == "history": |
8 --stdout "${output_log}" | 8 ln -s "${reference_source.ref_file}" && samtools faidx `basename "${reference_source.ref_file}"` && samtools mpileup |
9 #else: | |
10 samtools mpileup | |
11 #end if | |
9 #if $reference_source.reference_source_selector != "history": | 12 #if $reference_source.reference_source_selector != "history": |
10 -p '-f "${reference_source.ref_file.fields.path}"' | 13 -f "${reference_source.ref_file.fields.path}" |
11 #else: | 14 #else: |
12 -d "-f" "${reference_source.ref_file}" "fa" "reference_input" | 15 -f "${reference_source.ref_file}" |
13 #end if | 16 #end if |
14 #for $i, $input_bam in enumerate( $reference_source.input_bams ): | 17 #for $i, $input_bam in enumerate( $reference_source.input_bams ): |
15 -d " " "${input_bam.input_bam}" "${input_bam.input_bam.ext}" "bam_input_${i}" | 18 $input_bam.input_bam |
16 -d "" "${input_bam.input_bam.metadata.bam_index}" "bam_index" "bam_input_${i}" ##hardcode galaxy ext type as bam_index | |
17 #end for | 19 #end for |
18 -p ' | |
19 #if str( $advanced_options.advanced_options_selector ) == "advanced": | 20 #if str( $advanced_options.advanced_options_selector ) == "advanced": |
20 ${advanced_options.skip_anomalous_read_pairs} | 21 #if str( $advanced_options.filter_by_flags.filter_flags ) == "filter": |
22 #if $advanced_options.filter_by_flags.require_flags: | |
23 --rf ${sum([int(flag) for flag in str($advanced_options.filter_by_flags.require_flags).split(',')])} | |
24 #end if | |
25 #if $advanced_options.filter_by_flags.exclude_flags: | |
26 --ff ${sum([int(flag) for flag in str($advanced_options.filter_by_flags.exclude_flags).split(',')])} | |
27 #end if | |
28 #end if | |
29 #if str( $advanced_options.limit_by_region.limit_by_regions ) == "paste": | |
30 -l "$pasted_regions" | |
31 #elif str( $advanced_options.limit_by_region.limit_by_regions ) == "history" | |
32 -l "$bed_regions" | |
33 #end if | |
34 #if str( $advanced_options.exclude_read_group.exclude_read_groups ) == "paste": | |
35 -G "$excluded_read_groups" | |
36 #elif str( $advanced_options.exclude_read_group.exclude_read_groups ) == "history" | |
37 -G "$read_groups" | |
38 #end if | |
39 ${advanced_options.skip_anomalous_read_pairs} | |
21 ${advanced_options.disable_probabilistic_realignment} | 40 ${advanced_options.disable_probabilistic_realignment} |
22 -C "${advanced_options.coefficient_for_downgrading}" | 41 -C "${advanced_options.coefficient_for_downgrading}" |
23 -d "${advanced_options.max_reads_per_bam}" | 42 -d "${advanced_options.max_reads_per_bam}" |
24 ${advanced_options.extended_BAQ_computation} | 43 ${advanced_options.extended_BAQ_computation} |
25 #if str( $advanced_options.position_list ) != 'None': | |
26 -l "${advanced_options.position_list}" | |
27 #end if | |
28 -q "${advanced_options.minimum_mapping_quality}" | 44 -q "${advanced_options.minimum_mapping_quality}" |
29 -Q "${advanced_options.minimum_base_quality}" | 45 -Q "${advanced_options.minimum_base_quality}" |
30 #if str( $advanced_options.region_string ): | 46 #if str( $advanced_options.region_string ): |
31 -r "${advanced_options.region_string}" | 47 -r "${advanced_options.region_string}" |
32 #end if | 48 #end if |
38 -g | 54 -g |
39 -e "${genotype_likelihood_computation_type.gap_extension_sequencing_error_probability}" | 55 -e "${genotype_likelihood_computation_type.gap_extension_sequencing_error_probability}" |
40 -h "${genotype_likelihood_computation_type.coefficient_for_modeling_homopolymer_errors}" | 56 -h "${genotype_likelihood_computation_type.coefficient_for_modeling_homopolymer_errors}" |
41 #if str( $genotype_likelihood_computation_type.perform_indel_calling.perform_indel_calling_selector ) == 'perform_indel_calling': | 57 #if str( $genotype_likelihood_computation_type.perform_indel_calling.perform_indel_calling_selector ) == 'perform_indel_calling': |
42 -L "${genotype_likelihood_computation_type.perform_indel_calling.skip_indel_calling_above_sample_depth}" | 58 -L "${genotype_likelihood_computation_type.perform_indel_calling.skip_indel_calling_above_sample_depth}" |
59 -m "${genotype_likelihood_computation_type.perform_indel_calling.minimum_gapped_reads_for_indel_candidates}" | |
60 -F "${genotype_likelihood_computation_type.perform_indel_calling.minimum_gapped_read_fraction}" | |
61 ${genotype_likelihood_computation_type.perform_indel_calling.gapped_read_per_sample} | |
43 #else: | 62 #else: |
44 -I | 63 -I |
45 #end if | 64 #end if |
46 -o "${genotype_likelihood_computation_type.gap_open_sequencing_error_probability}" | 65 -o "${genotype_likelihood_computation_type.gap_open_sequencing_error_probability}" |
47 #if len( $genotype_likelihood_computation_type.platform_list_repeat ): | 66 #if len( $genotype_likelihood_computation_type.platform_list_repeat ): |
48 -P "${ ",".join( [ str( platform.platform_entry ) for platform in $genotype_likelihood_computation_type.platform_list_repeat ] ) }" | 67 -P "${ ",".join( [ str( platform.platform_entry ) for platform in $genotype_likelihood_computation_type.platform_list_repeat ] ) }" |
49 #end if | 68 #end if |
69 #else: | |
70 ${genotype_likelihood_computation_type.base_position_on_reads} | |
71 ${genotype_likelihood_computation_type.output_mapping_quality} | |
50 #end if | 72 #end if |
51 > "${output_mpileup}" | 73 > "$output_mpileup" 2> "$output_log" |
52 ' | 74 ]]></command> |
53 </command> | 75 <stdio> |
54 <inputs> | 76 <exit_code range="1:" level="fatal" description="Error" /> |
55 <conditional name="reference_source"> | 77 </stdio> |
56 <param name="reference_source_selector" type="select" label="Choose the source for the reference list"> | 78 <inputs> |
57 <option value="cached">Locally cached</option> | 79 <conditional name="reference_source"> |
58 <option value="history">History</option> | 80 <param name="reference_source_selector" type="select" label="Choose the source for the reference list"> |
59 </param> | 81 <option value="cached">Locally cached</option> |
60 <when value="cached"> | 82 <option value="history">History</option> |
61 <repeat name="input_bams" title="BAM file" min="1"> | |
62 <param name="input_bam" type="data" format="bam" label="BAM file"> | |
63 <validator type="unspecified_build" /> | |
64 <validator type="dataset_metadata_in_data_table" table_name="fasta_indexes" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." /> <!-- fixme!!! this needs to be a select --> | |
65 </param> | |
66 </repeat> | |
67 <param name="ref_file" type="select" label="Using reference genome"> | |
68 <options from_data_table="fasta_indexes"> | |
69 <!-- <filter type="data_meta" ref="input_bam" key="dbkey" column="1" /> does not yet work in a repeat...--> | |
70 </options> | |
71 </param> | |
72 </when> | |
73 <when value="history"> <!-- FIX ME!!!! --> | |
74 <repeat name="input_bams" title="BAM file" min="1"> | |
75 <param name="input_bam" type="data" format="bam" label="BAM file"> | |
76 <validator type="metadata" check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue." /> | |
77 </param> | |
78 </repeat> | |
79 <param name="ref_file" type="data" format="fasta" label="Using reference file" /> | |
80 </when> | |
81 </conditional> | |
82 | |
83 | |
84 <conditional name="genotype_likelihood_computation_type"> | |
85 <param name="genotype_likelihood_computation_type_selector" type="select" label="Genotype Likelihood Computation"> | |
86 <option value="perform_genotype_likelihood_computation">Perform genotype likelihood computation</option> | |
87 <option value="do_not_perform_genotype_likelihood_computation" selected="True">Do not perform genotype likelihood computation</option> | |
88 </param> | |
89 <when value="perform_genotype_likelihood_computation"> | |
90 <param name="gap_extension_sequencing_error_probability" type="integer" value="20" label="Phred-scaled gap extension sequencing error probability" /> | |
91 <param name="coefficient_for_modeling_homopolymer_errors" type="integer" value="100" label="Coefficient for modeling homopolymer errors." /> | |
92 <conditional name="perform_indel_calling"> | |
93 <param name="perform_indel_calling_selector" type="select" label="Perform INDEL calling"> | |
94 <option value="perform_indel_calling" selected="True">Perform INDEL calling</option> | |
95 <option value="do_not_perform_indel_calling">Do not perform INDEL calling</option> | |
96 </param> | 83 </param> |
97 <when value="perform_indel_calling"> | 84 <when value="cached"> |
98 <param name="skip_indel_calling_above_sample_depth" type="integer" value="250" label="Skip INDEL calling if the average per-sample depth is above" /> | 85 <repeat name="input_bams" title="BAM file" min="1"> |
86 <param name="input_bam" type="data" format="bam" label="BAM file"> | |
87 <validator type="unspecified_build" /> | |
88 <validator type="dataset_metadata_in_data_table" table_name="fasta_indexes" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." /> <!-- fixme!!! this needs to be a select --> | |
89 </param> | |
90 </repeat> | |
91 <param name="ref_file" type="select" label="Using reference genome"> | |
92 <options from_data_table="fasta_indexes" /> | |
93 <!-- <filter type="data_meta" ref="input_bam" key="dbkey" column="1" /> does not yet work in a repeat...--> | |
94 </param> | |
99 </when> | 95 </when> |
100 <when value="do_not_perform_indel_calling" /> | 96 <when value="history"> |
101 </conditional> | 97 <repeat name="input_bams" title="BAM file" min="1"> |
102 <param name="gap_open_sequencing_error_probability" type="integer" value="40" label="Phred-scaled gap open sequencing error probability" /> | 98 <param name="input_bam" type="data" format="bam" label="BAM file"> |
103 <repeat name="platform_list_repeat" title="Platform for INDEL candidates"> | 99 <validator type="metadata" check="bam_index" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue." /> |
104 <param name="platform_entry" type="text" value="" label="Platform to use for INDEL candidates" /> | 100 </param> |
105 </repeat> | 101 </repeat> |
106 </when> | 102 <param name="ref_file" type="data" format="fasta" label="Using reference file" /> |
107 <when value="do_not_perform_genotype_likelihood_computation"> | 103 </when> |
108 <!-- Do nothing here --> | 104 </conditional> |
109 </when> | 105 <conditional name="genotype_likelihood_computation_type"> |
110 </conditional> | 106 <param name="genotype_likelihood_computation_type_selector" type="select" label="Genotype Likelihood Computation"> |
111 <conditional name="advanced_options"> | 107 <option value="perform_genotype_likelihood_computation">Perform genotype likelihood computation</option> |
112 <param name="advanced_options_selector" type="select" label="Set advanced options"> | 108 <option value="do_not_perform_genotype_likelihood_computation" selected="True">Do not perform genotype likelihood computation</option> |
113 <option value="basic" selected="True">Basic</option> | 109 </param> |
114 <option value="advanced">Advanced</option> | 110 <when value="perform_genotype_likelihood_computation"> |
115 </param> | 111 <param name="gap_extension_sequencing_error_probability" type="integer" value="20" label="Phred-scaled gap extension sequencing error probability" /> |
116 <when value="advanced"> | 112 <param name="coefficient_for_modeling_homopolymer_errors" type="integer" value="100" label="Coefficient for modeling homopolymer errors." /> |
117 <param name="skip_anomalous_read_pairs" type="boolean" truevalue="-A" falsevalue="" checked="False" label="Do not skip anomalous read pairs in variant calling" /> | 113 <conditional name="perform_indel_calling"> |
118 <param name="disable_probabilistic_realignment" type="boolean" truevalue="-B" falsevalue="" checked="False" label=" Disable probabilistic realignment for the computation of base alignment quality (BAQ)" /> | 114 <param name="perform_indel_calling_selector" type="select" label="Perform INDEL calling"> |
119 <param name="coefficient_for_downgrading" type="integer" value="0" label="Coefficient for downgrading mapping quality for reads containing excessive mismatches" /> | 115 <option value="perform_indel_calling" selected="True">Perform INDEL calling</option> |
120 <param name="max_reads_per_bam" type="integer" value="250" label="Max reads per BAM" /> | 116 <option value="do_not_perform_indel_calling">Do not perform INDEL calling</option> |
121 <param name="extended_BAQ_computation" type="boolean" truevalue="-E" falsevalue="" checked="False" label="Extended BAQ computation" /> | 117 </param> |
122 <param name="position_list" type="data" format="bed" label="List of regions or sites on which to operate" optional="True" /> | 118 <when value="perform_indel_calling"> |
123 <param name="minimum_mapping_quality" type="integer" value="0" label="Minimum mapping quality for an alignment to be used" /> | 119 <param name="skip_indel_calling_above_sample_depth" type="integer" value="250" label="Skip INDEL calling if the average per-sample depth is above" /> |
124 <param name="minimum_base_quality" type="integer" value="13" label="Minimum base quality for a base to be considered" /> | 120 <param name="minimum_gapped_reads_for_indel_candidates" type="integer" value="1" label="Minimum gapped reads for indel candidates" /> |
125 <param name="region_string" type="text" value="" label="Only generate pileup in region" /> | 121 <param name="minimum_gapped_read_fraction" type="float" value="0.002" label="Minimum fraction of gapped reads for candidates" /> |
126 <param name="output_per_sample_read_depth" type="boolean" truevalue="-D" falsevalue="" checked="False" label="Output per-sample read depth" /> | 122 <param name="gapped_read_per_sample" type="boolean" truevalue="-p" falsevalue="" checked="False" label="Apply minimum values on a per-sample basis" /> |
127 <param name="output_per_sample_strand_bias_p_value" type="boolean" truevalue="-S" falsevalue="" checked="False" label="Output per-sample Phred-scaled strand bias P-value" /> | 123 </when> |
128 </when> | 124 <when value="do_not_perform_indel_calling" /> |
129 <when value="basic" /> | 125 </conditional> |
130 </conditional> | 126 <param name="gap_open_sequencing_error_probability" type="integer" value="40" label="Phred-scaled gap open sequencing error probability" /> |
131 </inputs> | 127 <repeat name="platform_list_repeat" title="Platform for INDEL candidates"> |
132 <outputs> | 128 <param name="platform_entry" type="text" value="" label="Platform to use for INDEL candidates" /> |
133 <data format="pileup" name="output_mpileup" label="${tool.name} on ${on_string}"> | 129 </repeat> |
134 <change_format> | 130 </when> |
135 <when input="genotype_likelihood_computation_type.genotype_likelihood_computation_type_selector" value="perform_genotype_likelihood_computation" format="bcf" /> | 131 <when value="do_not_perform_genotype_likelihood_computation"> |
136 </change_format> | 132 <param name="base_position_on_reads" type="boolean" truevalue="-O" falsevalue="" checked="False" label="Output base positions on reads" /> |
137 </data> | 133 <param name="output_mapping_quality" type="boolean" truevalue="-s" falsevalue="" checked="False" label="Output mapping quality" /> |
138 <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" /> | 134 </when> |
139 </outputs> | 135 </conditional> |
140 <tests> | 136 <conditional name="advanced_options"> |
141 <test> | 137 <param name="advanced_options_selector" type="select" label="Set advanced options"> |
142 <param name="reference_source_selector" value="history" /> | 138 <option value="basic" selected="True">Basic</option> |
143 <param name="ref_file" value="phiX.fasta" ftype="fasta" /> | 139 <option value="advanced">Advanced</option> |
144 <param name="input_bam" value="gatk/gatk_table_recalibration/gatk_table_recalibration_out_1.bam" ftype="bam" /> | 140 </param> |
145 <param name="genotype_likelihood_computation_type_selector" value="do_not_perform_genotype_likelihood_computation" /> | 141 <when value="advanced"> |
146 <param name="advanced_options_selector" value="basic" /> | 142 <conditional name="filter_by_flags"> |
147 <output name="output_mpileup" file="samtools/mpileup/samtools_mpileup_out_1.pileup" /> | 143 <param name="filter_flags" type="select" label="Set filter by flags"> |
148 <output name="output_log" file="samtools/mpileup/samtools_mpileup_out_1.log" /> | 144 <option value="nofilter" selected="True">Do not filter</option> |
149 </test> | 145 <option value="filter">Filter by flags to exclude or require</option> |
150 <test> | 146 </param> |
151 <param name="reference_source_selector" value="history" /> | 147 <when value="filter"> |
152 <param name="ref_file" value="phiX.fasta" ftype="fasta" /> | 148 <param name="require_flags" type="select" display="checkboxes" multiple="True" label="Require"> |
153 <param name="input_bam" value="gatk/gatk_table_recalibration/gatk_table_recalibration_out_1.bam" ftype="bam" /> | 149 <option value="1">Read is paired</option> |
154 <param name="genotype_likelihood_computation_type_selector" value="perform_genotype_likelihood_computation" /> | 150 <option value="2">Read is mapped in a proper pair</option> |
155 <param name="gap_extension_sequencing_error_probability" value="20" /> | 151 <option value="4">The read is unmapped</option> |
156 <param name="coefficient_for_modeling_homopolymer_errors" value="100" /> | 152 <option value="8">The mate is unmapped</option> |
157 <param name="perform_indel_calling_selector" value="perform_indel_calling" /> | 153 <option value="16">Read strand</option> |
158 <param name="skip_indel_calling_above_sample_depth" value="250" /> | 154 <option value="32">Mate strand</option> |
159 <param name="gap_open_sequencing_error_probability" value="40" /> | 155 <option value="64">Read is the first in a pair</option> |
160 <param name="platform_list_repeat" value="0" /> | 156 <option value="128">Read is the second in a pair</option> |
161 <param name="advanced_options_selector" value="basic" /> | 157 <option value="256">The alignment or this read is not primary</option> |
162 <output name="output_mpileup" file="samtools/mpileup/samtools_mpileup_out_2.bcf" /> | 158 <option value="512">The read fails platform/vendor quality checks</option> |
163 <output name="output_log" file="samtools/mpileup/samtools_mpileup_out_1.log" /> | 159 <option value="1024">The read is a PCR or optical duplicate</option> |
164 </test> | 160 </param> |
165 </tests> | 161 <param name="exclude_flags" type="select" display="checkboxes" multiple="True" label="Exclude"> |
166 <help> | 162 <option value="1">Read is paired</option> |
163 <option value="2">Read is mapped in a proper pair</option> | |
164 <option value="4">The read is unmapped</option> | |
165 <option value="8">The mate is unmapped</option> | |
166 <option value="16">Read strand</option> | |
167 <option value="32">Mate strand</option> | |
168 <option value="64">Read is the first in a pair</option> | |
169 <option value="128">Read is the second in a pair</option> | |
170 <option value="256">The alignment or this read is not primary</option> | |
171 <option value="512">The read fails platform/vendor quality checks</option> | |
172 <option value="1024">The read is a PCR or optical duplicate</option> | |
173 </param> | |
174 </when> | |
175 <when value="nofilter" /> | |
176 </conditional> | |
177 <conditional name="limit_by_region"> | |
178 <param name="limit_by_regions" type="select" label="Select regions to call"> | |
179 <option value="no_limit" selected="True">Do not limit</option> | |
180 <option value="history">From an uploaded BED file</option> | |
181 <option value="paste">Paste a list of regions or BED</option> | |
182 </param> | |
183 <when value="history"> | |
184 <param name="bed_regions" type="data" format="bed" label="BED file"> | |
185 <validator type="dataset_ok_validator" /> | |
186 </param> | |
187 </when> | |
188 <when value="paste"> | |
189 <param name="region_paste" type="text" area="true" size="10x35" label="Regions" help="Paste a list of regions in BED format or as a list of chromosomes and positions"/> | |
190 </when> | |
191 <when value="no_limit" /> | |
192 </conditional> | |
193 <conditional name="exclude_read_group"> | |
194 <param name="exclude_read_groups" type="select" label="Select read groups to exclude"> | |
195 <option value="no_limit" selected="True">Do not exclude</option> | |
196 <option value="history">From an uploaded text file</option> | |
197 <option value="paste">Paste a list of read groups</option> | |
198 </param> | |
199 <when value="history"> | |
200 <param name="read_groups" type="data" format="txt" label="Text file"> | |
201 <validator type="dataset_ok_validator" /> | |
202 </param> | |
203 </when> | |
204 <when value="paste"> | |
205 <param name="group_paste" type="text" area="true" size="10x35" label="Read groups" help="Paste a list of read groups"/> | |
206 </when> | |
207 <when value="no_limit" /> | |
208 </conditional> | |
209 <param name="skip_anomalous_read_pairs" type="boolean" truevalue="-A" falsevalue="" checked="False" label="Do not skip anomalous read pairs in variant calling" /> | |
210 <param name="disable_probabilistic_realignment" type="boolean" truevalue="-B" falsevalue="" checked="False" label=" Disable probabilistic realignment for the computation of base alignment quality (BAQ)" /> | |
211 <param name="coefficient_for_downgrading" type="integer" value="0" label="Coefficient for downgrading mapping quality for reads containing excessive mismatches" /> | |
212 <param name="max_reads_per_bam" type="integer" value="250" label="Max reads per BAM" /> | |
213 <param name="extended_BAQ_computation" type="boolean" truevalue="-E" falsevalue="" checked="False" label="Extended BAQ computation" /> | |
214 <param name="minimum_mapping_quality" type="integer" value="0" label="Minimum mapping quality for an alignment to be used" /> | |
215 <param name="minimum_base_quality" type="integer" value="13" label="Minimum base quality for a base to be considered" /> | |
216 <param name="region_string" type="text" value="" label="Only generate pileup in region" /> | |
217 <param name="output_per_sample_read_depth" type="boolean" truevalue="-D" falsevalue="" checked="False" label="Output per-sample read depth" /> | |
218 <param name="output_per_sample_strand_bias_p_value" type="boolean" truevalue="-S" falsevalue="" checked="False" label="Output per-sample Phred-scaled strand bias P-value" /> | |
219 </when> | |
220 <when value="basic" /> | |
221 </conditional> | |
222 </inputs> | |
223 <configfiles> | |
224 <configfile name="excluded_read_groups"> | |
225 <![CDATA[ | |
226 <% | |
227 import re | |
228 %> | |
229 #set pasted_data = '' | |
230 #if str( $advanced_options.advanced_options_selector ) == "advanced": | |
231 #if str( $advanced_options.exclude_read_group.exclude_read_groups ) == "paste": | |
232 #set regex=re.compile("\\s+") | |
233 #set pasted_data = '\t'.join( regex.split( str( $advanced_options.exclude_read_group['read_groups'] ) ) ) | |
234 #end if | |
235 #end if | |
236 ${pasted_data} | |
237 ]]> | |
238 </configfile> | |
239 <configfile name="pasted_regions"> | |
240 <![CDATA[ | |
241 <% | |
242 import re | |
243 %> | |
244 #set pasted_data = '' | |
245 #if str( $advanced_options.advanced_options_selector ) == "advanced": | |
246 #if str( $advanced_options.limit_by_region.limit_by_regions ) == "paste": | |
247 #set regex=re.compile("\\s+") | |
248 #set pasted_data = '\t'.join( regex.split( str( $advanced_options.limit_by_region['region_paste'] ) ) ) | |
249 #end if | |
250 #end if | |
251 ${pasted_data} | |
252 ]]> | |
253 </configfile> | |
254 </configfiles> | |
255 <outputs> | |
256 <data format="pileup" name="output_mpileup" label="${tool.name} on ${on_string}"> | |
257 <change_format> | |
258 <when input="genotype_likelihood_computation_type.genotype_likelihood_computation_type_selector" value="perform_genotype_likelihood_computation" format="bcf" /> | |
259 </change_format> | |
260 </data> | |
261 <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" /> | |
262 </outputs> | |
263 <tests> | |
264 <test> | |
265 <param name="reference_source_selector" value="history" /> | |
266 <param name="ref_file" value="phiX.fasta" ftype="fasta" /> | |
267 <param name="input_bam" value="samtools_mpileup_in_1.bam" ftype="bam" /> | |
268 <param name="genotype_likelihood_computation_type_selector" value="do_not_perform_genotype_likelihood_computation" /> | |
269 <param name="advanced_options_selector" value="basic" /> | |
270 <param name="base_position_on_reads" value="true" /> | |
271 <param name="output_mapping_quality" value="true" /> | |
272 <output name="output_mpileup" file="samtools_mpileup_out_1.pileup" /> | |
273 <output name="output_log" file="samtools_mpileup_out_1.log" /> | |
274 </test> | |
275 <test> | |
276 <param name="reference_source_selector" value="history" /> | |
277 <param name="ref_file" value="phiX.fasta" ftype="fasta" /> | |
278 <param name="input_bam" value="samtools_mpileup_in_1.bam" ftype="bam" /> | |
279 <param name="genotype_likelihood_computation_type_selector" value="perform_genotype_likelihood_computation" /> | |
280 <param name="gap_extension_sequencing_error_probability" value="20" /> | |
281 <param name="coefficient_for_modeling_homopolymer_errors" value="100" /> | |
282 <param name="perform_indel_calling_selector" value="perform_indel_calling" /> | |
283 <param name="skip_indel_calling_above_sample_depth" value="250" /> | |
284 <param name="gap_open_sequencing_error_probability" value="40" /> | |
285 <param name="platform_list_repeat" value="0" /> | |
286 <param name="advanced_options_selector" value="basic" /> | |
287 <output name="output_mpileup" ftype="bcf" file="samtools_mpileup_out_2.bcf" lines_diff="1" /> | |
288 <output name="output_log" file="samtools_mpileup_out_2.log" /> | |
289 </test> | |
290 <test> | |
291 <param name="reference_source_selector" value="cached" /> | |
292 <param name="input_bam" value="samtools_mpileup_in_3.bam" ftype="bam" dbkey="phiX" /> | |
293 <param name="genotype_likelihood_computation_type_selector" value="perform_genotype_likelihood_computation" /> | |
294 <param name="gap_extension_sequencing_error_probability" value="20" /> | |
295 <param name="coefficient_for_modeling_homopolymer_errors" value="100" /> | |
296 <param name="perform_indel_calling_selector" value="perform_indel_calling" /> | |
297 <param name="skip_indel_calling_above_sample_depth" value="250" /> | |
298 <param name="gap_open_sequencing_error_probability" value="40" /> | |
299 <param name="platform_list_repeat" value="0" /> | |
300 <param name="advanced_options_selector" value="basic" /> | |
301 <output name="output_mpileup" ftype="bcf" file="samtools_mpileup_out_2.bcf" lines_diff="1" /> | |
302 <output name="output_log" file="samtools_mpileup_out_3.log" /> | |
303 </test> | |
304 <test> | |
305 <param name="reference_source_selector" value="cached" /> | |
306 <param name="input_bam" value="samtools_mpileup_in_1.bam" ftype="bam" dbkey="phiX" /> | |
307 <param name="genotype_likelihood_computation_type_selector" value="perform_genotype_likelihood_computation" /> | |
308 <param name="gap_extension_sequencing_error_probability" value="20" /> | |
309 <param name="coefficient_for_modeling_homopolymer_errors" value="100" /> | |
310 <param name="perform_indel_calling_selector" value="perform_indel_calling" /> | |
311 <param name="skip_indel_calling_above_sample_depth" value="250" /> | |
312 <param name="gap_open_sequencing_error_probability" value="40" /> | |
313 <param name="platform_list_repeat" value="0" /> | |
314 <param name="advanced_options_selector" value="advanced" /> | |
315 <param name="advanced_options|filter_by_flags|filter_flags" value="nofilter" /> | |
316 <param name="advanced_options|limit_by_region|limit_by_regions" value="no_limit" /> | |
317 <param name="advanced_options|coefficient_for_downgrading" value="true" /> | |
318 <param name="advanced_options|max_reads_per_bam" value="200" /> | |
319 <param name="advanced_options|extended_BAQ_computation" value="true" /> | |
320 <param name="advanced_options|minimum_mapping_quality" value="0" /> | |
321 <param name="advanced_options|minimum_base_quality" value="43" /> | |
322 <output name="output_mpileup" ftype="bcf" file="samtools_mpileup_out_4.bcf" lines_diff="1" /> | |
323 <output name="output_log" file="samtools_mpileup_out_4.log" /> | |
324 </test> | |
325 </tests> | |
326 <help> | |
167 **What it does** | 327 **What it does** |
168 | 328 |
169 Generate BCF or pileup for one or multiple BAM files. Alignment records are grouped by sample identifiers in @RG header lines. If sample identifiers are absent, each input file is regarded as one sample. | 329 Generate BCF or pileup for one or multiple BAM files. Alignment records are grouped by sample identifiers in @RG header lines. If sample identifiers are absent, each input file is regarded as one sample. |
170 | 330 |
171 ------ | 331 ------ |
172 | 332 |
173 **Settings**:: | 333 .. list-table:: **Input options** |
174 | 334 :widths: 5 5 40 10 |
175 Input Options: | 335 :header-rows: 1 |
176 -6 Assume the quality is in the Illumina 1.3+ encoding. | 336 |
177 -A Do not skip anomalous read pairs in variant calling. | 337 * - Flag |
178 -B Disable probabilistic realignment for the computation of base alignment quality (BAQ). BAQ is the Phred-scaled probability of a read base being misaligned. Applying this option greatly helps to reduce false SNPs caused by misalignments. | 338 - Type |
179 -b FILE List of input BAM files, one file per line [null] | 339 - Description |
180 -C INT Coefficient for downgrading mapping quality for reads containing excessive mismatches. Given a read with a phred-scaled probability q of being generated from the mapped position, the new mapping quality is about sqrt((INT-q)/INT)*INT. A zero value disables this functionality; if enabled, the recommended value for BWA is 50. [0] | 340 - Default |
181 -d INT At a position, read maximally INT reads per input BAM. [250] | 341 * - -6 |
182 -E Extended BAQ computation. This option helps sensitivity especially for MNPs, but may hurt specificity a little bit. | 342 - *BOOLEAN* |
183 -f FILE The faidx-indexed reference file in the FASTA format. The file can be optionally compressed by razip. [null] | 343 - assume the quality is in the Illumina-1.3+ encoding |
184 -l FILE BED or position list file containing a list of regions or sites where pileup or BCF should be generated [null] | 344 - off |
185 -q INT Minimum mapping quality for an alignment to be used [0] | 345 * - -A |
186 -Q INT Minimum base quality for a base to be considered [13] | 346 - *BOOLEAN* |
187 -r STR Only generate pileup in region STR [all sites] | 347 - count anomalous read pairs |
188 Output Options: | 348 - off |
189 | 349 * - -B |
190 -D Output per-sample read depth | 350 - *BOOLEAN* |
191 -g Compute genotype likelihoods and output them in the binary call format (BCF). | 351 - disable BAQ computation |
192 -S Output per-sample Phred-scaled strand bias P-value | 352 - off |
193 -u Similar to -g except that the output is uncompressed BCF, which is preferred for piping. | 353 * - -b |
194 | 354 - *FILE* |
195 Options for Genotype Likelihood Computation (for -g or -u): | 355 - list of input BAM filenames, one per line |
196 | 356 - *null* |
197 -e INT Phred-scaled gap extension sequencing error probability. Reducing INT leads to longer indels. [20] | 357 * - -C |
198 -h INT Coefficient for modeling homopolymer errors. Given an l-long homopolymer run, the sequencing error of an indel of size s is modeled as INT*s/l. [100] | 358 - *INT* |
199 -I Do not perform INDEL calling | 359 - parameter for adjusting mapQ; 0 to disable |
200 -L INT Skip INDEL calling if the average per-sample depth is above INT. [250] | 360 - 0 |
201 -o INT Phred-scaled gap open sequencing error probability. Reducing INT leads to more indel calls. [40] | 361 * - -d |
202 -P STR Comma-delimited list of platforms (determined by @RG-PL) from which indel candidates are obtained. It is recommended to collect indel candidates from sequencing technologies that have a low indel error rate such as ILLUMINA. [all] | 362 - *INT* |
363 - max per-BAM depth to avoid excessive memory usage | |
364 - 250 | |
365 * - -E | |
366 - *BOOLEAN* | |
367 - recalculate extended BAQ on the fly thus ignoring existing BQs | |
368 - off | |
369 * - -f | |
370 - *FILE* | |
371 - faidx indexed reference sequence file | |
372 - *null* | |
373 * - -G | |
374 - *FILE* | |
375 - exclude read groups listed in FILE | |
376 - *null* | |
377 * - -l | |
378 - *FILE* | |
379 - list of positions (chr pos) or regions (BED) | |
380 - *null* | |
381 * - -M | |
382 - *INT* | |
383 - cap mapping quality at INT | |
384 - 60 | |
385 * - -r | |
386 - *STR* | |
387 - region in which pileup is generated | |
388 - *null* | |
389 * - -R | |
390 - *BOOLEAN* | |
391 - ignore RG tags | |
392 - off | |
393 * - -q | |
394 - *INT* | |
395 - skip alignments with mapQ smaller than INT | |
396 - 0 | |
397 * - -Q | |
398 - *INT* | |
399 - skip bases with baseQ/BAQ smaller than INT | |
400 - 13 | |
401 * - --rf | |
402 - *INT* | |
403 - required flags: skip reads with mask bits unset | |
404 - 0 | |
405 * - --ff | |
406 - *INT* | |
407 - filter flags: skip reads with mask bits set | |
408 - 0 | |
203 | 409 |
204 ------ | 410 ------ |
205 | 411 |
412 .. list-table:: **Output options** | |
413 :widths: 5 5 40 10 | |
414 :header-rows: 1 | |
415 | |
416 * - Flag | |
417 - Type | |
418 - Description | |
419 - Default | |
420 * - -D | |
421 - *BOOLEAN* | |
422 - output per-sample DP in BCF (requires -g/-u) | |
423 - off | |
424 * - -g | |
425 - *BOOLEAN* | |
426 - generate BCF output (genotype likelihoods) | |
427 - off | |
428 * - -O | |
429 - *BOOLEAN* | |
430 - output base positions on reads (disabled by -g/-u) | |
431 - off | |
432 * - -s | |
433 - *BOOLEAN* | |
434 - output mapping quality (disabled by -g/-u) | |
435 - off | |
436 * - -S | |
437 - *BOOLEAN* | |
438 - output per-sample strand bias P-value in BCF (requires -g/-u) | |
439 - off | |
440 * - -u | |
441 - *BOOLEAN* | |
442 - generate uncompressed BCF output | |
443 - off | |
444 | |
445 ------ | |
446 | |
447 .. list-table:: **SNP/INDEL genotype likelihoods options (effective with '-g' or '-u')** | |
448 :widths: 5 5 40 10 | |
449 :header-rows: 1 | |
450 | |
451 * - Flag | |
452 - Type | |
453 - Description | |
454 - Default | |
455 * - -e | |
456 - *INT* | |
457 - Phred-scaled gap extension seq error probability | |
458 - 20 | |
459 * - -F | |
460 - *FLOAT* | |
461 - minimum fraction of gapped reads for candidates | |
462 - 0.002 | |
463 * - -h | |
464 - *INT* | |
465 - coefficient for homopolymer errors | |
466 - 100 | |
467 * - -I | |
468 - *BOOLEAN* | |
469 - do not perform indel calling | |
470 - off | |
471 * - -L | |
472 - *INT* | |
473 - max per-sample depth for INDEL calling | |
474 - 250 | |
475 * - -m | |
476 - *INT* | |
477 - minimum gapped reads for indel candidates | |
478 - 1 | |
479 * - -o | |
480 - *INT* | |
481 - Phred-scaled gap open sequencing error probability | |
482 - 40 | |
483 * - -p | |
484 - *BOOLEAN* | |
485 - apply -m and -F per-sample to increase sensitivity | |
486 - off | |
487 * - -P | |
488 - *STR* | |
489 - comma separated list of platforms for indels | |
490 - all | |
491 | |
492 ------ | |
493 | |
206 **Citation** | 494 **Citation** |
207 | 495 |
208 For the underlying tool, please cite `Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. <http://www.ncbi.nlm.nih.gov/pubmed/19505943>`_ | 496 For the underlying tool, please cite `Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. <http://www.ncbi.nlm.nih.gov/pubmed/19505943>`_ |
209 | 497 |
498 | |
210 If you use this tool in Galaxy, please cite Blankenberg D, et al. *In preparation.* | 499 If you use this tool in Galaxy, please cite Blankenberg D, et al. *In preparation.* |
211 | 500 </help> |
212 </help> | |
213 </tool> | 501 </tool> |