<tool id="samtools_mpileup" name="MPileup" version="0.0.1">
  <!--
    Galaxy tool wrapper for "samtools mpileup" (samtools 0.1.18 package).

    The command template below assembles the mpileup command line and hands it
    to samtools_wrapper.py. The main output is pileup by default and switches
    to BCF when genotype likelihood computation is selected (see change_format
    in the outputs section).

    The command and help bodies are kept in CDATA sections so that ">" and
    "<url>" can be written literally without breaking well-formedness.
  -->
  <description>SNP and indel caller</description>
  <requirements>
    <requirement type="package" version="0.1.18">samtools</requirement>
  </requirements>
  <command interpreter="python"><![CDATA[samtools_wrapper.py
    ## Wrapper arguments as used in this template. Their exact semantics are
    ## presumed from usage here; confirm against samtools_wrapper.py.
    ##   -p 'TEXT'                        literal fragment of the samtools command line
    ##   -d "PREFIX" "PATH" "EXT" "NAME"  dataset handed to the wrapper
    -p 'samtools mpileup'
    --stdout "${output_log}"
    ## Reference: path from the sam_fa_indexes data table, or a FASTA dataset from the history.
    #if $reference_source.reference_source_selector != "history":
      -p '-f "${reference_source.ref_file.fields.path}"'
    #else:
      -d "-f" "${reference_source.ref_file}" "fa" "reference_input"
    #end if
    ## Each BAM and its index share the same "bam_input_N" name.
    #for $i, $input_bam in enumerate( $reference_source.input_bams ):
      -d " " "${input_bam.input_bam}" "${input_bam.input_bam.ext}" "bam_input_${i}"
      -d "" "${input_bam.input_bam.metadata.bam_index}" "bam_index" "bam_input_${i}" ##hardcode galaxy ext type as bam_index
    #end for
    ## Everything up to the closing single quote is one literal fragment, so the
    ## flags below are mpileup flags (e.g. -d here is "max reads per BAM").
    -p '
    #if str( $advanced_options.advanced_options_selector ) == "advanced":
      ${advanced_options.skip_anomalous_read_pairs}
      ${advanced_options.disable_probabilistic_realignment}
      -C "${advanced_options.coefficient_for_downgrading}"
      -d "${advanced_options.max_reads_per_bam}"
      ${advanced_options.extended_BAQ_computation}
      ## An unset optional dataset stringifies to None.
      #if str( $advanced_options.position_list ) != 'None':
        -l "${advanced_options.position_list}"
      #end if
      -q "${advanced_options.minimum_mapping_quality}"
      -Q "${advanced_options.minimum_base_quality}"
      #if str( $advanced_options.region_string ):
        -r "${advanced_options.region_string}"
      #end if
      ${advanced_options.output_per_sample_read_depth}
      ${advanced_options.output_per_sample_strand_bias_p_value}
    #end if
    #if str( $genotype_likelihood_computation_type.genotype_likelihood_computation_type_selector ) == 'perform_genotype_likelihood_computation':
      ##-g or -u
      -g
      -e "${genotype_likelihood_computation_type.gap_extension_sequencing_error_probability}"
      -h "${genotype_likelihood_computation_type.coefficient_for_modeling_homopolymer_errors}"
      #if str( $genotype_likelihood_computation_type.perform_indel_calling.perform_indel_calling_selector ) == 'perform_indel_calling':
        -L "${genotype_likelihood_computation_type.perform_indel_calling.skip_indel_calling_above_sample_depth}"
      #else:
        -I
      #end if
      -o "${genotype_likelihood_computation_type.gap_open_sequencing_error_probability}"
      ## -P is only emitted when at least one platform entry was added.
      #if len( $genotype_likelihood_computation_type.platform_list_repeat ):
        -P "${ ",".join( [ str( platform.platform_entry ) for platform in $genotype_likelihood_computation_type.platform_list_repeat ] ) }"
      #end if
    #end if
    > "${output_mpileup}"
    '
  ]]></command>
  <inputs>
    <!--
      Reference source. Both branches expose the same parameter names
      (input_bams, ref_file), which is what lets the command template address
      them without caring which branch is active.
      NOTE(review): the command reads metadata.bam_index for every BAM, but only
      the "history" branch validates that this metadata is present; confirm
      whether the "cached" branch should carry the same validator.
    -->
    <conditional name="reference_source">
      <param name="reference_source_selector" type="select" label="Choose the source for the reference list">
        <option value="cached">Locally cached</option>
        <option value="history">History</option>
      </param>
      <when value="cached">
        <repeat name="input_bams" title="BAM file" min="1">
          <param name="input_bam" type="data" format="bam" label="BAM file">
            <validator type="unspecified_build" />
            <validator type="dataset_metadata_in_data_table"
                       table_name="sam_fa_indexes"
                       metadata_name="dbkey"
                       metadata_column="value"
                       message="Sequences are not currently available for the specified build." /> <!-- fixme!!! this needs to be a select -->
          </param>
        </repeat>
        <param name="ref_file" type="select" label="Using reference genome">
          <options from_data_table="sam_fa_indexes">
            <!-- <filter type="data_meta" key="dbkey" ref="input_bam" column="value"/> does not yet work in a repeat...-->
          </options>
        </param>
      </when>
      <when value="history"> <!-- FIX ME!!!! -->
        <repeat name="input_bams" title="BAM file" min="1">
          <param name="input_bam" type="data" format="bam" label="BAM file" >
            <validator type="metadata"
                       check="bam_index"
                       message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue."/>
          </param>
        </repeat>
        <param name="ref_file" type="data" format="fasta" label="Using reference file" />
      </when>
    </conditional>


    <!-- Selecting likelihood computation adds -g and its tuning flags, and turns the main output into BCF. -->
    <conditional name="genotype_likelihood_computation_type">
      <param name="genotype_likelihood_computation_type_selector" type="select" label="Genotype Likelihood Computation">
        <option value="perform_genotype_likelihood_computation">Perform genotype likelihood computation</option>
        <option value="do_not_perform_genotype_likelihood_computation" selected="True">Do not perform genotype likelihood computation</option>
      </param>
      <when value="perform_genotype_likelihood_computation">
        <param name="gap_extension_sequencing_error_probability" type="integer" value="20" label="Phred-scaled gap extension sequencing error probability" />
        <param name="coefficient_for_modeling_homopolymer_errors" type="integer" value="100" label="Coefficient for modeling homopolymer errors." />
        <!-- Emits -L with a depth cap when INDEL calling is on, -I when it is off. -->
        <conditional name="perform_indel_calling">
          <param name="perform_indel_calling_selector" type="select" label="Perform INDEL calling">
            <option value="perform_indel_calling" selected="True">Perform INDEL calling</option>
            <option value="do_not_perform_indel_calling">Do not perform INDEL calling</option>
          </param>
          <when value="perform_indel_calling">
            <param name="skip_indel_calling_above_sample_depth" type="integer" value="250" label="Skip INDEL calling if the average per-sample depth is above" />
          </when>
          <when value="do_not_perform_indel_calling" />
        </conditional>
        <param name="gap_open_sequencing_error_probability" type="integer" value="40" label="Phred-scaled gap open sequencing error probability" />
        <!-- Entries are joined with commas into a single -P value by the command template. -->
        <repeat name="platform_list_repeat" title="Platform for INDEL candidates">
          <param name="platform_entry" type="text" value="" label="Platform to use for INDEL candidates" />
        </repeat>
      </when>
      <when value="do_not_perform_genotype_likelihood_computation">
        <!-- Do nothing here -->
      </when>
    </conditional>
    <!-- Boolean parameters carry the mpileup flag itself as truevalue and expand to nothing when unchecked. -->
    <conditional name="advanced_options">
      <param name="advanced_options_selector" type="select" label="Set advanced options">
        <option value="basic" selected="True">Basic</option>
        <option value="advanced">Advanced</option>
      </param>
      <when value="advanced">
        <param name="skip_anomalous_read_pairs" type="boolean" truevalue="-A" falsevalue="" checked="False" label="Do not skip anomalous read pairs in variant calling" />
        <param name="disable_probabilistic_realignment" type="boolean" truevalue="-B" falsevalue="" checked="False" label=" Disable probabilistic realignment for the computation of base alignment quality (BAQ)" />
        <param name="coefficient_for_downgrading" type="integer" value="0" label="Coefficient for downgrading mapping quality for reads containing excessive mismatches" />
        <param name="max_reads_per_bam" type="integer" value="250" label="Max reads per BAM" />
        <param name="extended_BAQ_computation" type="boolean" truevalue="-E" falsevalue="" checked="False" label="Extended BAQ computation" />
        <param name="position_list" type="data" format="bed" label="List of regions or sites on which to operate" optional="True" />
        <param name="minimum_mapping_quality" type="integer" value="0" label="Minimum mapping quality for an alignment to be used" />
        <param name="minimum_base_quality" type="integer" value="13" label="Minimum base quality for a base to be considered" />
        <param name="region_string" type="text" value="" label="Only generate pileup in region" />
        <param name="output_per_sample_read_depth" type="boolean" truevalue="-D" falsevalue="" checked="False" label="Output per-sample read depth" />
        <param name="output_per_sample_strand_bias_p_value" type="boolean" truevalue="-S" falsevalue="" checked="False" label="Output per-sample Phred-scaled strand bias P-value" />
      </when>
      <when value="basic" />
    </conditional>
  </inputs>
  <outputs>
    <!-- Pileup by default; BCF when genotype likelihoods are computed (the template then passes -g). -->
    <data format="pileup" name="output_mpileup" label="${tool.name} on ${on_string}">
      <change_format>
        <when input="genotype_likelihood_computation_type.genotype_likelihood_computation_type_selector" value="perform_genotype_likelihood_computation" format="bcf" />
      </change_format>
    </data>
    <!-- Receives whatever the wrapper writes for its stdout option. -->
    <data format="txt" name="output_log" label="${tool.name} on ${on_string} (log)" />
  </outputs>
  <tests>
    <!-- Basic pileup output with a reference taken from the history. -->
    <test>
      <param name="reference_source_selector" value="history" />
      <param name="ref_file" value="phiX.fasta" ftype="fasta" />
      <param name="input_bam" value="gatk/gatk_table_recalibration/gatk_table_recalibration_out_1.bam" ftype="bam" />
      <param name="genotype_likelihood_computation_type_selector" value="do_not_perform_genotype_likelihood_computation" />
      <param name="advanced_options_selector" value="basic" />
      <output name="output_mpileup" file="samtools/mpileup/samtools_mpileup_out_1.pileup" />
      <output name="output_log" file="samtools/mpileup/samtools_mpileup_out_1.log" />
    </test>
    <!-- BCF output with default likelihood settings; compared against the same log fixture as the first test. -->
    <test>
      <param name="reference_source_selector" value="history" />
      <param name="ref_file" value="phiX.fasta" ftype="fasta" />
      <param name="input_bam" value="gatk/gatk_table_recalibration/gatk_table_recalibration_out_1.bam" ftype="bam" />
      <param name="genotype_likelihood_computation_type_selector" value="perform_genotype_likelihood_computation" />
      <param name="gap_extension_sequencing_error_probability" value="20" />
      <param name="coefficient_for_modeling_homopolymer_errors" value="100" />
      <param name="perform_indel_calling_selector" value="perform_indel_calling" />
      <param name="skip_indel_calling_above_sample_depth" value="250" />
      <param name="gap_open_sequencing_error_probability" value="40" />
      <param name="platform_list_repeat" value="0" />
      <param name="advanced_options_selector" value="basic" />
      <output name="output_mpileup" file="samtools/mpileup/samtools_mpileup_out_2.bcf" />
      <output name="output_log" file="samtools/mpileup/samtools_mpileup_out_1.log" />
    </test>
  </tests>
  <help><![CDATA[
**What it does**

Generate BCF or pileup for one or multiple BAM files. Alignment records are grouped by sample identifiers in @RG header lines. If sample identifiers are absent, each input file is regarded as one sample.

------

**Settings**::

  Input Options:
  -6       Assume the quality is in the Illumina 1.3+ encoding.
  -A       Do not skip anomalous read pairs in variant calling.
  -B       Disable probabilistic realignment for the computation of base alignment quality (BAQ). BAQ is the Phred-scaled probability of a read base being misaligned. Applying this option greatly helps to reduce false SNPs caused by misalignments.
  -b FILE  List of input BAM files, one file per line [null]
  -C INT   Coefficient for downgrading mapping quality for reads containing excessive mismatches. Given a read with a phred-scaled probability q of being generated from the mapped position, the new mapping quality is about sqrt((INT-q)/INT)*INT. A zero value disables this functionality; if enabled, the recommended value for BWA is 50. [0]
  -d INT   At a position, read maximally INT reads per input BAM. [250]
  -E       Extended BAQ computation. This option helps sensitivity especially for MNPs, but may hurt specificity a little bit.
  -f FILE  The faidx-indexed reference file in the FASTA format. The file can be optionally compressed by razip. [null]
  -l FILE  BED or position list file containing a list of regions or sites where pileup or BCF should be generated [null]
  -q INT   Minimum mapping quality for an alignment to be used [0]
  -Q INT   Minimum base quality for a base to be considered [13]
  -r STR   Only generate pileup in region STR [all sites]
  Output Options:

  -D       Output per-sample read depth
  -g       Compute genotype likelihoods and output them in the binary call format (BCF).
  -S       Output per-sample Phred-scaled strand bias P-value
  -u       Similar to -g except that the output is uncompressed BCF, which is preferred for piping.

  Options for Genotype Likelihood Computation (for -g or -u):

  -e INT   Phred-scaled gap extension sequencing error probability. Reducing INT leads to longer indels. [20]
  -h INT   Coefficient for modeling homopolymer errors. Given an l-long homopolymer run, the sequencing error of an indel of size s is modeled as INT*s/l. [100]
  -I       Do not perform INDEL calling
  -L INT   Skip INDEL calling if the average per-sample depth is above INT. [250]
  -o INT   Phred-scaled gap open sequencing error probability. Reducing INT leads to more indel calls. [40]
  -P STR   Comma delimited list of platforms (determined by @RG-PL) from which indel candidates are obtained. It is recommended to collect indel candidates from sequencing technologies that have low indel error rate such as ILLUMINA. [all]

------

**Citation**

For the underlying tool, please cite `Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. <http://www.ncbi.nlm.nih.gov/pubmed/19505943>`_

If you use this tool in Galaxy, please cite Blankenberg D, et al. *In preparation.*

  ]]></help>
</tool>